valor-lite 0.33.6__tar.gz → 0.33.8__tar.gz
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- {valor_lite-0.33.6 → valor_lite-0.33.8}/PKG-INFO +1 -1
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/conftest.py +95 -142
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_accuracy.py +43 -58
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_confusion_matrix.py +47 -97
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_counts.py +69 -178
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_dataloader.py +3 -14
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_evaluator.py +9 -9
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_f1.py +45 -57
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_filtering.py +71 -61
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_precision.py +44 -57
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_recall.py +45 -57
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_rocauc.py +157 -111
- valor_lite-0.33.8/tests/classification/test_schemas.py +30 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_stability.py +4 -9
- valor_lite-0.33.8/tests/detection/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/conftest.py +178 -91
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_average_precision.py +199 -177
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_average_recall.py +151 -81
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_confusion_matrix.py +182 -119
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_counts.py +125 -79
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_dataloader.py +15 -31
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_evaluator.py +12 -22
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_filtering.py +161 -145
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_iou.py +51 -203
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_pr_curve.py +13 -14
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_precision.py +123 -78
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_recall.py +122 -78
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_schemas.py +21 -27
- {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_stability.py +5 -9
- valor_lite-0.33.8/tests/segmentation/__init__.py +0 -0
- valor_lite-0.33.8/tests/segmentation/conftest.py +170 -0
- valor_lite-0.33.8/tests/segmentation/test_accuracy.py +64 -0
- valor_lite-0.33.8/tests/segmentation/test_annotation.py +80 -0
- valor_lite-0.33.8/tests/segmentation/test_confusion_matrix.py +87 -0
- valor_lite-0.33.8/tests/segmentation/test_dataloader.py +8 -0
- valor_lite-0.33.8/tests/segmentation/test_evaluator.py +29 -0
- valor_lite-0.33.8/tests/segmentation/test_f1.py +89 -0
- valor_lite-0.33.8/tests/segmentation/test_filtering.py +75 -0
- valor_lite-0.33.8/tests/segmentation/test_iou.py +126 -0
- valor_lite-0.33.8/tests/segmentation/test_precision.py +96 -0
- valor_lite-0.33.8/tests/segmentation/test_recall.py +94 -0
- valor_lite-0.33.8/tests/segmentation/test_stability.py +85 -0
- valor_lite-0.33.8/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/annotation.py +6 -2
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/computation.py +31 -52
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/manager.py +68 -131
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/metric.py +7 -23
- valor_lite-0.33.8/valor_lite/detection/annotation.py +133 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/computation.py +130 -92
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/manager.py +211 -462
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/metric.py +20 -48
- valor_lite-0.33.8/valor_lite/segmentation/__init__.py +27 -0
- valor_lite-0.33.8/valor_lite/segmentation/annotation.py +49 -0
- valor_lite-0.33.8/valor_lite/segmentation/computation.py +186 -0
- valor_lite-0.33.8/valor_lite/segmentation/manager.py +532 -0
- valor_lite-0.33.8/valor_lite/segmentation/metric.py +119 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/PKG-INFO +1 -1
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/SOURCES.txt +21 -1
- valor_lite-0.33.6/tests/classification/test_schemas.py +0 -21
- valor_lite-0.33.6/valor_lite/detection/annotation.py +0 -98
- {valor_lite-0.33.6 → valor_lite-0.33.8}/LICENSE +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/README.md +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/benchmark_classification.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/benchmark_objdet.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/.gitignore +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/object-detection.ipynb +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/tabular_classification.ipynb +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/pyproject.toml +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/setup.cfg +0 -0
- {valor_lite-0.33.6/tests/detection → valor_lite-0.33.8/tests}/__init__.py +0 -0
- {valor_lite-0.33.6/valor_lite → valor_lite-0.33.8/tests/classification}/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/__init__.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/top_level.txt +0 -0
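The headline change running through these files is a flattened label schema: the conftest.py hunks below drop the keyed groundtruths/predictions tuple lists of 0.33.6 in favor of a single groundtruth string and a flat list of prediction labels on Classification. A minimal before/after sketch of the change (constructor fields are taken from the hunks; the import path is an assumption based on the package layout, and the 0.33.6 form is shown only as a comment since it no longer validates):

    from valor_lite.classification import Classification

    # 0.33.6 (removed): labels were ("key", "value") tuples, e.g.
    #   groundtruths=[("class", "0")],
    #   predictions=[("class", "0"), ("class", "1")],
    # 0.33.8 (added): one ground-truth label and flat prediction labels.
    clf = Classification(
        uid="uid0",
        groundtruth="0",
        predictions=["0", "1"],
        scores=[0.9, 0.1],
    )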
tests/classification/conftest.py

@@ -7,34 +7,34 @@ def basic_classifications() -> list[Classification]:
     return [
         Classification(
             uid="uid0",
-
+            groundtruth="0",
             predictions=[
-
-
-
-
+                "0",
+                "1",
+                "2",
+                "3",
             ],
             scores=[1.0, 0.0, 0.0, 0.0],
         ),
         Classification(
             uid="uid1",
-
+            groundtruth="0",
             predictions=[
-
-
-
-
+                "0",
+                "1",
+                "2",
+                "3",
             ],
             scores=[0.0, 0.0, 1.0, 0.0],
         ),
         Classification(
             uid="uid2",
-
+            groundtruth="3",
             predictions=[
-
-
-
-
+                "0",
+                "1",
+                "2",
+                "3",
             ],
             scores=[0.0, 0.0, 0.0, 0.3],
         ),
@@ -57,61 +57,61 @@ def classifications_from_api_unit_tests() -> list[Classification]:
     return [
         Classification(
             uid="uid0",
-
+            groundtruth="0",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[1.0, 0.0, 0.0],
         ),
         Classification(
             uid="uid1",
-
+            groundtruth="0",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[0.0, 1.0, 0.0],
         ),
         Classification(
             uid="uid2",
-
+            groundtruth="0",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[0.0, 0.0, 1.0],
         ),
         Classification(
             uid="uid3",
-
+            groundtruth="1",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[0.0, 1.0, 0.0],
         ),
         Classification(
             uid="uid4",
-
+            groundtruth="2",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[0.0, 1.0, 0.0],
         ),
         Classification(
             uid="uid5",
-
+            groundtruth="2",
             predictions=[
-
-
-
+                "0",
+                "1",
+                "2",
             ],
             scores=[0.0, 1.0, 0.0],
         ),
@@ -119,7 +119,7 @@ def classifications_from_api_unit_tests() -> list[Classification]:


 @pytest.fixture
-def classifications_two_categories() -> list[Classification]:
+def classifications_animal_example() -> list[Classification]:
     animal_gts = ["bird", "dog", "bird", "bird", "cat", "dog"]
     animal_pds = [
         {"bird": 0.6, "dog": 0.2, "cat": 0.2},
@@ -131,6 +131,19 @@ def classifications_two_categories() -> list[Classification]:
         # Note: In the case of a tied score, the ordering of predictions is used.
     ]

+    return [
+        Classification(
+            uid=f"uid{idx}",
+            groundtruth=gt,
+            predictions=list(pd.keys()),
+            scores=list(pd.values()),
+        )
+        for idx, (gt, pd) in enumerate(zip(animal_gts, animal_pds))
+    ]
+
+
+@pytest.fixture
+def classifications_color_example() -> list[Classification]:
     color_gts = ["white", "white", "red", "blue", "black", "red"]
     color_pds = [
         {"white": 0.65, "red": 0.1, "blue": 0.2, "black": 0.05},
@@ -141,29 +154,14 @@ def classifications_two_categories() -> list[Classification]:
         {"red": 0.9, "white": 0.06, "blue": 0.01, "black": 0.03},
     ]

-    joint_gts = zip(animal_gts, color_gts)
-    joint_pds = [
-        {
-            "animal": animal,
-            "color": color,
-        }
-        for animal, color in zip(animal_pds, color_pds)
-    ]
-
     return [
         Classification(
             uid=f"uid{idx}",
-
-            predictions=
-
-            for key, values in pd.items()
-            for value in values.keys()
-            ],
-            scores=[
-                score for values in pd.values() for score in values.values()
-            ],
+            groundtruth=gt,
+            predictions=list(pd.keys()),
+            scores=list(pd.values()),
         )
-        for idx, (gt, pd) in enumerate(zip(
+        for idx, (gt, pd) in enumerate(zip(color_gts, color_pds))
     ]
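The two hunks above replace the joint two-category fixture, which packed animal and color labels into one record keyed by category, with independent per-category fixtures: under the flat schema each Classification carries exactly one groundtruth string, so multi-category data becomes separate Classification lists. A sketch of the resulting split, using the first datum of each fixture (the import path is an assumption based on the package layout):

    from valor_lite.classification import Classification

    # One record per category under the flat schema (values copied from the
    # fixtures above); 0.33.6 packed both categories into a single record.
    animal = Classification(
        uid="uid0",
        groundtruth="bird",
        predictions=["bird", "dog", "cat"],
        scores=[0.6, 0.2, 0.2],
    )
    color = Classification(
        uid="uid0",
        groundtruth="white",
        predictions=["white", "red", "blue", "black"],
        scores=[0.65, 0.1, 0.2, 0.05],
    )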
@@ -172,36 +170,21 @@ def classifications_image_example() -> list[Classification]:
     return [
         Classification(
             uid="uid5",
-
-                ("k4", "v4"),
-                ("k5", "v5"),
-            ],
+            groundtruth="v4",
             predictions=[
-
-
-                ("k5", "v1"),
+                "v1",
+                "v8",
             ],
-            scores=[0.47, 0.53
+            scores=[0.47, 0.53],
         ),
         Classification(
             uid="uid6",
-
-                ("k4", "v4"),
-            ],
-            predictions=[("k4", "v4"), ("k4", "v5")],
-            scores=[0.71, 0.29],
-        ),
-        Classification(
-            uid="uid8",
-            groundtruths=[
-                ("k3", "v3"),
-            ],
+            groundtruth="v4",
             predictions=[
-
-
-            scores=[
-                1.0,
+                "v4",
+                "v5",
             ],
+            scores=[0.71, 0.29],
         ),
     ]
@@ -224,10 +207,8 @@ def classifications_tabular_example() -> list[Classification]:
     return [
         Classification(
             uid=f"uid{i}",
-
-            predictions=[
-                ("class", str(pd_label)) for pd_label, _ in enumerate(pds)
-            ],
+            groundtruth=str(gt_label),
+            predictions=[str(pd_label) for pd_label, _ in enumerate(pds)],
             scores=pds,
         )
         for i, (gt_label, pds) in enumerate(
@@ -236,24 +217,12 @@ def classifications_tabular_example() -> list[Classification]:
     ]


-@pytest.fixture
-def classifications_no_groundtruths() -> list[Classification]:
-    return [
-        Classification(
-            uid="uid1",
-            groundtruths=[],
-            predictions=[("k1", "v1"), ("k1", "v2")],
-            scores=[0.8, 0.2],
-        )
-    ]
-
-
 @pytest.fixture
 def classifications_no_predictions() -> list[Classification]:
     return [
         Classification(
             uid="uid1",
-
+            groundtruth="v1",
             predictions=[],
             scores=[],
         )
@@ -265,11 +234,11 @@ def classifications_multiclass() -> list[Classification]:
     return [
         Classification(
             uid="uid0",
-
+            groundtruth="cat",
             predictions=[
-
-
-
+                "cat",
+                "dog",
+                "bee",
             ],
             scores=[
                 0.44598543489942505,
@@ -279,11 +248,11 @@ def classifications_multiclass() -> list[Classification]:
         ),
         Classification(
             uid="uid1",
-
+            groundtruth="bee",
             predictions=[
-
-
-
+                "cat",
+                "dog",
+                "bee",
             ],
             scores=[
                 0.4076893257212283,
@@ -293,11 +262,11 @@ def classifications_multiclass() -> list[Classification]:
         ),
         Classification(
             uid="uid2",
-
+            groundtruth="cat",
             predictions=[
-
-
-
+                "cat",
+                "dog",
+                "bee",
             ],
             scores=[
                 0.25060075263871917,
@@ -307,11 +276,11 @@ def classifications_multiclass() -> list[Classification]:
         ),
         Classification(
             uid="uid3",
-
+            groundtruth="bee",
             predictions=[
-
-
-
+                "cat",
+                "dog",
+                "bee",
             ],
             scores=[
                 0.2003514145616792,
@@ -321,11 +290,11 @@ def classifications_multiclass() -> list[Classification]:
         ),
         Classification(
             uid="uid4",
-
+            groundtruth="dog",
             predictions=[
-
-
-
+                "cat",
+                "dog",
+                "bee",
             ],
             scores=[
                 0.33443897813714385,
@@ -337,18 +306,14 @@ def classifications_multiclass() -> list[Classification]:


 @pytest.fixture
-def classifications_multiclass_true_negatives_check() -> list[Classification]:
+def classifications_multiclass_true_negatives_check() -> (
+    list[Classification]
+):
     return [
         Classification(
             uid="uid1",
-
-            predictions=[
-            scores=[0.15, 0.48, 0.37],
-        ),
-        Classification(
-            uid="uid2",
-            groundtruths=[("k2", "egg")],
-            predictions=[("k2", "egg"), ("k2", "milk"), ("k2", "flour")],
+            groundtruth="ant",
+            predictions=["ant", "bee", "cat"],
             scores=[0.15, 0.48, 0.37],
         ),
     ]
@@ -359,20 +324,8 @@ def classifications_multiclass_zero_count() -> list[Classification]:
     return [
         Classification(
             uid="uid1",
-
-            predictions=[
+            groundtruth="ant",
+            predictions=["ant", "bee", "cat"],
             scores=[0.15, 0.48, 0.37],
         )
     ]
-
-
-@pytest.fixture
-def classifications_with_label_key_mismatch() -> list[Classification]:
-    return [
-        Classification(
-            uid="uid0",
-            groundtruths=[("k1", "V1")],
-            predictions=[("k2", "v1")],
-            scores=[1.0],
-        )
-    ]
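The test_accuracy.py hunks below apply the same flattening to evaluator output: metadata lists such as ignored_prediction_labels now hold plain strings instead of ("key", "value") tuples, and each metric's "label" parameter becomes a bare string where 0.33.6 used a {"key": ..., "value": ...} dict. A sketch of the new metric shape, with values copied from the assertions that follow:

    # Shape of an Accuracy metric dict in 0.33.8, per the assertions below.
    expected = {
        "type": "Accuracy",
        "value": [2 / 3],
        "parameters": {
            "score_thresholds": [0.5],
            "hardmax": True,
            "label": "white",  # 0.33.6 used {"key": ..., "value": ...} dicts here
        },
    }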
tests/classification/test_accuracy.py

@@ -77,16 +77,12 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
         "n_groundtruths": 3,
         "n_predictions": 12,
         "n_labels": 4,
-        "ignored_prediction_labels": [
-            ("class", "1"),
-            ("class", "2"),
-        ],
+        "ignored_prediction_labels": ["1", "2"],
         "missing_prediction_labels": [],
     }

     metrics = evaluator.evaluate(score_thresholds=[0.25, 0.75], as_dict=True)

-    # test Accuracy
     actual_metrics = [m for m in metrics[MetricType.Accuracy]]
     expected_metrics = [
         {
@@ -95,7 +91,7 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
             "parameters": {
                 "score_thresholds": [0.25, 0.75],
                 "hardmax": True,
-                "label":
+                "label": "0",
             },
         },
         {
@@ -104,7 +100,7 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
             "parameters": {
                 "score_thresholds": [0.25, 0.75],
                 "hardmax": True,
-                "label":
+                "label": "3",
             },
         },
     ]
@@ -114,17 +110,16 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
         assert m in actual_metrics


-def test_accuracy_with_example(
-
+def test_accuracy_with_animal_example(
+    classifications_animal_example: list[Classification],
 ):

     loader = DataLoader()
-    loader.add_data(
+    loader.add_data(classifications_animal_example)
     evaluator = loader.finalize()

     metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)

-    # test Accuracy
     actual_metrics = [m for m in metrics[MetricType.Accuracy]]
     expected_metrics = [
         {
@@ -133,7 +128,7 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "bird",
             },
         },
         {
@@ -142,7 +137,7 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "dog",
             },
         },
         {
@@ -151,16 +146,35 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "cat",
             },
         },
+    ]
+    for m in actual_metrics:
+        assert m in expected_metrics
+    for m in expected_metrics:
+        assert m in actual_metrics
+
+
+def test_accuracy_color_example(
+    classifications_color_example: list[Classification],
+):
+
+    loader = DataLoader()
+    loader.add_data(classifications_color_example)
+    evaluator = loader.finalize()
+
+    metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)
+
+    actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+    expected_metrics = [
         {
             "type": "Accuracy",
             "value": [2 / 3],
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "white",
             },
         },
         {
@@ -169,7 +183,7 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "red",
             },
         },
         {
@@ -178,7 +192,7 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "blue",
             },
         },
         {
@@ -187,7 +201,7 @@ def test_accuracy_with_example(
             "parameters": {
                 "score_thresholds": [0.5],
                 "hardmax": True,
-                "label":
+                "label": "black",
             },
         },
     ]
@@ -205,53 +219,25 @@ def test_accuracy_with_image_example(
     evaluator = loader.finalize()

     assert evaluator.metadata == {
-        "n_datums":
-        "n_groundtruths":
-        "n_predictions":
-        "n_labels":
-        "ignored_prediction_labels": [
-
-            ("k4", "v8"),
-            ("k5", "v1"),
-            ("k4", "v5"),
-            ("k3", "v1"),
-        ],
-        "missing_prediction_labels": [
-            ("k5", "v5"),
-            ("k3", "v3"),
-        ],
+        "n_datums": 2,
+        "n_groundtruths": 2,
+        "n_predictions": 4,
+        "n_labels": 4,
+        "ignored_prediction_labels": ["v1", "v8", "v5"],
+        "missing_prediction_labels": [],
     }

     metrics = evaluator.evaluate(as_dict=True)

-    # test Accuracy
     actual_metrics = [m for m in metrics[MetricType.Accuracy]]
     expected_metrics = [
         {
             "type": "Accuracy",
-            "value": [0.
-            "parameters": {
-                "score_thresholds": [0.0],
-                "hardmax": True,
-                "label": {"key": "k4", "value": "v4"},
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [0.0],
-            "parameters": {
-                "score_thresholds": [0.0],
-                "hardmax": True,
-                "label": {"key": "k5", "value": "v5"},
-            },
-        },
-        {
-            "type": "Accuracy",
-            "value": [0.0],
+            "value": [0.5],
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label":
+                "label": "v4",
             },
         },
     ]
@@ -279,7 +265,6 @@ def test_accuracy_with_tabular_example(

     metrics = evaluator.evaluate(as_dict=True)

-    # test Accuracy
     actual_metrics = [m for m in metrics[MetricType.Accuracy]]
     expected_metrics = [
         {
@@ -288,7 +273,7 @@ def test_accuracy_with_tabular_example(
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label":
+                "label": "0",
             },
         },
         {
@@ -297,7 +282,7 @@ def test_accuracy_with_tabular_example(
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label":
+                "label": "1",
             },
         },
         {

@@ -306,7 +291,7 @@ def test_accuracy_with_tabular_example(
             "parameters": {
                 "score_thresholds": [0.0],
                 "hardmax": True,
-                "label":
+                "label": "2",
             },
         },
     ]
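Taken together, the updated tests exercise the 0.33.8 classification flow end to end. A condensed sketch of that flow (class and method names are taken from the hunks above; the import path is an assumption based on the package layout):

    from valor_lite.classification import Classification, DataLoader, MetricType

    loader = DataLoader()
    loader.add_data(
        [
            Classification(
                uid="uid0",
                groundtruth="bird",
                predictions=["bird", "dog", "cat"],
                scores=[0.6, 0.2, 0.2],
            )
        ]
    )
    evaluator = loader.finalize()

    # Metadata and metric labels are plain strings in 0.33.8.
    print(evaluator.metadata)
    metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)
    for m in metrics[MetricType.Accuracy]:
        print(m["parameters"]["label"], m["value"])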