valor-lite 0.33.6__tar.gz → 0.33.8__tar.gz

This diff shows the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {valor_lite-0.33.6 → valor_lite-0.33.8}/PKG-INFO +1 -1
  2. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/conftest.py +95 -142
  3. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_accuracy.py +43 -58
  4. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_confusion_matrix.py +47 -97
  5. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_counts.py +69 -178
  6. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_dataloader.py +3 -14
  7. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_evaluator.py +9 -9
  8. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_f1.py +45 -57
  9. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_filtering.py +71 -61
  10. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_precision.py +44 -57
  11. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_recall.py +45 -57
  12. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_rocauc.py +157 -111
  13. valor_lite-0.33.8/tests/classification/test_schemas.py +30 -0
  14. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/classification/test_stability.py +4 -9
  15. valor_lite-0.33.8/tests/detection/__init__.py +0 -0
  16. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/conftest.py +178 -91
  17. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_average_precision.py +199 -177
  18. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_average_recall.py +151 -81
  19. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_confusion_matrix.py +182 -119
  20. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_counts.py +125 -79
  21. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_dataloader.py +15 -31
  22. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_evaluator.py +12 -22
  23. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_filtering.py +161 -145
  24. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_iou.py +51 -203
  25. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_pr_curve.py +13 -14
  26. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_precision.py +123 -78
  27. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_recall.py +122 -78
  28. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_schemas.py +21 -27
  29. {valor_lite-0.33.6 → valor_lite-0.33.8}/tests/detection/test_stability.py +5 -9
  30. valor_lite-0.33.8/tests/segmentation/__init__.py +0 -0
  31. valor_lite-0.33.8/tests/segmentation/conftest.py +170 -0
  32. valor_lite-0.33.8/tests/segmentation/test_accuracy.py +64 -0
  33. valor_lite-0.33.8/tests/segmentation/test_annotation.py +80 -0
  34. valor_lite-0.33.8/tests/segmentation/test_confusion_matrix.py +87 -0
  35. valor_lite-0.33.8/tests/segmentation/test_dataloader.py +8 -0
  36. valor_lite-0.33.8/tests/segmentation/test_evaluator.py +29 -0
  37. valor_lite-0.33.8/tests/segmentation/test_f1.py +89 -0
  38. valor_lite-0.33.8/tests/segmentation/test_filtering.py +75 -0
  39. valor_lite-0.33.8/tests/segmentation/test_iou.py +126 -0
  40. valor_lite-0.33.8/tests/segmentation/test_precision.py +96 -0
  41. valor_lite-0.33.8/tests/segmentation/test_recall.py +94 -0
  42. valor_lite-0.33.8/tests/segmentation/test_stability.py +85 -0
  43. valor_lite-0.33.8/valor_lite/__init__.py +0 -0
  44. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/annotation.py +6 -2
  45. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/computation.py +31 -52
  46. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/manager.py +68 -131
  47. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/metric.py +7 -23
  48. valor_lite-0.33.8/valor_lite/detection/annotation.py +133 -0
  49. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/computation.py +130 -92
  50. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/manager.py +211 -462
  51. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/metric.py +20 -48
  52. valor_lite-0.33.8/valor_lite/segmentation/__init__.py +27 -0
  53. valor_lite-0.33.8/valor_lite/segmentation/annotation.py +49 -0
  54. valor_lite-0.33.8/valor_lite/segmentation/computation.py +186 -0
  55. valor_lite-0.33.8/valor_lite/segmentation/manager.py +532 -0
  56. valor_lite-0.33.8/valor_lite/segmentation/metric.py +119 -0
  57. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/PKG-INFO +1 -1
  58. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/SOURCES.txt +21 -1
  59. valor_lite-0.33.6/tests/classification/test_schemas.py +0 -21
  60. valor_lite-0.33.6/valor_lite/detection/annotation.py +0 -98
  61. {valor_lite-0.33.6 → valor_lite-0.33.8}/LICENSE +0 -0
  62. {valor_lite-0.33.6 → valor_lite-0.33.8}/README.md +0 -0
  63. {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/.gitignore +0 -0
  64. {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/benchmark_classification.py +0 -0
  65. {valor_lite-0.33.6 → valor_lite-0.33.8}/benchmarks/benchmark_objdet.py +0 -0
  66. {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/.gitignore +0 -0
  67. {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/object-detection.ipynb +0 -0
  68. {valor_lite-0.33.6 → valor_lite-0.33.8}/examples/tabular_classification.ipynb +0 -0
  69. {valor_lite-0.33.6 → valor_lite-0.33.8}/pyproject.toml +0 -0
  70. {valor_lite-0.33.6 → valor_lite-0.33.8}/setup.cfg +0 -0
  71. {valor_lite-0.33.6/tests/detection → valor_lite-0.33.8/tests}/__init__.py +0 -0
  72. {valor_lite-0.33.6/valor_lite → valor_lite-0.33.8/tests/classification}/__init__.py +0 -0
  73. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/classification/__init__.py +0 -0
  74. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/detection/__init__.py +0 -0
  75. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite/schemas.py +0 -0
  76. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/dependency_links.txt +0 -0
  77. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/requires.txt +0 -0
  78. {valor_lite-0.33.6 → valor_lite-0.33.8}/valor_lite.egg-info/top_level.txt +0 -0
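Note on the API change running through the classification test diffs below: `Classification` appears to drop the `(label_key, value)` tuple format (`groundtruths=[...]` and tuple-valued `predictions`) in favor of a single `groundtruth` string plus plain string `predictions`. A minimal sketch of the new usage, assembled from the 0.33.8 test fixtures shown below; the import path is assumed from the package layout above and not verified:

    # Minimal sketch, assuming Classification and DataLoader are importable
    # from valor_lite.classification as in the test files in this diff.
    from valor_lite.classification import Classification, DataLoader

    clf = Classification(
        uid="uid0",
        groundtruth="0",                   # 0.33.6: groundtruths=[("class", "0")]
        predictions=["0", "1", "2", "3"],  # 0.33.6: [("class", "0"), ("class", "1"), ...]
        scores=[1.0, 0.0, 0.0, 0.0],
    )

    loader = DataLoader()
    loader.add_data([clf])                 # add_data takes a list, per the tests below
    evaluator = loader.finalize()
    metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)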
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: valor-lite
- Version: 0.33.6
+ Version: 0.33.8
  Summary: Compute valor metrics directly in your client.
  License: MIT License

@@ -7,34 +7,34 @@ def basic_classifications() -> list[Classification]:
  return [
  Classification(
  uid="uid0",
- groundtruths=[("class", "0")],
+ groundtruth="0",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
- ("class", "3"),
+ "0",
+ "1",
+ "2",
+ "3",
  ],
  scores=[1.0, 0.0, 0.0, 0.0],
  ),
  Classification(
  uid="uid1",
- groundtruths=[("class", "0")],
+ groundtruth="0",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
- ("class", "3"),
+ "0",
+ "1",
+ "2",
+ "3",
  ],
  scores=[0.0, 0.0, 1.0, 0.0],
  ),
  Classification(
  uid="uid2",
- groundtruths=[("class", "3")],
+ groundtruth="3",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
- ("class", "3"),
+ "0",
+ "1",
+ "2",
+ "3",
  ],
  scores=[0.0, 0.0, 0.0, 0.3],
  ),
@@ -57,61 +57,61 @@ def classifications_from_api_unit_tests() -> list[Classification]:
  return [
  Classification(
  uid="uid0",
- groundtruths=[("class", "0")],
+ groundtruth="0",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[1.0, 0.0, 0.0],
  ),
  Classification(
  uid="uid1",
- groundtruths=[("class", "0")],
+ groundtruth="0",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[0.0, 1.0, 0.0],
  ),
  Classification(
  uid="uid2",
- groundtruths=[("class", "0")],
+ groundtruth="0",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[0.0, 0.0, 1.0],
  ),
  Classification(
  uid="uid3",
- groundtruths=[("class", "1")],
+ groundtruth="1",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[0.0, 1.0, 0.0],
  ),
  Classification(
  uid="uid4",
- groundtruths=[("class", "2")],
+ groundtruth="2",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[0.0, 1.0, 0.0],
  ),
  Classification(
  uid="uid5",
- groundtruths=[("class", "2")],
+ groundtruth="2",
  predictions=[
- ("class", "0"),
- ("class", "1"),
- ("class", "2"),
+ "0",
+ "1",
+ "2",
  ],
  scores=[0.0, 1.0, 0.0],
  ),
@@ -119,7 +119,7 @@ def classifications_from_api_unit_tests() -> list[Classification]:


  @pytest.fixture
- def classifications_two_categories() -> list[Classification]:
+ def classifications_animal_example() -> list[Classification]:
  animal_gts = ["bird", "dog", "bird", "bird", "cat", "dog"]
  animal_pds = [
  {"bird": 0.6, "dog": 0.2, "cat": 0.2},
@@ -131,6 +131,19 @@ def classifications_two_categories() -> list[Classification]:
  # Note: In the case of a tied score, the ordering of predictions is used.
  ]

+ return [
+ Classification(
+ uid=f"uid{idx}",
+ groundtruth=gt,
+ predictions=list(pd.keys()),
+ scores=list(pd.values()),
+ )
+ for idx, (gt, pd) in enumerate(zip(animal_gts, animal_pds))
+ ]
+
+
+ @pytest.fixture
+ def classifications_color_example() -> list[Classification]:
  color_gts = ["white", "white", "red", "blue", "black", "red"]
  color_pds = [
  {"white": 0.65, "red": 0.1, "blue": 0.2, "black": 0.05},
@@ -141,29 +154,14 @@ def classifications_two_categories() -> list[Classification]:
  {"red": 0.9, "white": 0.06, "blue": 0.01, "black": 0.03},
  ]

- joint_gts = zip(animal_gts, color_gts)
- joint_pds = [
- {
- "animal": animal,
- "color": color,
- }
- for animal, color in zip(animal_pds, color_pds)
- ]
-

  return [
  Classification(
  uid=f"uid{idx}",
- groundtruths=[("animal", gt[0]), ("color", gt[1])],
- predictions=[
- (key, value)
- for key, values in pd.items()
- for value in values.keys()
- ],
- scores=[
- score for values in pd.values() for score in values.values()
- ],
+ groundtruth=gt,
+ predictions=list(pd.keys()),
+ scores=list(pd.values()),
  )
- for idx, (gt, pd) in enumerate(zip(joint_gts, joint_pds))
+ for idx, (gt, pd) in enumerate(zip(color_gts, color_pds))
  ]


@@ -172,36 +170,21 @@ def classifications_image_example() -> list[Classification]:
  return [
  Classification(
  uid="uid5",
- groundtruths=[
- ("k4", "v4"),
- ("k5", "v5"),
- ],
+ groundtruth="v4",
  predictions=[
- ("k4", "v1"),
- ("k4", "v8"),
- ("k5", "v1"),
+ "v1",
+ "v8",
  ],
- scores=[0.47, 0.53, 1.0],
+ scores=[0.47, 0.53],
  ),
  Classification(
  uid="uid6",
- groundtruths=[
- ("k4", "v4"),
- ],
- predictions=[("k4", "v4"), ("k4", "v5")],
- scores=[0.71, 0.29],
- ),
- Classification(
- uid="uid8",
- groundtruths=[
- ("k3", "v3"),
- ],
+ groundtruth="v4",
  predictions=[
- ("k3", "v1"),
- ],
- scores=[
- 1.0,
+ "v4",
+ "v5",
  ],
+ scores=[0.71, 0.29],
  ),
  ]

@@ -224,10 +207,8 @@ def classifications_tabular_example() -> list[Classification]:
  return [
  Classification(
  uid=f"uid{i}",
- groundtruths=[("class", str(gt_label))],
- predictions=[
- ("class", str(pd_label)) for pd_label, _ in enumerate(pds)
- ],
+ groundtruth=str(gt_label),
+ predictions=[str(pd_label) for pd_label, _ in enumerate(pds)],
  scores=pds,
  )
  for i, (gt_label, pds) in enumerate(
@@ -236,24 +217,12 @@ def classifications_tabular_example() -> list[Classification]:
  ]


- @pytest.fixture
- def classifications_no_groundtruths() -> list[Classification]:
- return [
- Classification(
- uid="uid1",
- groundtruths=[],
- predictions=[("k1", "v1"), ("k1", "v2")],
- scores=[0.8, 0.2],
- )
- ]
-
-
  @pytest.fixture
  def classifications_no_predictions() -> list[Classification]:
  return [
  Classification(
  uid="uid1",
- groundtruths=[("k1", "v1"), ("k2", "v2")],
+ groundtruth="v1",
  predictions=[],
  scores=[],
  )
@@ -265,11 +234,11 @@ def classifications_multiclass() -> list[Classification]:
  return [
  Classification(
  uid="uid0",
- groundtruths=[("class_label", "cat")],
+ groundtruth="cat",
  predictions=[
- ("class_label", "cat"),
- ("class_label", "dog"),
- ("class_label", "bee"),
+ "cat",
+ "dog",
+ "bee",
  ],
  scores=[
  0.44598543489942505,
@@ -279,11 +248,11 @@ def classifications_multiclass() -> list[Classification]:
  ),
  Classification(
  uid="uid1",
- groundtruths=[("class_label", "bee")],
+ groundtruth="bee",
  predictions=[
- ("class_label", "cat"),
- ("class_label", "dog"),
- ("class_label", "bee"),
+ "cat",
+ "dog",
+ "bee",
  ],
  scores=[
  0.4076893257212283,
@@ -293,11 +262,11 @@ def classifications_multiclass() -> list[Classification]:
  ),
  Classification(
  uid="uid2",
- groundtruths=[("class_label", "cat")],
+ groundtruth="cat",
  predictions=[
- ("class_label", "cat"),
- ("class_label", "dog"),
- ("class_label", "bee"),
+ "cat",
+ "dog",
+ "bee",
  ],
  scores=[
  0.25060075263871917,
@@ -307,11 +276,11 @@ def classifications_multiclass() -> list[Classification]:
  ),
  Classification(
  uid="uid3",
- groundtruths=[("class_label", "bee")],
+ groundtruth="bee",
  predictions=[
- ("class_label", "cat"),
- ("class_label", "dog"),
- ("class_label", "bee"),
+ "cat",
+ "dog",
+ "bee",
  ],
  scores=[
  0.2003514145616792,
@@ -321,11 +290,11 @@ def classifications_multiclass() -> list[Classification]:
  ),
  Classification(
  uid="uid4",
- groundtruths=[("class_label", "dog")],
+ groundtruth="dog",
  predictions=[
- ("class_label", "cat"),
- ("class_label", "dog"),
- ("class_label", "bee"),
+ "cat",
+ "dog",
+ "bee",
  ],
  scores=[
  0.33443897813714385,
@@ -337,18 +306,14 @@ def classifications_multiclass() -> list[Classification]:


  @pytest.fixture
- def classifications_multiclass_true_negatives_check() -> list[Classification]:
+ def classifications_multiclass_true_negatives_check() -> (
+ list[Classification]
+ ):
  return [
  Classification(
  uid="uid1",
- groundtruths=[("k1", "ant")],
- predictions=[("k1", "ant"), ("k1", "bee"), ("k1", "cat")],
- scores=[0.15, 0.48, 0.37],
- ),
- Classification(
- uid="uid2",
- groundtruths=[("k2", "egg")],
- predictions=[("k2", "egg"), ("k2", "milk"), ("k2", "flour")],
+ groundtruth="ant",
+ predictions=["ant", "bee", "cat"],
  scores=[0.15, 0.48, 0.37],
  ),
  ]
@@ -359,20 +324,8 @@ def classifications_multiclass_zero_count() -> list[Classification]:
  return [
  Classification(
  uid="uid1",
- groundtruths=[("k", "ant")],
- predictions=[("k", "ant"), ("k", "bee"), ("k", "cat")],
+ groundtruth="ant",
+ predictions=["ant", "bee", "cat"],
  scores=[0.15, 0.48, 0.37],
  )
  ]
-
-
- @pytest.fixture
- def classifications_with_label_key_mismatch() -> list[Classification]:
- return [
- Classification(
- uid="uid0",
- groundtruths=[("k1", "V1")],
- predictions=[("k2", "v1")],
- scores=[1.0],
- )
- ]
@@ -77,16 +77,12 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
  "n_groundtruths": 3,
  "n_predictions": 12,
  "n_labels": 4,
- "ignored_prediction_labels": [
- ("class", "1"),
- ("class", "2"),
- ],
+ "ignored_prediction_labels": ["1", "2"],
  "missing_prediction_labels": [],
  }

  metrics = evaluator.evaluate(score_thresholds=[0.25, 0.75], as_dict=True)

- # test Accuracy
  actual_metrics = [m for m in metrics[MetricType.Accuracy]]
  expected_metrics = [
  {
@@ -95,7 +91,7 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
  "parameters": {
  "score_thresholds": [0.25, 0.75],
  "hardmax": True,
- "label": {"key": "class", "value": "0"},
+ "label": "0",
  },
  },
  {
@@ -104,7 +100,7 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
  "parameters": {
  "score_thresholds": [0.25, 0.75],
  "hardmax": True,
- "label": {"key": "class", "value": "3"},
+ "label": "3",
  },
  },
  ]
@@ -114,17 +110,16 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
  assert m in actual_metrics


- def test_accuracy_with_example(
- classifications_two_categories: list[Classification],
+ def test_accuracy_with_animal_example(
+ classifications_animal_example: list[Classification],
  ):

  loader = DataLoader()
- loader.add_data(classifications_two_categories)
+ loader.add_data(classifications_animal_example)
  evaluator = loader.finalize()

  metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)

- # test Accuracy
  actual_metrics = [m for m in metrics[MetricType.Accuracy]]
  expected_metrics = [
  {
@@ -133,7 +128,7 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "animal", "value": "bird"},
+ "label": "bird",
  },
  },
  {
@@ -142,7 +137,7 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "animal", "value": "dog"},
+ "label": "dog",
  },
  },
  {
@@ -151,16 +146,35 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "animal", "value": "cat"},
+ "label": "cat",
  },
  },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_color_example(
+ classifications_color_example: list[Classification],
+ ):
+
+ loader = DataLoader()
+ loader.add_data(classifications_color_example)
+ evaluator = loader.finalize()
+
+ metrics = evaluator.evaluate(score_thresholds=[0.5], as_dict=True)
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
  {
  "type": "Accuracy",
  "value": [2 / 3],
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "color", "value": "white"},
+ "label": "white",
  },
  },
  {
@@ -169,7 +183,7 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "color", "value": "red"},
+ "label": "red",
  },
  },
  {
@@ -178,7 +192,7 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "color", "value": "blue"},
+ "label": "blue",
  },
  },
  {
@@ -187,7 +201,7 @@ def test_accuracy_with_example(
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": {"key": "color", "value": "black"},
+ "label": "black",
  },
  },
  ]
@@ -205,53 +219,25 @@ def test_accuracy_with_image_example(
  evaluator = loader.finalize()

  assert evaluator.metadata == {
- "n_datums": 3,
- "n_groundtruths": 4,
- "n_predictions": 6,
- "n_labels": 8,
- "ignored_prediction_labels": [
- ("k4", "v1"),
- ("k4", "v8"),
- ("k5", "v1"),
- ("k4", "v5"),
- ("k3", "v1"),
- ],
- "missing_prediction_labels": [
- ("k5", "v5"),
- ("k3", "v3"),
- ],
+ "n_datums": 2,
+ "n_groundtruths": 2,
+ "n_predictions": 4,
+ "n_labels": 4,
+ "ignored_prediction_labels": ["v1", "v8", "v5"],
+ "missing_prediction_labels": [],
  }

  metrics = evaluator.evaluate(as_dict=True)

- # test Accuracy
  actual_metrics = [m for m in metrics[MetricType.Accuracy]]
  expected_metrics = [
  {
  "type": "Accuracy",
- "value": [0.3333333333333333],
- "parameters": {
- "score_thresholds": [0.0],
- "hardmax": True,
- "label": {"key": "k4", "value": "v4"},
- },
- },
- {
- "type": "Accuracy",
- "value": [0.0],
- "parameters": {
- "score_thresholds": [0.0],
- "hardmax": True,
- "label": {"key": "k5", "value": "v5"},
- },
- },
- {
- "type": "Accuracy",
- "value": [0.0],
+ "value": [0.5],
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": {"key": "k3", "value": "v3"},
+ "label": "v4",
  },
  },
  ]
@@ -279,7 +265,6 @@ def test_accuracy_with_tabular_example(

  metrics = evaluator.evaluate(as_dict=True)

- # test Accuracy
  actual_metrics = [m for m in metrics[MetricType.Accuracy]]
  expected_metrics = [
  {
@@ -288,7 +273,7 @@ def test_accuracy_with_tabular_example(
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": {"key": "class", "value": "0"},
+ "label": "0",
  },
  },
  {
@@ -297,7 +282,7 @@ def test_accuracy_with_tabular_example(
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": {"key": "class", "value": "1"},
+ "label": "1",
  },
  },
  {
@@ -306,7 +291,7 @@ def test_accuracy_with_tabular_example(
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": {"key": "class", "value": "2"},
+ "label": "2",
  },
  },
  ]
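The metric dictionaries change shape accordingly: the `label` parameter is now a plain string rather than a `{"key": ..., "value": ...}` mapping. A sketch of one expected Accuracy entry, using the values from `test_accuracy_with_image_example` above:

    # Sketch of a 0.33.8 Accuracy metric dict (values taken from the expectations above).
    expected = {
        "type": "Accuracy",
        "value": [0.5],
        "parameters": {
            "score_thresholds": [0.0],
            "hardmax": True,
            "label": "v4",  # 0.33.6: {"key": "k4", "value": "v4"}
        },
    }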