valor-lite 0.33.14__tar.gz → 0.33.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of valor-lite might be problematic.

Files changed (84)
  1. {valor_lite-0.33.14/valor_lite.egg-info → valor_lite-0.33.15}/PKG-INFO +1 -1
  2. valor_lite-0.33.15/tests/object_detection/test_f1.py +470 -0
  3. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_precision.py +38 -0
  4. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_recall.py +38 -0
  5. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/computation.py +5 -2
  6. {valor_lite-0.33.14 → valor_lite-0.33.15/valor_lite.egg-info}/PKG-INFO +1 -1
  7. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/SOURCES.txt +1 -0
  8. {valor_lite-0.33.14 → valor_lite-0.33.15}/LICENSE +0 -0
  9. {valor_lite-0.33.14 → valor_lite-0.33.15}/README.md +0 -0
  10. {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/.gitignore +0 -0
  11. {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/benchmark_classification.py +0 -0
  12. {valor_lite-0.33.14 → valor_lite-0.33.15}/benchmarks/benchmark_objdet.py +0 -0
  13. {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/.gitignore +0 -0
  14. {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/object-detection.ipynb +0 -0
  15. {valor_lite-0.33.14 → valor_lite-0.33.15}/examples/tabular_classification.ipynb +0 -0
  16. {valor_lite-0.33.14 → valor_lite-0.33.15}/pyproject.toml +0 -0
  17. {valor_lite-0.33.14 → valor_lite-0.33.15}/setup.cfg +0 -0
  18. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/__init__.py +0 -0
  19. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/__init__.py +0 -0
  20. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/conftest.py +0 -0
  21. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_accuracy.py +0 -0
  22. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_confusion_matrix.py +0 -0
  23. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_counts.py +0 -0
  24. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_dataloader.py +0 -0
  25. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_evaluator.py +0 -0
  26. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_f1.py +0 -0
  27. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_filtering.py +0 -0
  28. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_precision.py +0 -0
  29. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_recall.py +0 -0
  30. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_rocauc.py +0 -0
  31. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_schemas.py +0 -0
  32. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/classification/test_stability.py +0 -0
  33. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/__init__.py +0 -0
  34. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/conftest.py +0 -0
  35. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_accuracy.py +0 -0
  36. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_average_precision.py +0 -0
  37. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_average_recall.py +0 -0
  38. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_confusion_matrix.py +0 -0
  39. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_counts.py +0 -0
  40. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_dataloader.py +0 -0
  41. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_evaluator.py +0 -0
  42. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_filtering.py +0 -0
  43. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_iou.py +0 -0
  44. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_pr_curve.py +0 -0
  45. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_schemas.py +0 -0
  46. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_stability.py +0 -0
  47. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/__init__.py +0 -0
  48. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/conftest.py +0 -0
  49. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_accuracy.py +0 -0
  50. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_annotation.py +0 -0
  51. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_confusion_matrix.py +0 -0
  52. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_dataloader.py +0 -0
  53. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_evaluator.py +0 -0
  54. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_f1.py +0 -0
  55. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_filtering.py +0 -0
  56. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_iou.py +0 -0
  57. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_precision.py +0 -0
  58. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_recall.py +0 -0
  59. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/semantic_segmentation/test_stability.py +0 -0
  60. {valor_lite-0.33.14 → valor_lite-0.33.15}/tests/text_generation/__init__.py +0 -0
  61. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/LICENSE +0 -0
  62. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/__init__.py +0 -0
  63. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/__init__.py +0 -0
  64. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/annotation.py +0 -0
  65. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/computation.py +0 -0
  66. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/manager.py +0 -0
  67. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/metric.py +0 -0
  68. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/classification/utilities.py +0 -0
  69. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/__init__.py +0 -0
  70. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/annotation.py +0 -0
  71. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/manager.py +0 -0
  72. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/metric.py +0 -0
  73. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/utilities.py +0 -0
  74. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/schemas.py +0 -0
  75. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/__init__.py +0 -0
  76. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/annotation.py +0 -0
  77. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/computation.py +0 -0
  78. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/manager.py +0 -0
  79. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/metric.py +0 -0
  80. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/semantic_segmentation/utilities.py +0 -0
  81. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/text_generation/__init__.py +0 -0
  82. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/dependency_links.txt +0 -0
  83. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/requires.txt +0 -0
  84. {valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/top_level.txt +0 -0
{valor_lite-0.33.14/valor_lite.egg-info → valor_lite-0.33.15}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: valor-lite
- Version: 0.33.14
+ Version: 0.33.15
  Summary: Compute valor metrics locally.
  License: MIT License
 
valor_lite-0.33.15/tests/object_detection/test_f1.py
@@ -0,0 +1,470 @@
+ import numpy as np
+ from valor_lite.object_detection import DataLoader, Detection, MetricType
+ from valor_lite.object_detection.computation import compute_precion_recall
+
+
+ def test__compute_f1():
+
+     sorted_pairs = np.array(
+         [
+             # dt, gt, pd, iou, gl, pl, score,
+             [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+             [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+             [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+             [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+             [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+         ]
+     )
+
+     label_metadata = np.array([[1, 5, 0]])
+     iou_thresholds = np.array([0.1, 0.6])
+     score_thresholds = np.array([0.0])
+
+     (_, _, _, counts, _) = compute_precion_recall(
+         sorted_pairs,
+         label_metadata=label_metadata,
+         iou_thresholds=iou_thresholds,
+         score_thresholds=score_thresholds,
+     )
+
+     f1 = counts[:, :, :, 5]
+
+     # f1
+     expected = np.array(
+         [
+             [[1 / 3]],  # iou = 0.1
+             [[1 / 3]],  # iou = 0.6
+         ]
+     )
+     assert np.isclose(f1, expected).all()
+
+
+ def test_f1_metrics_first_class(
+     basic_detections_first_class: list[Detection],
+     basic_rotated_detections_first_class: list[Detection],
+ ):
+     """
+     Basic object detection test.
+
+     groundtruths
+         datum uid1
+             box 1 - label v1 - tp
+             box 3 - label v2 - fn missing prediction
+         datum uid2
+             box 2 - label v1 - fn missing prediction
+
+     predictions
+         datum uid1
+             box 1 - label v1 - score 0.3 - tp
+         datum uid2
+             box 2 - label v2 - score 0.98 - fp
+     """
+     for input_, method in [
+         (basic_detections_first_class, DataLoader.add_bounding_boxes),
+         (basic_rotated_detections_first_class, DataLoader.add_polygons),
+     ]:
+         loader = DataLoader()
+         method(loader, input_)
+         evaluator = loader.finalize()
+
+         metrics = evaluator.evaluate(
+             iou_thresholds=[0.1, 0.6],
+             score_thresholds=[0.0, 0.5],
+         )
+
+         assert evaluator.ignored_prediction_labels == []
+         assert evaluator.missing_prediction_labels == []
+         assert evaluator.n_datums == 2
+         assert evaluator.n_labels == 1
+         assert evaluator.n_groundtruths == 2
+         assert evaluator.n_predictions == 1
+
+         # test F1
+         actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+         expected_metrics = [
+             {
+                 "type": "F1",
+                 "value": 2 / 3,
+                 "parameters": {
+                     "iou_threshold": 0.1,
+                     "score_threshold": 0.0,
+                     "label": "v1",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 2 / 3,
+                 "parameters": {
+                     "iou_threshold": 0.6,
+                     "score_threshold": 0.0,
+                     "label": "v1",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.1,
+                     "score_threshold": 0.5,
+                     "label": "v1",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.6,
+                     "score_threshold": 0.5,
+                     "label": "v1",
+                 },
+             },
+         ]
+         for m in actual_metrics:
+             assert m in expected_metrics
+         for m in expected_metrics:
+             assert m in actual_metrics
+
+
+ def test_f1_metrics_second_class(
+     basic_detections_second_class: list[Detection],
+     basic_rotated_detections_second_class: list[Detection],
+ ):
+     """
+     Basic object detection test.
+
+     groundtruths
+         datum uid1
+             box 3 - label v2 - fn missing prediction
+         datum uid2
+             none
+     predictions
+         datum uid1
+             none
+         datum uid2
+             box 2 - label v2 - score 0.98 - fp
+     """
+     for input_, method in [
+         (basic_detections_second_class, DataLoader.add_bounding_boxes),
+         (basic_rotated_detections_second_class, DataLoader.add_polygons),
+     ]:
+         loader = DataLoader()
+         method(loader, input_)
+         evaluator = loader.finalize()
+
+         metrics = evaluator.evaluate(
+             iou_thresholds=[0.1, 0.6],
+             score_thresholds=[0.0, 0.5],
+         )
+
+         assert evaluator.ignored_prediction_labels == []
+         assert evaluator.missing_prediction_labels == []
+         assert evaluator.n_datums == 2
+         assert evaluator.n_labels == 1
+         assert evaluator.n_groundtruths == 1
+         assert evaluator.n_predictions == 1
+
+         # test F1
+         actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+         expected_metrics = [
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.1,
+                     "score_threshold": 0.0,
+                     "label": "v2",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.6,
+                     "score_threshold": 0.0,
+                     "label": "v2",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.1,
+                     "score_threshold": 0.5,
+                     "label": "v2",
+                 },
+             },
+             {
+                 "type": "F1",
+                 "value": 0.0,
+                 "parameters": {
+                     "iou_threshold": 0.6,
+                     "score_threshold": 0.5,
+                     "label": "v2",
+                 },
+             },
+         ]
+         for m in actual_metrics:
+             assert m in expected_metrics
+         for m in expected_metrics:
+             assert m in actual_metrics
+
+
+ def test_f1_false_negatives_single_datum_baseline(
+     false_negatives_single_datum_baseline_detections: list[Detection],
+ ):
+     """This is the baseline for the below test. In this case there are two predictions and
+     one groundtruth, but the highest confident prediction overlaps sufficiently with the groundtruth
+     so there is not a penalty for the false negative so the AP is 1
+     """
+
+     loader = DataLoader()
+     loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
+     evaluator = loader.finalize()
+
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0, 0.9],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         },
+         {
+             "type": "F1",
+             "value": 0.0,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.9,
+                 "label": "value",
+             },
+         },
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
+
+
+ def test_f1_false_negatives_single_datum(
+     false_negatives_single_datum_detections: list[Detection],
+ ):
+     """Tests where high confidence false negative was not being penalized. The
+     difference between this test and the above is that here the prediction with higher confidence
+     does not sufficiently overlap the groundtruth and so is penalized and we get an AP of 0.5
+     """
+
+     loader = DataLoader()
+     loader.add_bounding_boxes(false_negatives_single_datum_detections)
+     evaluator = loader.finalize()
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         }
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
+
+
+ def test_f1_false_negatives_two_datums_one_empty_low_confidence_of_fp(
+     false_negatives_two_datums_one_empty_low_confidence_of_fp_detections: list[
+         Detection
+     ],
+ ):
+     """In this test we have
+     1. An image with a matching groundtruth and prediction (same class and high IOU)
+     2. A second image with empty groundtruth annotation but a prediction with lower confidence
+     then the prediction on the first image.
+
+     In this case, the AP should be 1.0 since the false positive has lower confidence than the true positive
+
+     """
+
+     loader = DataLoader()
+     loader.add_bounding_boxes(
+         false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
+     )
+     evaluator = loader.finalize()
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         }
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
+
+
+ def test_f1_false_negatives_two_datums_one_empty_high_confidence_of_fp(
+     false_negatives_two_datums_one_empty_high_confidence_of_fp_detections: list[
+         Detection
+     ],
+ ):
+     """In this test we have
+     1. An image with a matching groundtruth and prediction (same class and high IOU)
+     2. A second image with empty groundtruth annotation and a prediction with higher confidence
+     then the prediction on the first image.
+
+     In this case, the AP should be 0.5 since the false positive has higher confidence than the true positive
+     """
+
+     loader = DataLoader()
+     loader.add_bounding_boxes(
+         false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
+     )
+     evaluator = loader.finalize()
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         }
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
+
+
+ def test_f1_false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp(
+     false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections: list[
+         Detection
+     ],
+ ):
+     """In this test we have
+     1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+     2. A second image with a groundtruth annotation with class `"other value"` and a prediction with lower confidence
+     then the prediction on the first image.
+
+     In this case, the AP for class `"value"` should be 1 since the false positive has lower confidence than the true positive.
+     AP for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+     """
+     loader = DataLoader()
+     loader.add_bounding_boxes(
+         false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
+     )
+     evaluator = loader.finalize()
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         },
+         {
+             "type": "F1",
+             "value": 0.0,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "other value",
+             },
+         },
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
+
+
+ def test_f1_false_negatives_two_datums_one_only_with_different_class_high_confidence_of_fp(
+     false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections: list[
+         Detection
+     ],
+ ):
+     """In this test we have
+     1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+     2. A second image with a groundtruth annotation with class `"other value"` and a prediction with higher confidence
+     then the prediction on the first image.
+
+     In this case, the AP for class `"value"` should be 0.5 since the false positive has higher confidence than the true positive.
+     AP for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+     """
+     loader = DataLoader()
+     loader.add_bounding_boxes(
+         false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
+     )
+     evaluator = loader.finalize()
+     metrics = evaluator.evaluate(
+         iou_thresholds=[0.5],
+         score_thresholds=[0.0],
+     )
+
+     actual_metrics = [m.to_dict() for m in metrics[MetricType.F1]]
+     expected_metrics = [
+         {
+             "type": "F1",
+             "value": 2 / 3,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "value",
+             },
+         },
+         {
+             "type": "F1",
+             "value": 0.0,
+             "parameters": {
+                 "iou_threshold": 0.5,
+                 "score_threshold": 0.0,
+                 "label": "other value",
+             },
+         },
+     ]
+     for m in actual_metrics:
+         assert m in expected_metrics
+     for m in expected_metrics:
+         assert m in actual_metrics
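As a quick cross-check of the expected values above (a standalone sketch, not part of the package), the F1 scores follow from the standard harmonic mean of precision and recall: the test__compute_f1 fixture has one groundtruth, five predictions, and one true positive (precision 1/5, recall 1/1), while test_f1_metrics_first_class at score threshold 0.0 has two groundtruths, one prediction, and one true positive (precision 1/1, recall 1/2).

# Standalone sketch (not library code): recompute the expected F1 values
# from precision and recall using the standard harmonic-mean definition.
def f1(precision: float, recall: float) -> float:
    total = precision + recall
    return 2 * precision * recall / total if total else 0.0

# test__compute_f1 fixture: 1 groundtruth, 5 predictions, 1 true positive -> 1/3
assert abs(f1(1 / 5, 1 / 1) - 1 / 3) < 1e-9

# test_f1_metrics_first_class at score_threshold=0.0: 2 groundtruths, 1 prediction, 1 true positive -> 2/3
assert abs(f1(1 / 1, 1 / 2) - 2 / 3) < 1e-9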
{valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_precision.py
@@ -1,4 +1,42 @@
+ import numpy as np
  from valor_lite.object_detection import DataLoader, Detection, MetricType
+ from valor_lite.object_detection.computation import compute_precion_recall
+
+
+ def test__compute_precision():
+
+     sorted_pairs = np.array(
+         [
+             # dt, gt, pd, iou, gl, pl, score,
+             [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+             [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+             [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+             [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+             [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+         ]
+     )
+
+     label_metadata = np.array([[1, 5, 0]])
+     iou_thresholds = np.array([0.1, 0.6])
+     score_thresholds = np.array([0.0])
+
+     (_, _, _, counts, _) = compute_precion_recall(
+         sorted_pairs,
+         label_metadata=label_metadata,
+         iou_thresholds=iou_thresholds,
+         score_thresholds=score_thresholds,
+     )
+
+     precision = counts[:, :, :, 3]
+
+     # precision
+     expected = np.array(
+         [
+             [0.2],  # iou = 0.1
+             [0.2],  # iou = 0.6
+         ]
+     )
+     assert (precision == expected).all()
 
 
  def test_precision_metrics_first_class(
{valor_lite-0.33.14 → valor_lite-0.33.15}/tests/object_detection/test_recall.py
@@ -1,4 +1,42 @@
+ import numpy as np
  from valor_lite.object_detection import DataLoader, Detection, MetricType
+ from valor_lite.object_detection.computation import compute_precion_recall
+
+
+ def test__compute_recall():
+
+     sorted_pairs = np.array(
+         [
+             # dt, gt, pd, iou, gl, pl, score,
+             [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+             [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+             [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+             [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+             [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+         ]
+     )
+
+     label_metadata = np.array([[1, 5, 0]])
+     iou_thresholds = np.array([0.1, 0.6])
+     score_thresholds = np.array([0.0])
+
+     (_, _, _, counts, _) = compute_precion_recall(
+         sorted_pairs,
+         label_metadata=label_metadata,
+         iou_thresholds=iou_thresholds,
+         score_thresholds=score_thresholds,
+     )
+
+     recall = counts[:, :, :, 4]
+
+     # precision
+     expected = np.array(
+         [
+             [1.0],  # iou = 0.1
+             [1.0],  # iou = 0.6
+         ]
+     )
+     assert (recall == expected).all()
 
 
  def test_recall_metrics_first_class(
{valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite/object_detection/computation.py
@@ -408,17 +408,20 @@ def compute_precion_recall(
 
  # calculate component metrics
  recall = np.zeros_like(tp_count)
- precision = np.zeros_like(tp_count)
  np.divide(tp_count, gt_count, where=gt_count > 1e-9, out=recall)
+
+ precision = np.zeros_like(tp_count)
  np.divide(tp_count, pd_count, where=pd_count > 1e-9, out=precision)
+
  fn_count = gt_count - tp_count
 
  f1_score = np.zeros_like(precision)
  np.divide(
-     np.multiply(precision, recall),
+     2 * np.multiply(precision, recall),
      (precision + recall),
      where=(precision + recall) > 1e-9,
      out=f1_score,
+     dtype=np.float64,
  )
 
  counts[iou_idx][score_idx] = np.concatenate(
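The substantive fix in this hunk is the factor of 2 in the F1 numerator: the previous release computed precision * recall / (precision + recall), i.e. half the harmonic mean. The other changes move the precision buffer allocation next to its divide and pass dtype=np.float64 to the ufunc. A minimal standalone sketch (illustrative values only, matching the new unit tests) shows the effect of the correction:

import numpy as np

# Illustrative values only: 1 true positive out of 5 predictions and 1 groundtruth,
# as in the new test__compute_* fixtures.
precision = np.array([0.2])
recall = np.array([1.0])

old_f1 = (precision * recall) / (precision + recall)      # 0.1666..., pre-0.33.15 behaviour
new_f1 = 2 * (precision * recall) / (precision + recall)  # 0.3333..., the harmonic mean

assert np.isclose(old_f1, 1 / 6).all()
assert np.isclose(new_f1, 1 / 3).all()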
{valor_lite-0.33.14 → valor_lite-0.33.15/valor_lite.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: valor-lite
- Version: 0.33.14
+ Version: 0.33.15
  Summary: Compute valor metrics locally.
  License: MIT License
 
{valor_lite-0.33.14 → valor_lite-0.33.15}/valor_lite.egg-info/SOURCES.txt
@@ -31,6 +31,7 @@ tests/object_detection/test_confusion_matrix.py
  tests/object_detection/test_counts.py
  tests/object_detection/test_dataloader.py
  tests/object_detection/test_evaluator.py
+ tests/object_detection/test_f1.py
  tests/object_detection/test_filtering.py
  tests/object_detection/test_iou.py
  tests/object_detection/test_pr_curve.py