valor-lite 0.33.11.tar.gz → 0.33.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. {valor_lite-0.33.11/valor_lite.egg-info → valor_lite-0.33.13}/PKG-INFO +1 -1
  2. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_accuracy.py +7 -90
  3. valor_lite-0.33.13/tests/object_detection/test_accuracy.py +492 -0
  4. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_precision.py +1 -1
  5. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_recall.py +1 -1
  6. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_pr_curve.py +1 -1
  7. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/computation.py +2 -2
  8. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/manager.py +8 -6
  9. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/metric.py +29 -17
  10. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/computation.py +21 -17
  11. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/manager.py +11 -6
  12. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/metric.py +23 -10
  13. {valor_lite-0.33.11 → valor_lite-0.33.13/valor_lite.egg-info}/PKG-INFO +1 -1
  14. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/SOURCES.txt +1 -0
  15. {valor_lite-0.33.11 → valor_lite-0.33.13}/LICENSE +0 -0
  16. {valor_lite-0.33.11 → valor_lite-0.33.13}/README.md +0 -0
  17. {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/.gitignore +0 -0
  18. {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/benchmark_classification.py +0 -0
  19. {valor_lite-0.33.11 → valor_lite-0.33.13}/benchmarks/benchmark_objdet.py +0 -0
  20. {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/.gitignore +0 -0
  21. {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/object-detection.ipynb +0 -0
  22. {valor_lite-0.33.11 → valor_lite-0.33.13}/examples/tabular_classification.ipynb +0 -0
  23. {valor_lite-0.33.11 → valor_lite-0.33.13}/pyproject.toml +0 -0
  24. {valor_lite-0.33.11 → valor_lite-0.33.13}/setup.cfg +0 -0
  25. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/__init__.py +0 -0
  26. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/__init__.py +0 -0
  27. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/conftest.py +0 -0
  28. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_confusion_matrix.py +0 -0
  29. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_counts.py +0 -0
  30. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_dataloader.py +0 -0
  31. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_evaluator.py +0 -0
  32. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_f1.py +0 -0
  33. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_filtering.py +0 -0
  34. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_precision.py +0 -0
  35. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_recall.py +0 -0
  36. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_rocauc.py +0 -0
  37. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_schemas.py +0 -0
  38. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_stability.py +0 -0
  39. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/__init__.py +0 -0
  40. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/conftest.py +0 -0
  41. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_confusion_matrix.py +0 -0
  42. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_counts.py +0 -0
  43. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_dataloader.py +0 -0
  44. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_evaluator.py +0 -0
  45. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_filtering.py +0 -0
  46. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_iou.py +0 -0
  47. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_precision.py +0 -0
  48. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_recall.py +0 -0
  49. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_schemas.py +0 -0
  50. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_stability.py +0 -0
  51. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/__init__.py +0 -0
  52. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/conftest.py +0 -0
  53. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_accuracy.py +0 -0
  54. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_annotation.py +0 -0
  55. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_confusion_matrix.py +0 -0
  56. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_dataloader.py +0 -0
  57. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_evaluator.py +0 -0
  58. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_f1.py +0 -0
  59. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_filtering.py +0 -0
  60. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_iou.py +0 -0
  61. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_precision.py +0 -0
  62. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_recall.py +0 -0
  63. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/semantic_segmentation/test_stability.py +0 -0
  64. {valor_lite-0.33.11 → valor_lite-0.33.13}/tests/text_generation/__init__.py +0 -0
  65. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/LICENSE +0 -0
  66. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/__init__.py +0 -0
  67. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/__init__.py +0 -0
  68. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/annotation.py +0 -0
  69. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/__init__.py +0 -0
  70. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/annotation.py +0 -0
  71. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/schemas.py +0 -0
  72. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/__init__.py +0 -0
  73. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/annotation.py +0 -0
  74. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/computation.py +0 -0
  75. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/manager.py +0 -0
  76. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/semantic_segmentation/metric.py +0 -0
  77. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/text_generation/__init__.py +0 -0
  78. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/dependency_links.txt +0 -0
  79. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/requires.txt +0 -0
  80. {valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/top_level.txt +0 -0
{valor_lite-0.33.11/valor_lite.egg-info → valor_lite-0.33.13}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: valor-lite
- Version: 0.33.11
+ Version: 0.33.13
  Summary: Compute valor metrics locally.
  License: MIT License

{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/classification/test_accuracy.py

@@ -53,18 +53,12 @@ def test_accuracy_computation():
  )

  # score threshold, label, count metric
- assert accuracy.shape == (2, 4)
+ assert accuracy.shape == (2,)

  # score >= 0.25
- assert accuracy[0][0] == 2 / 3
- assert accuracy[0][1] == 1.0
- assert accuracy[0][2] == 2 / 3
- assert accuracy[0][3] == 1.0
+ assert accuracy[0] == 2 / 3
  # score >= 0.75
- assert accuracy[1][0] == 2 / 3
- assert accuracy[1][1] == 1.0
- assert accuracy[1][2] == 2 / 3
- assert accuracy[1][3] == 2 / 3
+ assert accuracy[1] == 1 / 3


  def test_accuracy_basic(basic_classifications: list[Classification]):
@@ -87,20 +81,10 @@ def test_accuracy_basic(basic_classifications: list[Classification]):
  expected_metrics = [
  {
  "type": "Accuracy",
- "value": [2 / 3, 2 / 3],
+ "value": [2 / 3, 1 / 3],
  "parameters": {
  "score_thresholds": [0.25, 0.75],
  "hardmax": True,
- "label": "0",
- },
- },
- {
- "type": "Accuracy",
- "value": [1.0, 2 / 3],
- "parameters": {
- "score_thresholds": [0.25, 0.75],
- "hardmax": True,
- "label": "3",
  },
  },
  ]
@@ -124,29 +108,10 @@ def test_accuracy_with_animal_example(
  expected_metrics = [
  {
  "type": "Accuracy",
- "value": [2.0 / 3.0],
- "parameters": {
- "score_thresholds": [0.5],
- "hardmax": True,
- "label": "bird",
- },
- },
- {
- "type": "Accuracy",
- "value": [0.5],
+ "value": [2.0 / 6.0],
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": "dog",
- },
- },
- {
- "type": "Accuracy",
- "value": [2 / 3],
- "parameters": {
- "score_thresholds": [0.5],
- "hardmax": True,
- "label": "cat",
  },
  },
  ]
@@ -170,38 +135,10 @@ def test_accuracy_color_example(
  expected_metrics = [
  {
  "type": "Accuracy",
- "value": [2 / 3],
- "parameters": {
- "score_thresholds": [0.5],
- "hardmax": True,
- "label": "white",
- },
- },
- {
- "type": "Accuracy",
- "value": [2 / 3],
+ "value": [2 / 6],
  "parameters": {
  "score_thresholds": [0.5],
  "hardmax": True,
- "label": "red",
- },
- },
- {
- "type": "Accuracy",
- "value": [2 / 3],
- "parameters": {
- "score_thresholds": [0.5],
- "hardmax": True,
- "label": "blue",
- },
- },
- {
- "type": "Accuracy",
- "value": [5 / 6],
- "parameters": {
- "score_thresholds": [0.5],
- "hardmax": True,
- "label": "black",
  },
  },
  ]
@@ -237,7 +174,6 @@ def test_accuracy_with_image_example(
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": "v4",
  },
  },
  ]
@@ -269,29 +205,10 @@ def test_accuracy_with_tabular_example(
  expected_metrics = [
  {
  "type": "Accuracy",
- "value": [0.7],
- "parameters": {
- "score_thresholds": [0.0],
- "hardmax": True,
- "label": "0",
- },
- },
- {
- "type": "Accuracy",
- "value": [0.5],
- "parameters": {
- "score_thresholds": [0.0],
- "hardmax": True,
- "label": "1",
- },
- },
- {
- "type": "Accuracy",
- "value": [0.8],
+ "value": [5 / 10],
  "parameters": {
  "score_thresholds": [0.0],
  "hardmax": True,
- "label": "2",
  },
  },
  ]
valor_lite-0.33.13/tests/object_detection/test_accuracy.py

@@ -0,0 +1,492 @@
+ import numpy as np
+ from valor_lite.object_detection import DataLoader, Detection, MetricType
+ from valor_lite.object_detection.computation import compute_metrics
+
+
+ def test__compute_average_precision():
+
+ sorted_pairs = np.array(
+ [
+ # dt, gt, pd, iou, gl, pl, score,
+ [0.0, 0.0, 2.0, 0.25, 0.0, 0.0, 0.95],
+ [0.0, 0.0, 3.0, 0.33333, 0.0, 0.0, 0.9],
+ [0.0, 0.0, 4.0, 0.66667, 0.0, 0.0, 0.65],
+ [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.1],
+ [0.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.01],
+ ]
+ )
+
+ label_metadata = np.array([[1, 5, 0]])
+ iou_thresholds = np.array([0.1, 0.6])
+ score_thresholds = np.array([0.0])
+
+ (_, _, accuracy, _, _) = compute_metrics(
+ sorted_pairs,
+ label_metadata=label_metadata,
+ iou_thresholds=iou_thresholds,
+ score_thresholds=score_thresholds,
+ )
+
+ expected = np.array(
+ [
+ [0.2], # iou = 0.1
+ [0.2], # iou = 0.6
+ ]
+ )
+ assert (accuracy == expected).all()
+
+
+ def test_ap_using_torch_metrics_example(
+ torchmetrics_detections: list[Detection],
+ ):
+ """
+ cf with torch metrics/pycocotools results listed here:
+ https://github.com/Lightning-AI/metrics/blob/107dbfd5fb158b7ae6d76281df44bd94c836bfce/tests/unittests/detection/test_map.py#L231
+ """
+
+ loader = DataLoader()
+ loader.add_bounding_boxes(torchmetrics_detections)
+ evaluator = loader.finalize()
+
+ assert evaluator.ignored_prediction_labels == ["3"]
+ assert evaluator.missing_prediction_labels == []
+ assert evaluator.n_datums == 4
+ assert evaluator.n_labels == 6
+ assert evaluator.n_groundtruths == 20
+ assert evaluator.n_predictions == 19
+
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5, 0.75],
+ as_dict=True,
+ )
+
+ # test Accuracy
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 9 / 19,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.5,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 8 / 19,
+ "parameters": {
+ "iou_threshold": 0.75,
+ "score_threshold": 0.5,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_metrics_first_class(
+ basic_detections_first_class: list[Detection],
+ basic_rotated_detections_first_class: list[Detection],
+ ):
+ """
+ Basic object detection test.
+
+ groundtruths
+ datum uid1
+ box 1 - label v1 - tp
+ box 3 - label v2 - fn missing prediction
+ datum uid2
+ box 2 - label v1 - fn missing prediction
+
+ predictions
+ datum uid1
+ box 1 - label v1 - score 0.3 - tp
+ datum uid2
+ box 2 - label v2 - score 0.98 - fp
+ """
+ for input_, method in [
+ (basic_detections_first_class, DataLoader.add_bounding_boxes),
+ (basic_rotated_detections_first_class, DataLoader.add_polygons),
+ ]:
+ loader = DataLoader()
+ method(loader, input_)
+ evaluator = loader.finalize()
+
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.1, 0.6],
+ score_thresholds=[0.0, 0.5],
+ as_dict=True,
+ )
+
+ assert evaluator.ignored_prediction_labels == []
+ assert evaluator.missing_prediction_labels == []
+ assert evaluator.n_datums == 2
+ assert evaluator.n_labels == 1
+ assert evaluator.n_groundtruths == 2
+ assert evaluator.n_predictions == 1
+
+ # test Accuracy
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 1.0,
+ "parameters": {
+ "iou_threshold": 0.1,
+ "score_threshold": 0.0,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 1.0,
+ "parameters": {
+ "iou_threshold": 0.6,
+ "score_threshold": 0.0,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.1,
+ "score_threshold": 0.5,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.6,
+ "score_threshold": 0.5,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_metrics_second_class(
+ basic_detections_second_class: list[Detection],
+ basic_rotated_detections_second_class: list[Detection],
+ ):
+ """
+ Basic object detection test.
+
+ groundtruths
+ datum uid1
+ box 3 - label v2 - fn missing prediction
+ datum uid2
+ none
+ predictions
+ datum uid1
+ none
+ datum uid2
+ box 2 - label v2 - score 0.98 - fp
+ """
+ for input_, method in [
+ (basic_detections_second_class, DataLoader.add_bounding_boxes),
+ (basic_rotated_detections_second_class, DataLoader.add_polygons),
+ ]:
+ loader = DataLoader()
+ method(loader, input_)
+ evaluator = loader.finalize()
+
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.1, 0.6],
+ score_thresholds=[0.0, 0.5],
+ as_dict=True,
+ )
+
+ assert evaluator.ignored_prediction_labels == []
+ assert evaluator.missing_prediction_labels == []
+ assert evaluator.n_datums == 2
+ assert evaluator.n_labels == 1
+ assert evaluator.n_groundtruths == 1
+ assert evaluator.n_predictions == 1
+
+ # test Accuracy
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.1,
+ "score_threshold": 0.0,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.6,
+ "score_threshold": 0.0,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.1,
+ "score_threshold": 0.5,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.6,
+ "score_threshold": 0.5,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_single_datum_baseline(
+ false_negatives_single_datum_baseline_detections: list[Detection],
+ ):
+ """This is the baseline for the below test. In this case there are two predictions and
+ one groundtruth, but the highest confident prediction overlaps sufficiently with the groundtruth
+ so there is not a penalty for the false negative so the Accuracy is 1
+ """
+
+ loader = DataLoader()
+ loader.add_bounding_boxes(false_negatives_single_datum_baseline_detections)
+ evaluator = loader.finalize()
+
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0, 0.9],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ },
+ {
+ "type": "Accuracy",
+ "value": 0.0,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.9,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_single_datum(
+ false_negatives_single_datum_detections: list[Detection],
+ ):
+ """Tests where high confidence false negative was not being penalized. The
+ difference between this test and the above is that here the prediction with higher confidence
+ does not sufficiently overlap the groundtruth and so is penalized and we get an Accuracy of 0.5
+ """
+
+ loader = DataLoader()
+ loader.add_bounding_boxes(false_negatives_single_datum_detections)
+ evaluator = loader.finalize()
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ }
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_two_datums_one_empty_low_confidence_of_fp(
+ false_negatives_two_datums_one_empty_low_confidence_of_fp_detections: list[
+ Detection
+ ],
+ ):
+ """In this test we have
+ 1. An image with a matching groundtruth and prediction (same class and high IOU)
+ 2. A second image with empty groundtruth annotation but a prediction with lower confidence
+ then the prediction on the first image.
+
+ In this case, the Accuracy should be 1.0 since the false positive has lower confidence than the true positive
+
+ """
+
+ loader = DataLoader()
+ loader.add_bounding_boxes(
+ false_negatives_two_datums_one_empty_low_confidence_of_fp_detections
+ )
+ evaluator = loader.finalize()
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ }
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_two_datums_one_empty_high_confidence_of_fp(
+ false_negatives_two_datums_one_empty_high_confidence_of_fp_detections: list[
+ Detection
+ ],
+ ):
+ """In this test we have
+ 1. An image with a matching groundtruth and prediction (same class and high IOU)
+ 2. A second image with empty groundtruth annotation and a prediction with higher confidence
+ then the prediction on the first image.
+
+ In this case, the Accuracy should be 0.5 since the false positive has higher confidence than the true positive
+ """
+
+ loader = DataLoader()
+ loader.add_bounding_boxes(
+ false_negatives_two_datums_one_empty_high_confidence_of_fp_detections
+ )
+ evaluator = loader.finalize()
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ }
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp(
+ false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections: list[
+ Detection
+ ],
+ ):
+ """In this test we have
+ 1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+ 2. A second image with a groundtruth annotation with class `"other value"` and a prediction with lower confidence
+ then the prediction on the first image.
+
+ In this case, the Accuracy for class `"value"` should be 1 since the false positive has lower confidence than the true positive.
+ Accuracy for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+ """
+ loader = DataLoader()
+ loader.add_bounding_boxes(
+ false_negatives_two_datums_one_only_with_different_class_low_confidence_of_fp_detections
+ )
+ evaluator = loader.finalize()
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
+
+
+ def test_accuracy_false_negatives_two_datums_one_only_with_different_class_high_confidence_of_fp(
+ false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections: list[
+ Detection
+ ],
+ ):
+ """In this test we have
+ 1. An image with a matching groundtruth and prediction (same class, `"value"`, and high IOU)
+ 2. A second image with a groundtruth annotation with class `"other value"` and a prediction with higher confidence
+ then the prediction on the first image.
+
+ In this case, the Accuracy for class `"value"` should be 0.5 since the false positive has higher confidence than the true positive.
+ Accuracy for class `"other value"` should be 0 since there is no prediction for the `"other value"` groundtruth
+ """
+ loader = DataLoader()
+ loader.add_bounding_boxes(
+ false_negatives_two_images_one_only_with_different_class_high_confidence_of_fp_detections
+ )
+ evaluator = loader.finalize()
+ metrics = evaluator.evaluate(
+ iou_thresholds=[0.5],
+ score_thresholds=[0.0],
+ as_dict=True,
+ )
+
+ actual_metrics = [m for m in metrics[MetricType.Accuracy]]
+ expected_metrics = [
+ {
+ "type": "Accuracy",
+ "value": 0.5,
+ "parameters": {
+ "iou_threshold": 0.5,
+ "score_threshold": 0.0,
+ },
+ },
+ ]
+ for m in actual_metrics:
+ assert m in expected_metrics
+ for m in expected_metrics:
+ assert m in actual_metrics
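A quick worked check of the headline expectations in test_ap_using_torch_metrics_example, assuming the aggregate definition introduced in valor_lite/object_detection/computation.py further below (true positives divided by the total number of predictions); the 19 comes from the fixture's evaluator.n_predictions assertion:

    # Hedged arithmetic check only; the counts 9 and 8 are read off the expected values.
    n_predictions = 19  # evaluator.n_predictions in the torchmetrics fixture
    print(9 / n_predictions)  # 0.4736... -> "value": 9 / 19 at iou_threshold 0.5
    print(8 / n_predictions)  # 0.4210... -> "value": 8 / 19 at iou_threshold 0.75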
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_precision.py

@@ -24,7 +24,7 @@ def test__compute_average_precision():
  iou_thresholds = np.array([0.1, 0.6])
  score_thresholds = np.array([0.0])

- (results, _, _, _,) = compute_metrics(
+ (results, _, _, _, _) = compute_metrics(
  sorted_pairs,
  label_metadata=label_metadata,
  iou_thresholds=iou_thresholds,
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_average_recall.py

@@ -25,7 +25,7 @@ def test__compute_average_recall():
  iou_thresholds = np.array([0.1, 0.6])
  score_thresholds = np.array([0.5, 0.93, 0.98])

- (_, results, _, _,) = compute_metrics(
+ (_, results, _, _, _,) = compute_metrics(
  sorted_pairs,
  label_metadata=label_metadata,
  iou_thresholds=iou_thresholds,
{valor_lite-0.33.11 → valor_lite-0.33.13}/tests/object_detection/test_pr_curve.py

@@ -24,7 +24,7 @@ def test_pr_curve_simple():
  iou_thresholds = np.array([0.1, 0.6])
  score_thresholds = np.array([0.0])

- (_, _, _, pr_curve) = compute_metrics(
+ (_, _, _, _, pr_curve) = compute_metrics(
  sorted_pairs,
  label_metadata=label_metadata,
  iou_thresholds=iou_thresholds,
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/computation.py

@@ -182,9 +182,9 @@ def compute_metrics(
  out=precision,
  )

- accuracy = np.zeros_like(recall)
+ accuracy = np.zeros(n_scores, dtype=np.float64)
  np.divide(
- (counts[:, :, 0] + counts[:, :, 3]),
+ counts[:, :, 0].sum(axis=1),
  float(n_datums),
  out=accuracy,
  )
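The replaced lines above change classification accuracy from a per-label ratio into a single aggregate value per score threshold: correct hardmax predictions summed over all labels, divided by the number of datums. A minimal NumPy sketch of that reduction, using made-up counts; the (TP, FP, FN, TN) column ordering is inferred from the old expression and is an assumption:

    import numpy as np

    # Made-up hardmax confusion counts: 2 score thresholds x 3 labels,
    # last axis assumed to be (TP, FP, FN, TN), with TP in column 0 as the diff uses.
    counts = np.array(
        [
            [[1, 0, 0, 2], [1, 1, 0, 1], [0, 0, 1, 2]],  # score >= 0.25
            [[1, 0, 0, 2], [0, 1, 1, 1], [0, 1, 1, 1]],  # score >= 0.75
        ],
        dtype=np.float64,
    )
    n_datums = 3

    # Old behaviour: one accuracy per (score threshold, label) -> shape (2, 3).
    per_label = (counts[:, :, 0] + counts[:, :, 3]) / float(n_datums)

    # New behaviour: one accuracy per score threshold, TP summed over labels -> shape (2,).
    accuracy = np.zeros(counts.shape[0], dtype=np.float64)
    np.divide(counts[:, :, 0].sum(axis=1), float(n_datums), out=accuracy)
    print(accuracy)  # approx. [0.667 0.333], the shape (2,) the updated test now asserts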
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/manager.py

@@ -367,6 +367,14 @@ class Evaluator:
  )
  ]

+ metrics[MetricType.Accuracy] = [
+ Accuracy(
+ value=accuracy.astype(float).tolist(),
+ score_thresholds=score_thresholds,
+ hardmax=hardmax,
+ )
+ ]
+
  for label_idx, label in self.index_to_label.items():

  kwargs = {
@@ -401,12 +409,6 @@ class Evaluator:
  **kwargs,
  )
  )
- metrics[MetricType.Accuracy].append(
- Accuracy(
- value=accuracy[:, label_idx].astype(float).tolist(),
- **kwargs,
- )
- )
  metrics[MetricType.F1].append(
  F1(
  value=f1_score[:, label_idx].astype(float).tolist(),
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/classification/metric.py

@@ -158,24 +158,23 @@ class Recall(_ThresholdValue):
  pass


- class Accuracy(_ThresholdValue):
+ class F1(_ThresholdValue):
  """
- Accuracy metric for a specific class label.
+ F1 score for a specific class label.

- This class calculates the accuracy at various score thresholds for a binary
- classification task. Accuracy is defined as the ratio of the sum of true positives and
- true negatives over all predictions.
+ This class calculates the F1 score at various score thresholds for a binary
+ classification task.

  Attributes
  ----------
  value : list[float]
- Accuracy values computed at each score threshold.
+ F1 scores computed at each score threshold.
  score_thresholds : list[float]
- Score thresholds at which the accuracy values are computed.
+ Score thresholds at which the F1 scores are computed.
  hardmax : bool
  Indicates whether hardmax thresholding was used.
  label : str
- The class label for which the accuracy is computed.
+ The class label for which the F1 score is computed.

  Methods
  -------
@@ -188,23 +187,21 @@ class Accuracy(_ThresholdValue):
  pass


- class F1(_ThresholdValue):
+ @dataclass
+ class Accuracy:
  """
- F1 score for a specific class label.
+ Multiclass accuracy metric.

- This class calculates the F1 score at various score thresholds for a binary
- classification task.
+ This class calculates the accuracy at various score thresholds.

  Attributes
  ----------
  value : list[float]
- F1 scores computed at each score threshold.
+ Accuracy values computed at each score threshold.
  score_thresholds : list[float]
- Score thresholds at which the F1 scores are computed.
+ Score thresholds at which the accuracy values are computed.
  hardmax : bool
  Indicates whether hardmax thresholding was used.
- label : str
- The class label for which the F1 score is computed.

  Methods
  -------
@@ -214,7 +211,22 @@ class F1(_ThresholdValue):
  Converts the instance to a dictionary representation.
  """

- pass
+ value: list[float]
+ score_thresholds: list[float]
+ hardmax: bool
+
+ def to_metric(self) -> Metric:
+ return Metric(
+ type=type(self).__name__,
+ value=self.value,
+ parameters={
+ "score_thresholds": self.score_thresholds,
+ "hardmax": self.hardmax,
+ },
+ )
+
+ def to_dict(self) -> dict:
+ return self.to_metric().to_dict()


  @dataclass
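After this change the classification Accuracy metric serializes without a per-label parameter. A hedged, self-contained mirror of the new dataclass (the real implementation goes through valor_lite's Metric type; only the resulting dictionary shape is reproduced here, and the class name is hypothetical):

    from dataclasses import dataclass

    # Standalone mirror of the reshaped classification Accuracy shown above.
    @dataclass
    class AccuracySketch:
        value: list[float]
        score_thresholds: list[float]
        hardmax: bool

        def to_dict(self) -> dict:
            return {
                "type": "Accuracy",
                "value": self.value,
                "parameters": {
                    "score_thresholds": self.score_thresholds,
                    "hardmax": self.hardmax,
                },
            }

    print(AccuracySketch([2 / 3, 1 / 3], [0.25, 0.75], True).to_dict())
    # Matches the single expected metric in test_accuracy_basic above: no "label" key.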
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/computation.py

@@ -282,6 +282,7 @@ def compute_metrics(
  ],
  NDArray[np.float64],
  NDArray[np.float64],
+ NDArray[np.float64],
  ]:
  """
  Computes Object Detection metrics.
@@ -309,13 +310,15 @@ def compute_metrics(

  Returns
  -------
- tuple[NDArray, NDArray, NDArray, float]
+ tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64], float]
  Average Precision results.
- tuple[NDArray, NDArray, NDArray, float]
+ tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64], float]
  Average Recall results.
- np.ndarray
- Precision, Recall, TP, FP, FN, F1 Score, Accuracy.
- np.ndarray
+ NDArray[np.float64]
+ Accuracy.
+ NDArray[np.float64]
+ Precision, Recall, TP, FP, FN, F1 Score.
+ NDArray[np.float64]
  Interpolated Precision-Recall Curves.
  """

@@ -329,9 +332,10 @@ def compute_metrics(
  elif n_scores == 0:
  raise ValueError("At least one score threshold must be passed.")

- average_precision = np.zeros((n_ious, n_labels))
- average_recall = np.zeros((n_scores, n_labels))
- counts = np.zeros((n_ious, n_scores, n_labels, 7))
+ average_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
+ average_recall = np.zeros((n_scores, n_labels), dtype=np.float64)
+ accuracy = np.zeros((n_ious, n_scores), dtype=np.float64)
+ counts = np.zeros((n_ious, n_scores, n_labels, 6), dtype=np.float64)

  pd_labels = data[:, 5].astype(np.int32)
  scores = data[:, 6]
@@ -417,14 +421,6 @@ def compute_metrics(
  out=f1_score,
  )

- accuracy = np.zeros_like(tp_count)
- np.divide(
- tp_count,
- (gt_count + pd_count),
- where=(gt_count + pd_count) > 1e-9,
- out=accuracy,
- )
-
  counts[iou_idx][score_idx] = np.concatenate(
  (
  tp_count[:, np.newaxis],
@@ -433,11 +429,18 @@
  precision[:, np.newaxis],
  recall[:, np.newaxis],
  f1_score[:, np.newaxis],
- accuracy[:, np.newaxis],
  ),
  axis=1,
  )

+ # caluculate accuracy
+ total_pd_count = label_metadata[:, 1].sum()
+ accuracy[iou_idx, score_idx] = (
+ (tp_count.sum() / total_pd_count)
+ if total_pd_count > 1e-9
+ else 0.0
+ )
+
  # calculate recall for AR
  average_recall[score_idx] += recall

@@ -552,6 +555,7 @@ def compute_metrics(
  return (
  ap_results,
  ar_results,
+ accuracy,
  counts,
  pr_curve,
  )
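The hunks above drop the per-label accuracy column (TP / (GT + PD)) from counts and instead fill one accuracy value per (IoU threshold, score threshold) pair: summed true positives over the total prediction count taken from label_metadata[:, 1]. A rough standalone sketch of that reduction, with illustrative array values rather than the library's internal layout:

    import numpy as np

    # Illustrative only: tp_counts holds per-label true-positive counts for
    # 2 IoU thresholds x 1 score threshold x 3 labels; label_metadata column 1
    # holds the number of predictions per label (values chosen so the totals
    # echo the torchmetrics test above).
    tp_counts = np.array([[[5.0, 3.0, 1.0]], [[4.0, 3.0, 1.0]]])
    label_metadata = np.array([[10, 8, 0], [6, 7, 0], [4, 4, 0]])

    n_ious, n_scores, _ = tp_counts.shape
    accuracy = np.zeros((n_ious, n_scores), dtype=np.float64)
    total_pd_count = label_metadata[:, 1].sum()  # 19 predictions overall

    for iou_idx in range(n_ious):
        for score_idx in range(n_scores):
            tp_count = tp_counts[iou_idx, score_idx]
            accuracy[iou_idx, score_idx] = (
                (tp_count.sum() / total_pd_count) if total_pd_count > 1e-9 else 0.0
            )

    print(accuracy)  # [[9/19], [8/19]]: one value per (iou_threshold, score_threshold)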
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/manager.py

@@ -506,6 +506,7 @@ class Evaluator:
  average_recall_averaged_over_scores,
  mean_average_recall_averaged_over_scores,
  ),
+ accuracy,
  precision_recall,
  pr_curves,
  ) = compute_metrics(
@@ -593,6 +594,16 @@ class Evaluator:
  )
  ]

+ metrics[MetricType.Accuracy] = [
+ Accuracy(
+ value=float(accuracy[iou_idx, score_idx]),
+ iou_threshold=iou_thresholds[iou_idx],
+ score_threshold=score_thresholds[score_idx],
+ )
+ for iou_idx in range(accuracy.shape[0])
+ for score_idx in range(accuracy.shape[1])
+ ]
+
  metrics[MetricType.PrecisionRecallCurve] = [
  PrecisionRecallCurve(
  precisions=pr_curves[iou_idx, label_idx, :, 0]
@@ -650,12 +661,6 @@ class Evaluator:
  **kwargs,
  )
  )
- metrics[MetricType.Accuracy].append(
- Accuracy(
- value=float(row[6]),
- **kwargs,
- )
- )

  if as_dict:
  return {
{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite/object_detection/metric.py

@@ -160,9 +160,9 @@ class Recall(_ClassMetric):
  pass


- class Accuracy(_ClassMetric):
+ class F1(_ClassMetric):
  """
- Accuracy metric for a specific class label in object detection.
+ F1 score for a specific class label in object detection.

  This class encapsulates a metric value for a particular class label,
  along with the associated Intersection over Union (IoU) threshold and
@@ -190,20 +190,18 @@ class Accuracy(_ClassMetric):
  pass


- class F1(_ClassMetric):
+ @dataclass
+ class Accuracy:
  """
- F1 score for a specific class label in object detection.
+ Accuracy metric for the object detection task type.

- This class encapsulates a metric value for a particular class label,
- along with the associated Intersection over Union (IoU) threshold and
- confidence score threshold.
+ This class encapsulates a metric value at a specific Intersection
+ over Union (IoU) threshold and confidence score threshold.

  Attributes
  ----------
  value : float
  The metric value.
- label : str
- The class label for which the metric is calculated.
  iou_threshold : float
  The IoU threshold used to determine matches between predicted and ground truth boxes.
  score_threshold : float
@@ -217,7 +215,22 @@ class F1(_ClassMetric):
  Converts the instance to a dictionary representation.
  """

- pass
+ value: float
+ iou_threshold: float
+ score_threshold: float
+
+ def to_metric(self) -> Metric:
+ return Metric(
+ type=type(self).__name__,
+ value=self.value,
+ parameters={
+ "iou_threshold": self.iou_threshold,
+ "score_threshold": self.score_threshold,
+ },
+ )
+
+ def to_dict(self) -> dict:
+ return self.to_metric().to_dict()


  @dataclass
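Likewise, the detection-level Accuracy now carries only an IoU threshold and a score threshold. A minimal mirror of the dictionary it produces (hypothetical class name; the real class delegates to Metric):

    from dataclasses import dataclass

    # Standalone mirror of the detection Accuracy shown above; dict shape only.
    @dataclass
    class DetectionAccuracySketch:
        value: float
        iou_threshold: float
        score_threshold: float

        def to_dict(self) -> dict:
            return {
                "type": "Accuracy",
                "value": self.value,
                "parameters": {
                    "iou_threshold": self.iou_threshold,
                    "score_threshold": self.score_threshold,
                },
            }

    print(DetectionAccuracySketch(9 / 19, 0.5, 0.5).to_dict())
    # Same structure as the expected metrics in the new test_accuracy.py above.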
{valor_lite-0.33.11 → valor_lite-0.33.13/valor_lite.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: valor-lite
- Version: 0.33.11
+ Version: 0.33.13
  Summary: Compute valor metrics locally.
  License: MIT License

{valor_lite-0.33.11 → valor_lite-0.33.13}/valor_lite.egg-info/SOURCES.txt

@@ -24,6 +24,7 @@ tests/classification/test_schemas.py
  tests/classification/test_stability.py
  tests/object_detection/__init__.py
  tests/object_detection/conftest.py
+ tests/object_detection/test_accuracy.py
  tests/object_detection/test_average_precision.py
  tests/object_detection/test_average_recall.py
  tests/object_detection/test_confusion_matrix.py