valor-lite 0.33.12__py3-none-any.whl → 0.33.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic.
- valor_lite/classification/__init__.py +8 -21
- valor_lite/classification/computation.py +2 -2
- valor_lite/classification/manager.py +32 -244
- valor_lite/classification/metric.py +331 -372
- valor_lite/classification/utilities.py +222 -0
- valor_lite/object_detection/__init__.py +4 -35
- valor_lite/object_detection/computation.py +41 -37
- valor_lite/object_detection/manager.py +38 -492
- valor_lite/object_detection/metric.py +636 -696
- valor_lite/object_detection/utilities.py +505 -0
- valor_lite/schemas.py +10 -8
- valor_lite/semantic_segmentation/__init__.py +2 -17
- valor_lite/semantic_segmentation/computation.py +1 -1
- valor_lite/semantic_segmentation/manager.py +13 -116
- valor_lite/semantic_segmentation/metric.py +216 -239
- valor_lite/semantic_segmentation/utilities.py +104 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/METADATA +1 -1
- valor_lite-0.33.14.dist-info/RECORD +27 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/WHEEL +1 -1
- valor_lite-0.33.12.dist-info/RECORD +0 -24
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/top_level.txt +0 -0
valor_lite/object_detection/metric.py

@@ -1,7 +1,6 @@
-from dataclasses import dataclass
 from enum import Enum

-from valor_lite.schemas import
+from valor_lite.schemas import BaseMetric


 class MetricType(str, Enum):

@@ -22,774 +21,715 @@ class MetricType(str, Enum):

This hunk rewrites everything below the MetricType enum. The per-metric dataclasses of 0.33.12 are removed: `Counts`, the `_ClassMetric` subclasses (`Recall`, `Accuracy`, `F1`, and the other per-label metrics), the `AP` dataclass, and the remaining containers through the confusion-matrix dataclass (fields `confusion_matrix`, `hallucinations`, `missing_predictions`). Each of these carried a NumPy-style docstring plus `to_metric()` and `to_dict()` helpers, with `to_metric()` returning a generic `Metric` built from `type(self).__name__`, the stored value, and the label/threshold parameters, and `to_dict()` returning `self.to_metric().to_dict()`.

In their place, 0.33.14 defines a single `Metric(BaseMetric)` class ("Object Detection Metric") documented with `type : str`, `value : int | float | dict`, and `parameters : dict[str, Any]` attributes, and adds one classmethod constructor per metric. Each constructor has its own docstring and returns `cls(...)` with `type` set to the corresponding `MetricType` value and the label and thresholds recorded under `parameters`:

- Metric.precision(value, label, iou_threshold, score_threshold)
- Metric.recall(value, label, iou_threshold, score_threshold)
- Metric.f1_score(value, label, iou_threshold, score_threshold)
- Metric.accuracy(value, iou_threshold, score_threshold)
- Metric.average_precision(value, iou_threshold, label)
- Metric.mean_average_precision(value, iou_threshold)
- Metric.average_precision_averaged_over_IOUs(value, iou_thresholds, label)
- Metric.mean_average_precision_averaged_over_IOUs(value, iou_thresholds)
- Metric.average_recall(value, score_threshold, iou_thresholds, label)
- Metric.mean_average_recall(value, score_threshold, iou_thresholds)
- Metric.average_recall_averaged_over_scores(value, score_thresholds, iou_thresholds, label)
- Metric.mean_average_recall_averaged_over_scores(value, score_thresholds, iou_thresholds)
- Metric.precision_recall_curve(precisions, scores, iou_threshold, label), whose value holds the "precisions" and "scores" lists interpolated over 101 recall points
- Metric.counts(tp, fp, fn, label, iou_threshold, score_threshold), whose value holds the "tp", "fp", and "fn" counts
- Metric.confusion_matrix(confusion_matrix, hallucinations, missing_predictions, score_threshold, iou_threshold, maximum_number_of_examples), whose value nests the confusion-matrix, hallucination (false-positive), and missing-prediction (false-negative) dictionaries keyed by label, each containing a count and bounding-box examples

The constructor docstrings carry over the descriptions from the removed classes: the AP family notes that average precision uses 101-point interpolation of the precision-recall curve at evenly spaced recall levels from 0 to 1, the AR family notes that recall is computed for detections above the score threshold at each IOU threshold and then averaged, and the mean variants additionally average across class labels. The confusion-matrix example limit previously documented as `number_of_examples` is now the `maximum_number_of_examples` parameter and is recorded in the metric's `parameters` alongside the score and IOU thresholds.