valor-lite 0.33.12__py3-none-any.whl → 0.33.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite might be problematic.
- valor_lite/classification/__init__.py +8 -21
- valor_lite/classification/computation.py +2 -2
- valor_lite/classification/manager.py +32 -244
- valor_lite/classification/metric.py +331 -372
- valor_lite/classification/utilities.py +222 -0
- valor_lite/object_detection/__init__.py +4 -35
- valor_lite/object_detection/computation.py +41 -37
- valor_lite/object_detection/manager.py +38 -492
- valor_lite/object_detection/metric.py +636 -696
- valor_lite/object_detection/utilities.py +505 -0
- valor_lite/schemas.py +10 -8
- valor_lite/semantic_segmentation/__init__.py +2 -17
- valor_lite/semantic_segmentation/computation.py +1 -1
- valor_lite/semantic_segmentation/manager.py +13 -116
- valor_lite/semantic_segmentation/metric.py +216 -239
- valor_lite/semantic_segmentation/utilities.py +104 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/METADATA +1 -1
- valor_lite-0.33.14.dist-info/RECORD +27 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/WHEEL +1 -1
- valor_lite-0.33.12.dist-info/RECORD +0 -24
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.12.dist-info → valor_lite-0.33.14.dist-info}/top_level.txt +0 -0
valor_lite/classification/metric.py

@@ -1,7 +1,6 @@
-from dataclasses import dataclass
 from enum import Enum
 
-from valor_lite.schemas import
+from valor_lite.schemas import BaseMetric
 
 
 class MetricType(Enum):
@@ -15,412 +14,372 @@ class MetricType(Enum):
     ConfusionMatrix = "ConfusionMatrix"
 
 
-
-class Counts:
+class Metric(BaseMetric):
     """
-
-
-    This class stores the true positive (`tp`), false positive (`fp`), false negative (`fn`), and true
-    negative (`tn`) counts computed at various score thresholds for a binary classification task.
+    Classification Metric.
 
     Attributes
     ----------
-
-
-
-
-
-
-    tn : list[int]
-        True negative counts at each score threshold.
-    score_thresholds : list[float]
-        Score thresholds at which the counts are computed.
-    hardmax : bool
-        Indicates whether hardmax thresholding was used.
-    label : str
-        The class label for which the counts are computed.
-
-    Methods
-    -------
-    to_metric()
-        Converts the instance to a generic `Metric` object.
-    to_dict()
-        Converts the instance to a dictionary representation.
+    type : str
+        The metric type.
+    value : int | float | dict
+        The metric value.
+    parameters : dict[str, Any]
+        A dictionary containing metric parameters.
     """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @classmethod
+    def precision(
+        cls,
+        value: float,
+        score_threshold: float,
+        hardmax: bool,
+        label: str,
+    ):
+        """
+        Precision metric for a specific class label.
+
+        This class calculates the precision at a specific score threshold.
+        Precision is defined as the ratio of true positives to the sum of
+        true positives and false positives.
+
+        Parameters
+        ----------
+        value : float
+            Precision value computed at a specific score threshold.
+        score_threshold : float
+            Score threshold at which the precision value is computed.
+        hardmax : bool
+            Indicates whether hardmax thresholding was used.
+        label : str
+            The class label for which the precision is computed.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.Precision.value,
+            value=value,
             parameters={
-                "
-                "hardmax":
-                "label":
+                "score_threshold": score_threshold,
+                "hardmax": hardmax,
+                "label": label,
             },
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    @classmethod
+    def recall(
+        cls,
+        value: float,
+        score_threshold: float,
+        hardmax: bool,
+        label: str,
+    ):
+        """
+        Recall metric for a specific class label.
+
+        This class calculates the recall at a specific score threshold.
+        Recall is defined as the ratio of true positives to the sum of
+        true positives and false negatives.
+
+        Parameters
+        ----------
+        value : float
+            Recall value computed at a specific score threshold.
+        score_threshold : float
+            Score threshold at which the recall value is computed.
+        hardmax : bool
+            Indicates whether hardmax thresholding was used.
+        label : str
+            The class label for which the recall is computed.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.Recall.value,
+            value=value,
             parameters={
-                "
-                "hardmax":
-                "label":
+                "score_threshold": score_threshold,
+                "hardmax": hardmax,
+                "label": label,
             },
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    """
-
-    pass
-
-
-class Recall(_ThresholdValue):
-    """
-    Recall metric for a specific class label.
-
-    This class calculates the recall at various score thresholds for a binary
-    classification task. Recall is defined as the ratio of true positives to the
-    sum of true positives and false negatives.
-
-    Attributes
-    ----------
-    value : list[float]
-        Recall values computed at each score threshold.
-    score_thresholds : list[float]
-        Score thresholds at which the recall values are computed.
-    hardmax : bool
-        Indicates whether hardmax thresholding was used.
-    label : str
-        The class label for which the recall is computed.
-
-    Methods
-    -------
-    to_metric()
-        Converts the instance to a generic `Metric` object.
-    to_dict()
-        Converts the instance to a dictionary representation.
-    """
-
-    pass
-
-
-class F1(_ThresholdValue):
-    """
-    F1 score for a specific class label.
-
-    This class calculates the F1 score at various score thresholds for a binary
-    classification task.
-
-    Attributes
-    ----------
-    value : list[float]
-        F1 scores computed at each score threshold.
-    score_thresholds : list[float]
-        Score thresholds at which the F1 scores are computed.
-    hardmax : bool
-        Indicates whether hardmax thresholding was used.
-    label : str
-        The class label for which the F1 score is computed.
-
-    Methods
-    -------
-    to_metric()
-        Converts the instance to a generic `Metric` object.
-    to_dict()
-        Converts the instance to a dictionary representation.
-    """
-
-    pass
-
-
-@dataclass
-class Accuracy:
-    """
-    Multiclass accuracy metric.
-
-    This class calculates the accuracy at various score thresholds.
-
-    Attributes
-    ----------
-    value : list[float]
-        Accuracy values computed at each score threshold.
-    score_thresholds : list[float]
-        Score thresholds at which the accuracy values are computed.
-    hardmax : bool
-        Indicates whether hardmax thresholding was used.
-
-    Methods
-    -------
-    to_metric()
-        Converts the instance to a generic `Metric` object.
-    to_dict()
-        Converts the instance to a dictionary representation.
-    """
-
-    value: list[float]
-    score_thresholds: list[float]
-    hardmax: bool
-
-    def to_metric(self) -> Metric:
-        return Metric(
-            type=type(self).__name__,
-            value=self.value,
+    @classmethod
+    def f1_score(
+        cls,
+        value: float,
+        score_threshold: float,
+        hardmax: bool,
+        label: str,
+    ):
+        """
+        F1 score for a specific class label and confidence score threshold.
+
+        Parameters
+        ----------
+        value : float
+            F1 score computed at a specific score threshold.
+        score_threshold : float
+            Score threshold at which the F1 score is computed.
+        hardmax : bool
+            Indicates whether hardmax thresholding was used.
+        label : str
+            The class label for which the F1 score is computed.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.F1.value,
+            value=value,
             parameters={
-                "
-                "hardmax":
+                "score_threshold": score_threshold,
+                "hardmax": hardmax,
+                "label": label,
             },
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        return Metric(
-            type=type(self).__name__,
-            value=self.value,
-            parameters={"label": self.label},
+    @classmethod
+    def accuracy(
+        cls,
+        value: float,
+        score_threshold: float,
+        hardmax: bool,
+    ):
+        """
+        Multiclass accuracy metric.
+
+        This class calculates the accuracy at various score thresholds.
+
+        Parameters
+        ----------
+        value : float
+            Accuracy value computed at a specific score threshold.
+        score_threshold : float
+            Score threshold at which the accuracy value is computed.
+        hardmax : bool
+            Indicates whether hardmax thresholding was used.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.Accuracy.value,
+            value=value,
+            parameters={
+                "score_threshold": score_threshold,
+                "hardmax": hardmax,
+            },
         )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            value=self.value,
-            parameters={},
+    @classmethod
+    def roc_auc(
+        cls,
+        value: float,
+        label: str,
+    ):
+        """
+        Receiver Operating Characteristic Area Under the Curve (ROC AUC).
+
+        This class calculates the ROC AUC score for a specific class label in a multiclass classification task.
+        ROC AUC is a performance measurement for classification problems at various threshold settings.
+        It reflects the ability of the classifier to distinguish between the positive and negative classes.
+
+        Parameters
+        ----------
+        value : float
+            The computed ROC AUC score.
+        label : str
+            The class label for which the ROC AUC is computed.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.ROCAUC.value,
+            value=value,
+            parameters={
+                "label": label,
+            },
         )
 
-
-
-
-
-
-class
-
-
+    @classmethod
+    def mean_roc_auc(cls, value: float):
+        """
+        Mean Receiver Operating Characteristic Area Under the Curve (mROC AUC).
+
+        This class calculates the mean ROC AUC score over all classes in a multiclass classification task.
+        It provides an aggregate measure of the model's ability to distinguish between classes.
+
+        Parameters
+        ----------
+        value : float
+            The computed mean ROC AUC score.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(type=MetricType.mROCAUC.value, value=value, parameters={})
+
+    @classmethod
+    def counts(
+        cls,
+        tp: int,
+        fp: int,
+        fn: int,
+        tn: int,
+        score_threshold: float,
+        hardmax: bool,
+        label: str,
+    ):
+        """
+        Confusion matrix counts at specified score thresholds for binary classification.
+
+        This class stores the true positive (`tp`), false positive (`fp`), false negative (`fn`), and true
+        negative (`tn`) counts computed at various score thresholds for a binary classification task.
+
+        Parameters
+        ----------
+        tp : int
+            True positive counts at each score threshold.
+        fp : int
+            False positive counts at each score threshold.
+        fn : int
+            False negative counts at each score threshold.
+        tn : int
+            True negative counts at each score threshold.
+        score_threshold : float
+            Score thresholds at which the counts are computed.
+        hardmax : bool
+            Indicates whether hardmax thresholding was used.
+        label : str
+            The class label for which the counts are computed.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.Counts.value,
+            value={
+                "tp": tp,
+                "fp": fp,
+                "fn": fn,
+                "tn": tn,
+            },
+            parameters={
+                "score_threshold": score_threshold,
+                "hardmax": hardmax,
+                "label": label,
+            },
+        )
 
-
-
-
+    @classmethod
+    def confusion_matrix(
+        cls,
+        confusion_matrix: dict[
+            str,  # ground truth label value
+            dict[
+                str,  # prediction label value
+                dict[
+                    str,  # either `count` or `examples`
+                    int
+                    | list[
+                        dict[
+                            str,  # either `datum` or `score`
+                            str | float,  # datum uid # prediction score
+                        ]
+                    ],
+                ],
+            ],
+        ],
+        missing_predictions: dict[
+            str,  # ground truth label value
+            dict[
+                str,  # either `count` or `examples`
+                int | list[dict[str, str]],  # count or datum examples
+            ],
+        ],
+        score_threshold: float,
+        maximum_number_of_examples: int,
+    ):
+        """
+        The confusion matrix and related metrics for the classification task.
+
+        This class encapsulates detailed information about the model's performance, including correct
+        predictions, misclassifications, hallucinations (false positives), and missing predictions
+        (false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
+
+        Confusion Matrix Structure:
+        {
+            ground_truth_label: {
+                predicted_label: {
+                    'count': int,
+                    'examples': [
+                        {
+                            'datum': str,
+                            'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                            'prediction': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                            'score': float,
+                        },
+                        ...
+                    ],
+                },
+                ...
+            },
+            ...
+        }
 
-
-
-
-            predicted_label: {
+        Missing Prediction Structure:
+        {
+            ground_truth_label: {
                 'count': int,
                 'examples': [
                     {
                         'datum': str,
                         'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
-                        'prediction': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
-                        'score': float,
                     },
                     ...
                 ],
             },
             ...
-        }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            'count': int,
-            'examples': [
-                {
-                    'datum': str,
-                    'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
-                },
-                ...
-            ],
-        },
-        ...
-    }
-
-    Attributes
-    ----------
-    confusion_matrix : dict
-        A nested dictionary where the first key is the ground truth label value, the second key
-        is the prediction label value, and the innermost dictionary contains either a `count`
-        or a list of `examples`. Each example includes the datum UID and prediction score.
-    missing_predictions : dict
-        A dictionary where each key is a ground truth label value for which the model failed to predict
-        (false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
-        Each example includes the datum UID.
-    score_threshold : float
-        The confidence score threshold used to filter predictions.
-    number_of_examples : int
-        The maximum number of examples per element.
-
-    Methods
-    -------
-    to_metric()
-        Converts the instance to a generic `Metric` object.
-    to_dict()
-        Converts the instance to a dictionary representation.
-    """
-
-    confusion_matrix: dict[
-        str,  # ground truth label value
-        dict[
-            str,  # prediction label value
-            dict[
-                str,  # either `count` or `examples`
-                int
-                | list[
-                    dict[
-                        str,  # either `datum` or `score`
-                        str | float,  # datum uid # prediction score
-                    ]
-                ],
-            ],
-        ],
-    ]
-    missing_predictions: dict[
-        str,  # ground truth label value
-        dict[
-            str,  # either `count` or `examples`
-            int | list[dict[str, str]],  # count or datum examples
-        ],
-    ]
-    score_threshold: float
-    number_of_examples: int
-
-    def to_metric(self) -> Metric:
-        return Metric(
-            type=type(self).__name__,
+        }
+
+        Parameters
+        ----------
+        confusion_matrix : dict
+            A nested dictionary where the first key is the ground truth label value, the second key
+            is the prediction label value, and the innermost dictionary contains either a `count`
+            or a list of `examples`. Each example includes the datum UID and prediction score.
+        missing_predictions : dict
+            A dictionary where each key is a ground truth label value for which the model failed to predict
+            (false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
+            Each example includes the datum UID.
+        score_threshold : float
+            The confidence score threshold used to filter predictions.
+        maximum_number_of_examples : int
+            The maximum number of examples per element.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.ConfusionMatrix.value,
             value={
-                "confusion_matrix":
-                "missing_predictions":
+                "confusion_matrix": confusion_matrix,
+                "missing_predictions": missing_predictions,
            },
             parameters={
-                "score_threshold":
+                "score_threshold": score_threshold,
+                "maximum_number_of_examples": maximum_number_of_examples,
             },
         )
-
-    def to_dict(self) -> dict:
-        return self.to_metric().to_dict()
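The practical effect of the metric.py rewrite above is that the per-metric dataclasses from 0.33.12 (Counts, Precision, Recall, F1, Accuracy, ROC AUC, ConfusionMatrix) and their to_metric()/to_dict() helpers collapse into a single Metric(BaseMetric) class with classmethod constructors. The sketch below is illustrative only: the classmethod names, signatures, and the type/value/parameters attributes are taken from the diff, while the import path and all values are assumptions made for the example.

# Illustrative sketch only -- not part of the package diff.
from valor_lite.classification.metric import Metric  # import path assumed

# Precision for the "dog" label at a 0.5 confidence threshold.
precision = Metric.precision(
    value=0.75,
    score_threshold=0.5,
    hardmax=True,
    label="dog",
)

# Raw TP/FP/FN/TN counts for the same label and threshold.
counts = Metric.counts(
    tp=3,
    fp=1,
    fn=2,
    tn=10,
    score_threshold=0.5,
    hardmax=True,
    label="dog",
)

# Each constructor returns a generic Metric; its type, value, and parameters
# fields carry what the removed dataclasses used to expose through to_dict().
print(precision.type, precision.value, precision.parameters)
print(counts.type, counts.value, counts.parameters)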