valor-lite 0.33.6__py3-none-any.whl → 0.33.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/classification/annotation.py +6 -2
- valor_lite/classification/computation.py +31 -52
- valor_lite/classification/manager.py +68 -131
- valor_lite/classification/metric.py +7 -23
- valor_lite/detection/annotation.py +64 -29
- valor_lite/detection/computation.py +130 -92
- valor_lite/detection/manager.py +211 -462
- valor_lite/detection/metric.py +20 -48
- valor_lite/segmentation/__init__.py +27 -0
- valor_lite/segmentation/annotation.py +49 -0
- valor_lite/segmentation/computation.py +186 -0
- valor_lite/segmentation/manager.py +532 -0
- valor_lite/segmentation/metric.py +119 -0
- {valor_lite-0.33.6.dist-info → valor_lite-0.33.8.dist-info}/METADATA +1 -1
- valor_lite-0.33.8.dist-info/RECORD +22 -0
- valor_lite-0.33.6.dist-info/RECORD +0 -17
- {valor_lite-0.33.6.dist-info → valor_lite-0.33.8.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.6.dist-info → valor_lite-0.33.8.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.6.dist-info → valor_lite-0.33.8.dist-info}/top_level.txt +0 -0
valor_lite/classification/annotation.py

```diff
@@ -4,10 +4,14 @@ from dataclasses import dataclass
 @dataclass
 class Classification:
     uid: str
-
-    predictions: list[
+    groundtruth: str
+    predictions: list[str]
     scores: list[float]

     def __post_init__(self):
+        if not isinstance(self.groundtruth, str):
+            raise ValueError(
+                "A classification must contain a single groundtruth."
+            )
         if len(self.predictions) != len(self.scores):
             raise ValueError("There must be a score per prediction label.")
```
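This annotation change is the core of the release: each `Classification` now carries exactly one ground-truth label. A minimal usage sketch against the new fields (the import path and label strings are illustrative, not taken from this diff):

```python
from valor_lite.classification import Classification

# One ground truth per datum; predictions remain a scored label list.
clf = Classification(
    uid="datum0",
    groundtruth="cat",
    predictions=["cat", "dog", "bird"],
    scores=[0.7, 0.2, 0.1],
)

# __post_init__ now enforces both invariants:
#   - `groundtruth` must be a single string,
#   - there must be one score per prediction label.
```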
valor_lite/classification/computation.py

```diff
@@ -3,28 +3,18 @@ from numpy.typing import NDArray


 def _compute_rocauc(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
     n_datums: int,
     n_labels: int,
-    n_label_keys: int,
     mask_matching_labels: NDArray[np.bool_],
     pd_labels: NDArray[np.int32],
 ):
     """
     Compute ROCAUC and mean ROCAUC.
     """
-    count_labels_per_key = np.bincount(label_metadata[:, 2])
-    count_groundtruths_per_key = np.bincount(
-        label_metadata[:, 2],
-        weights=label_metadata[:, 0],
-        minlength=n_label_keys,
-    )
-
     positive_count = label_metadata[:, 0]
-    negative_count = (
-        count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
-    )
+    negative_count = label_metadata[:, 1] - label_metadata[:, 0]

     true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
     false_positives = np.zeros_like(true_positives)
@@ -35,7 +25,6 @@ def _compute_rocauc(
             continue

         mask_pds = pd_labels == label_idx
-
         true_positives[label_idx] = mask_matching_labels[mask_pds]
         false_positives[label_idx] = ~mask_matching_labels[mask_pds]
         scores[label_idx] = data[mask_pds, 3]
@@ -70,32 +59,25 @@ def _compute_rocauc(
     rocauc = np.trapz(x=fpr, y=tpr, axis=1)  # type: ignore - numpy will be switching to `trapezoid` in the future.

     # compute mean rocauc
-
-    mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
-    np.divide(
-        summed_rocauc,
-        count_labels_per_key,
-        where=count_labels_per_key > 1e-9,
-        out=mean_rocauc,
-    )
+    mean_rocauc = rocauc.mean()

     return rocauc, mean_rocauc


 def compute_metrics(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_datums: int,
 ) -> tuple[
     NDArray[np.int32],
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    float,
 ]:
     """
     Computes classification metrics.
```
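With the label-key bookkeeping removed, mROCAUC collapses from a per-key weighted division into a plain mean over the per-label ROCAUC vector. A small numpy sketch of the new behavior (values are illustrative):

```python
import numpy as np

# Per-label ROCAUC as returned by _compute_rocauc (illustrative values).
rocauc = np.array([0.92, 0.85, 0.78])

# 0.33.8: mROCAUC is the unweighted mean over all labels, returned as a float.
mean_rocauc = rocauc.mean()  # 0.85
```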
```diff
@@ -110,14 +92,14 @@ def compute_metrics(

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A sorted array of classification pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    score_thresholds : NDArray[np.
+    score_thresholds : NDArray[np.float64]
         A 1-D array contains score thresholds to compute metrics over.
     hardmax : bool
-        Option to only allow a single positive prediction
+        Option to only allow a single positive prediction.
     n_datums : int
         The number of datums being operated over.

@@ -125,22 +107,21 @@ def compute_metrics(
     -------
     NDArray[np.int32]
         TP, FP, FN, TN counts.
-    NDArray[np.
+    NDArray[np.float64]
         Precision.
-    NDArray[np.
+    NDArray[np.float64]
         Recall.
-    NDArray[np.
+    NDArray[np.float64]
         Accuracy
-    NDArray[np.
+    NDArray[np.float64]
         F1 Score
-    NDArray[np.
+    NDArray[np.float64]
         ROCAUC.
-
+    float
         mROCAUC.
     """

     n_labels = label_metadata.shape[0]
-    n_label_keys = np.unique(label_metadata[:, 2]).size
     n_scores = score_thresholds.shape[0]

     pd_labels = data[:, 2].astype(int)
@@ -155,7 +136,6 @@ def compute_metrics(
         label_metadata=label_metadata,
         n_datums=n_datums,
         n_labels=n_labels,
-        n_label_keys=n_label_keys,
         mask_matching_labels=mask_matching_labels,
         pd_labels=pd_labels,
     )
@@ -229,16 +209,16 @@ def compute_metrics(


 def _count_with_examples(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.int32]]:
     """
     Helper function for counting occurences of unique detailed pairs.

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A masked portion of a detailed pairs array.
     unique_idx : int | list[int]
         The index or indices upon which uniqueness is constrained.
@@ -247,7 +227,7 @@ def _count_with_examples(

     Returns
     -------
-    NDArray[np.
+    NDArray[np.float64]
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
@@ -267,13 +247,12 @@ def _count_with_examples(


 def compute_confusion_matrix(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_examples: int,
-) -> tuple[NDArray[np.
-
+) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
     """
     Compute detailed confusion matrix.

@@ -287,20 +266,20 @@ def compute_confusion_matrix(

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A sorted array summarizing the IOU calculations of one or more pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    iou_thresholds : NDArray[np.
+    iou_thresholds : NDArray[np.float64]
         A 1-D array containing IoU thresholds.
-    score_thresholds : NDArray[np.
+    score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
     n_examples : int
         The maximum number of examples to return per count.

     Returns
     -------
-    NDArray[np.
+    NDArray[np.float64]
         Confusion matrix.
     NDArray[np.int32]
         Ground truths with missing predictions.
```
valor_lite/classification/manager.py

```diff
@@ -67,13 +67,8 @@ class Evaluator:
         self.index_to_uid: dict[int, str] = dict()

         # label reference
-        self.label_to_index: dict[
-        self.index_to_label: dict[int,
-
-        # label key reference
-        self.index_to_label_key: dict[int, str] = dict()
-        self.label_key_to_index: dict[str, int] = dict()
-        self.label_index_to_label_key_index: dict[int, int] = dict()
+        self.label_to_index: dict[str, int] = dict()
+        self.index_to_label: dict[int, str] = dict()

         # computation caches
         self._detailed_pairs = np.array([])
@@ -81,7 +76,7 @@ class Evaluator:
         self._label_metadata_per_datum = np.array([], dtype=np.int32)

     @property
-    def ignored_prediction_labels(self) -> list[
+    def ignored_prediction_labels(self) -> list[str]:
         """
         Prediction labels that are not present in the ground truth set.
         """
@@ -92,7 +87,7 @@ class Evaluator:
         ]

     @property
-    def missing_prediction_labels(self) -> list[
+    def missing_prediction_labels(self) -> list[str]:
         """
         Ground truth labels that are not present in the prediction set.
         """
@@ -119,8 +114,7 @@ class Evaluator:
     def create_filter(
         self,
         datum_uids: list[str] | NDArray[np.int32] | None = None,
-        labels: list[
-        label_keys: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a boolean mask that can be passed to an evaluation.
@@ -129,10 +123,8 @@ class Evaluator:
         ----------
         datum_uids : list[str] | NDArray[np.int32], optional
             An optional list of string uids or a numpy array of uid indices.
-        labels : list[
+        labels : list[str] | NDArray[np.int32], optional
             An optional list of labels or a numpy array of label indices.
-        label_keys : list[str] | NDArray[np.int32], optional
-            An optional list of label keys or a numpy array of label key indices.

         Returns
         -------
@@ -179,36 +171,18 @@ class Evaluator:
             mask[labels] = True
             mask_labels &= mask

-        if label_keys is not None:
-            if isinstance(label_keys, list):
-                label_keys = np.array(
-                    [self.label_key_to_index[key] for key in label_keys]
-                )
-            label_indices = np.where(
-                np.isclose(self._label_metadata[:, 2], label_keys)
-            )[0]
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._detailed_pairs[:, 1].astype(int), label_indices)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[label_indices] = True
-            mask_labels &= mask
-
         mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
         label_metadata_per_datum[:, ~mask] = 0

         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
-        label_metadata
+        label_metadata = np.transpose(
             np.sum(
                 label_metadata_per_datum,
                 axis=1,
             )
         )
-
+
         n_datums = int(np.sum(label_metadata[:, 0]))

         return Filter(
```
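`create_filter` loses its `label_keys` parameter along with the rest of the key machinery. A hedged sketch of the new call, assuming `evaluator` is an `Evaluator` produced by `DataLoader.finalize()` (uids and labels are illustrative):

```python
# Filters now narrow an evaluation by datum uids and flat labels only.
filter_ = evaluator.create_filter(
    datum_uids=["uid0", "uid1"],
    labels=["cat", "dog"],  # the former `label_keys` argument is gone
)
```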
```diff
@@ -241,6 +215,8 @@ class Evaluator:
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.

         Returns
         -------
```
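The new `as_dict` flag is visible here only through its docstring entry; the enclosing method's name is not shown in this hunk. A sketch of the likely call, assuming it is the `Evaluator.evaluate` method this class exposes:

```python
# With as_dict=True the evaluator returns plain dictionaries instead of
# metric dataclasses; the default (False) keeps the previous return type.
metrics = evaluator.evaluate(as_dict=True)
```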
```diff
@@ -286,10 +262,8 @@ class Evaluator:

         metrics[MetricType.mROCAUC] = [
             mROCAUC(
-                value=mean_rocauc
-                label_key=self.index_to_label_key[label_key_idx],
+                value=mean_rocauc,
             )
-            for label_key_idx in range(len(self.label_key_to_index))
         ]

         for label_idx, label in self.index_to_label.items():
@@ -364,8 +338,7 @@ class Evaluator:

     def _unpack_confusion_matrix(
         self,
-        confusion_matrix: NDArray[np.
-        label_key_idx: int,
+        confusion_matrix: NDArray[np.float64],
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[
@@ -405,8 +378,8 @@ class Evaluator:
         )

         return {
-            self.index_to_label[gt_label_idx]
-            self.index_to_label[pd_label_idx]
+            self.index_to_label[gt_label_idx]: {
+                self.index_to_label[pd_label_idx]: {
                     "count": max(
                         int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
                         0,
@@ -428,22 +401,13 @@ class Evaluator:
                     ],
                 }
                 for pd_label_idx in range(number_of_labels)
-                if (
-                    self.label_index_to_label_key_index[pd_label_idx]
-                    == label_key_idx
-                )
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }

     def _unpack_missing_predictions(
         self,
         missing_predictions: NDArray[np.int32],
-        label_key_idx: int,
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
@@ -461,7 +425,7 @@ class Evaluator:
         )

         return {
-            self.index_to_label[gt_label_idx]
+            self.index_to_label[gt_label_idx]: {
                 "count": max(
                     int(missing_predictions[gt_label_idx, 0]),
                     0,
@@ -477,15 +441,11 @@ class Evaluator:
                 ],
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }

     def _compute_confusion_matrix(
         self,
-        data: NDArray[np.
+        data: NDArray[np.float64],
         label_metadata: NDArray[np.int32],
         score_thresholds: list[float],
         hardmax: bool,
@@ -496,7 +456,7 @@ class Evaluator:

         Parameters
         ----------
-        data : NDArray[np.
+        data : NDArray[np.float64]
             A data array containing classification pairs.
         label_metadata : NDArray[np.int32]
             An integer array containing label metadata.
@@ -510,7 +470,7 @@ class Evaluator:
         Returns
         -------
         list[ConfusionMatrix]
-            A list of ConfusionMatrix
+            A list of ConfusionMatrix objects.
         """

         if data.size == 0:
@@ -528,22 +488,18 @@ class Evaluator:
         return [
             ConfusionMatrix(
                 score_threshold=score_thresholds[score_idx],
-                label_key=label_key,
                 number_of_examples=number_of_examples,
                 confusion_matrix=self._unpack_confusion_matrix(
                     confusion_matrix=confusion_matrix[score_idx, :, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
                 missing_predictions=self._unpack_missing_predictions(
                     missing_predictions=missing_predictions[score_idx, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
             )
-            for label_key_idx, label_key in self.index_to_label_key.items()
             for score_idx in range(n_scores)
         ]

@@ -578,77 +534,50 @@ class DataLoader:
         self._evaluator.index_to_uid[index] = uid
         return self._evaluator.uid_to_index[uid]

-    def _add_label(self, label:
+    def _add_label(self, label: str) -> int:
         """
         Helper function for adding a label to the cache.

         Parameters
         ----------
-        label :
-
+        label : str
+            A string representing a label.

         Returns
         -------
         int
             Label index.
-        int
-            Label key index.
         """
         label_id = len(self._evaluator.index_to_label)
-        label_key_id = len(self._evaluator.index_to_label_key)
         if label not in self._evaluator.label_to_index:
             self._evaluator.label_to_index[label] = label_id
             self._evaluator.index_to_label[label_id] = label

-            # update label key index
-            if label[0] not in self._evaluator.label_key_to_index:
-                self._evaluator.label_key_to_index[label[0]] = label_key_id
-                self._evaluator.index_to_label_key[label_key_id] = label[0]
-                label_key_id += 1
-
-            self._evaluator.label_index_to_label_key_index[
-                label_id
-            ] = self._evaluator.label_key_to_index[label[0]]
             label_id += 1

-        return
-            self._evaluator.label_to_index[label],
-            self._evaluator.label_key_to_index[label[0]],
-        )
+        return self._evaluator.label_to_index[label]

     def _add_data(
         self,
         uid_index: int,
-
-
+        groundtruth: int,
+        predictions: list[tuple[int, float]],
     ):
-        gt_keys = set(keyed_groundtruths.keys())
-        pd_keys = set(keyed_predictions.keys())
-        joint_keys = gt_keys.intersection(pd_keys)
-
-        gt_unique_keys = gt_keys - pd_keys
-        pd_unique_keys = pd_keys - gt_keys
-        if gt_unique_keys or pd_unique_keys:
-            raise ValueError(
-                "Label keys must match between ground truths and predictions."
-            )

         pairs = list()
-        for
-
-
-
-
-
-
-                (
-
-
-
-                    float(score),
-                    float(max_score_idx == idx),
-                )
+        scores = np.array([score for _, score in predictions])
+        max_score_idx = np.argmax(scores)
+
+        for idx, (plabel, score) in enumerate(predictions):
+            pairs.append(
+                (
+                    float(uid_index),
+                    float(groundtruth),
+                    float(plabel),
+                    float(score),
+                    float(max_score_idx == idx),
                 )
+            )

         if self._evaluator._detailed_pairs.size == 0:
             self._evaluator._detailed_pairs = np.array(pairs)
```
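`_add_data` now builds one row per (datum, prediction) pair directly from the flat label indices. An illustrative sketch of the resulting `_detailed_pairs` row layout (values are made up):

```python
import numpy as np

# Row layout written by _add_data:
#   [datum index, groundtruth label index, prediction label index,
#    prediction score, hardmax flag (1.0 only for the top-scoring prediction)]
detailed_pairs = np.array(
    [
        [0.0, 1.0, 1.0, 0.7, 1.0],  # datum 0: gt "cat", predicted "cat" (top score)
        [0.0, 1.0, 2.0, 0.3, 0.0],  # datum 0: gt "cat", predicted "dog"
    ]
)
```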
```diff
@@ -680,27 +609,29 @@ class DataLoader:
         disable_tqdm = not show_progress
         for classification in tqdm(classifications, disable=disable_tqdm):

+            if len(classification.predictions) == 0:
+                raise ValueError(
+                    "Classifications must contain at least one prediction."
+                )
             # update metadata
             self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths +=
+            self._evaluator.n_groundtruths += 1
             self._evaluator.n_predictions += len(classification.predictions)

             # update datum uid index
             uid_index = self._add_datum(uid=classification.uid)

             # cache labels and annotations
-
-
-
-
-
-
-            for idx, (plabel, pscore) in enumerate(
-                zip(classification.predictions, classification.scores)
+            groundtruth = self._add_label(classification.groundtruth)
+            self.groundtruth_count[groundtruth][uid_index] += 1
+
+            predictions = list()
+            for plabel, pscore in zip(
+                classification.predictions, classification.scores
             ):
-                label_idx
+                label_idx = self._add_label(plabel)
                 self.prediction_count[label_idx][uid_index] += 1
-
+                predictions.append(
                     (
                         label_idx,
                         pscore,
```
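The loader now fails fast on datums with no predictions rather than silently producing empty pairs. A sketch of the failure mode, assuming the `DataLoader.add_data` entry point that drives the loop above:

```python
from valor_lite.classification import Classification, DataLoader

loader = DataLoader()
try:
    loader.add_data(
        [Classification(uid="datum0", groundtruth="cat", predictions=[], scores=[])]
    )
except ValueError as err:
    print(err)  # Classifications must contain at least one prediction.
```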
```diff
@@ -709,8 +640,8 @@ class DataLoader:

             self._add_data(
                 uid_index=uid_index,
-
-
+                groundtruth=groundtruth,
+                predictions=predictions,
             )

     def add_data_from_valor_dict(
@@ -743,31 +674,38 @@ class DataLoader:
             uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])

             # cache labels and annotations
-
-
+            predictions = list()
+            groundtruths = None
             for gann in groundtruth["annotations"]:
                 for valor_label in gann["labels"]:
-                    glabel =
-                    label_idx
+                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
+                    label_idx = self._add_label(glabel)
                     self.groundtruth_count[label_idx][uid_index] += 1
-
+                    groundtruths = label_idx
             for pann in prediction["annotations"]:
                 for valor_label in pann["labels"]:
-                    plabel =
+                    plabel = f'{valor_label["key"]}_{valor_label["value"]}'
                     pscore = valor_label["score"]
-                    label_idx
+                    label_idx = self._add_label(plabel)
                     self.prediction_count[label_idx][uid_index] += 1
-
+                    predictions.append(
                         (
                             label_idx,
                             pscore,
                         )
                     )

+            # fix type error where groundtruths can possibly be unbound now that it's a float
+            # in practice, this error should never be hit since groundtruths can't be empty without throwing a ValueError earlier in the flow
+            if groundtruths is None:
+                raise ValueError(
+                    "Expected a value for groundtruths, but got None."
+                )
+
             self._add_data(
                 uid_index=uid_index,
-
-
+                groundtruth=groundtruths,
+                predictions=predictions,
             )

     def finalize(self) -> Evaluator:
```
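When loading from Valor dictionaries, the old `(key, value)` label pairs are flattened into single strings with an underscore separator, mirroring the removal of label keys across the package. An illustrative before/after:

```python
valor_label = {"key": "class", "value": "cat", "score": 0.9}

# 0.33.6 kept `key` and `value` separate; 0.33.8 joins them into one flat label.
flat_label = f'{valor_label["key"]}_{valor_label["value"]}'  # -> "class_cat"
```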
```diff
@@ -820,7 +758,6 @@ class DataLoader:
                             1, :, label_idx
                         ]
                     ),
-                    self._evaluator.label_index_to_label_key_index[label_idx],
                 ]
                 for label_idx in range(n_labels)
             ],
```