valor-lite 0.33.7__py3-none-any.whl → 0.33.8__py3-none-any.whl
This diff shows the changes between the two publicly released versions of the package, as published to their registry, and is provided for informational purposes only.
- valor_lite/classification/annotation.py +6 -2
- valor_lite/classification/computation.py +31 -52
- valor_lite/classification/manager.py +66 -131
- valor_lite/classification/metric.py +7 -23
- valor_lite/detection/annotation.py +64 -29
- valor_lite/detection/computation.py +130 -92
- valor_lite/detection/manager.py +202 -462
- valor_lite/detection/metric.py +20 -48
- valor_lite/segmentation/__init__.py +27 -0
- valor_lite/segmentation/annotation.py +49 -0
- valor_lite/segmentation/computation.py +186 -0
- valor_lite/segmentation/manager.py +532 -0
- valor_lite/segmentation/metric.py +119 -0
- {valor_lite-0.33.7.dist-info → valor_lite-0.33.8.dist-info}/METADATA +1 -1
- valor_lite-0.33.8.dist-info/RECORD +22 -0
- valor_lite-0.33.7.dist-info/RECORD +0 -17
- {valor_lite-0.33.7.dist-info → valor_lite-0.33.8.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.7.dist-info → valor_lite-0.33.8.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.7.dist-info → valor_lite-0.33.8.dist-info}/top_level.txt +0 -0
valor_lite/classification/annotation.py

@@ -4,10 +4,14 @@ from dataclasses import dataclass
 @dataclass
 class Classification:
     uid: str
-
-    predictions: list[
+    groundtruth: str
+    predictions: list[str]
     scores: list[float]

     def __post_init__(self):
+        if not isinstance(self.groundtruth, str):
+            raise ValueError(
+                "A classification must contain a single groundtruth."
+            )
         if len(self.predictions) != len(self.scores):
             raise ValueError("There must be a score per prediction label.")
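For illustration only (not part of the published diff): under the new schema above, a `Classification` carries exactly one ground-truth label string. A minimal sketch, assuming `Classification` is importable from `valor_lite.classification` as the file layout at the top suggests:

    from valor_lite.classification import Classification  # import path assumed

    clf = Classification(
        uid="datum0",
        groundtruth="dog",
        predictions=["dog", "cat", "bird"],
        scores=[0.7, 0.2, 0.1],
    )
    # Passing anything other than a single string as `groundtruth` now raises
    # ValueError("A classification must contain a single groundtruth.") in __post_init__.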
valor_lite/classification/computation.py

@@ -3,28 +3,18 @@ from numpy.typing import NDArray


 def _compute_rocauc(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
     n_datums: int,
     n_labels: int,
-    n_label_keys: int,
     mask_matching_labels: NDArray[np.bool_],
     pd_labels: NDArray[np.int32],
 ):
     """
     Compute ROCAUC and mean ROCAUC.
     """
-    count_labels_per_key = np.bincount(label_metadata[:, 2])
-    count_groundtruths_per_key = np.bincount(
-        label_metadata[:, 2],
-        weights=label_metadata[:, 0],
-        minlength=n_label_keys,
-    )
-
     positive_count = label_metadata[:, 0]
-    negative_count = (
-        count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
-    )
+    negative_count = label_metadata[:, 1] - label_metadata[:, 0]

     true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
     false_positives = np.zeros_like(true_positives)
@@ -35,7 +25,6 @@ def _compute_rocauc(
             continue

         mask_pds = pd_labels == label_idx
-
         true_positives[label_idx] = mask_matching_labels[mask_pds]
         false_positives[label_idx] = ~mask_matching_labels[mask_pds]
         scores[label_idx] = data[mask_pds, 3]
@@ -70,32 +59,25 @@ def _compute_rocauc(
     rocauc = np.trapz(x=fpr, y=tpr, axis=1)  # type: ignore - numpy will be switching to `trapezoid` in the future.

     # compute mean rocauc
-
-    mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
-    np.divide(
-        summed_rocauc,
-        count_labels_per_key,
-        where=count_labels_per_key > 1e-9,
-        out=mean_rocauc,
-    )
+    mean_rocauc = rocauc.mean()

     return rocauc, mean_rocauc


 def compute_metrics(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_datums: int,
 ) -> tuple[
     NDArray[np.int32],
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-    NDArray[np.
-
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    float,
 ]:
     """
     Computes classification metrics.
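The mean-ROCAUC change above is the most visible behavioural difference in this file: 0.33.7 averaged per-label ROCAUC within each label key (the removed `np.divide(..., count_labels_per_key, ...)` block), while 0.33.8 drops label keys entirely and returns a single unweighted mean over all labels. A minimal sketch of the new behaviour, with made-up values:

    import numpy as np

    rocauc = np.array([0.90, 0.60, 0.75])  # hypothetical per-label ROCAUC values
    mean_rocauc = rocauc.mean()            # 0.75 -- returned as the single mROCAUC float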
@@ -110,14 +92,14 @@ def compute_metrics(

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A sorted array of classification pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    score_thresholds : NDArray[np.
+    score_thresholds : NDArray[np.float64]
         A 1-D array contains score thresholds to compute metrics over.
     hardmax : bool
-        Option to only allow a single positive prediction
+        Option to only allow a single positive prediction.
     n_datums : int
         The number of datums being operated over.

@@ -125,22 +107,21 @@ def compute_metrics(
     -------
     NDArray[np.int32]
         TP, FP, FN, TN counts.
-    NDArray[np.
+    NDArray[np.float64]
         Precision.
-    NDArray[np.
+    NDArray[np.float64]
         Recall.
-    NDArray[np.
+    NDArray[np.float64]
         Accuracy
-    NDArray[np.
+    NDArray[np.float64]
         F1 Score
-    NDArray[np.
+    NDArray[np.float64]
         ROCAUC.
-
+    float
         mROCAUC.
     """

     n_labels = label_metadata.shape[0]
-    n_label_keys = np.unique(label_metadata[:, 2]).size
     n_scores = score_thresholds.shape[0]

     pd_labels = data[:, 2].astype(int)
@@ -155,7 +136,6 @@ def compute_metrics(
         label_metadata=label_metadata,
         n_datums=n_datums,
         n_labels=n_labels,
-        n_label_keys=n_label_keys,
         mask_matching_labels=mask_matching_labels,
         pd_labels=pd_labels,
     )
@@ -229,16 +209,16 @@ def compute_metrics(


 def _count_with_examples(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.int32]]:
     """
     Helper function for counting occurences of unique detailed pairs.

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A masked portion of a detailed pairs array.
     unique_idx : int | list[int]
         The index or indices upon which uniqueness is constrained.
@@ -247,7 +227,7 @@ def _count_with_examples(

     Returns
     -------
-    NDArray[np.
+    NDArray[np.float64]
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
@@ -267,13 +247,12 @@


 def compute_confusion_matrix(
-    data: NDArray[np.
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_examples: int,
-) -> tuple[NDArray[np.
-
+) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
     """
     Compute detailed confusion matrix.

@@ -287,20 +266,20 @@ def compute_confusion_matrix(

     Parameters
     ----------
-    data : NDArray[np.
+    data : NDArray[np.float64]
         A sorted array summarizing the IOU calculations of one or more pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    iou_thresholds : NDArray[np.
+    iou_thresholds : NDArray[np.float64]
         A 1-D array containing IoU thresholds.
-    score_thresholds : NDArray[np.
+    score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
     n_examples : int
         The maximum number of examples to return per count.

     Returns
     -------
-    NDArray[np.
+    NDArray[np.float64]
         Confusion matrix.
     NDArray[np.int32]
         Ground truths with missing predictions.
valor_lite/classification/manager.py

@@ -67,13 +67,8 @@ class Evaluator:
         self.index_to_uid: dict[int, str] = dict()

         # label reference
-        self.label_to_index: dict[
-        self.index_to_label: dict[int,
-
-        # label key reference
-        self.index_to_label_key: dict[int, str] = dict()
-        self.label_key_to_index: dict[str, int] = dict()
-        self.label_index_to_label_key_index: dict[int, int] = dict()
+        self.label_to_index: dict[str, int] = dict()
+        self.index_to_label: dict[int, str] = dict()

         # computation caches
         self._detailed_pairs = np.array([])
@@ -81,7 +76,7 @@ class Evaluator:
         self._label_metadata_per_datum = np.array([], dtype=np.int32)

     @property
-    def ignored_prediction_labels(self) -> list[
+    def ignored_prediction_labels(self) -> list[str]:
         """
         Prediction labels that are not present in the ground truth set.
         """
@@ -92,7 +87,7 @@ class Evaluator:
         ]

     @property
-    def missing_prediction_labels(self) -> list[
+    def missing_prediction_labels(self) -> list[str]:
         """
         Ground truth labels that are not present in the prediction set.
         """
@@ -119,8 +114,7 @@ class Evaluator:
     def create_filter(
         self,
         datum_uids: list[str] | NDArray[np.int32] | None = None,
-        labels: list[
-        label_keys: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a boolean mask that can be passed to an evaluation.
@@ -129,10 +123,8 @@ class Evaluator:
         ----------
         datum_uids : list[str] | NDArray[np.int32], optional
             An optional list of string uids or a numpy array of uid indices.
-        labels : list[
+        labels : list[str] | NDArray[np.int32], optional
             An optional list of labels or a numpy array of label indices.
-        label_keys : list[str] | NDArray[np.int32], optional
-            An optional list of label keys or a numpy array of label key indices.

         Returns
         -------
@@ -179,36 +171,18 @@ class Evaluator:
             mask[labels] = True
             mask_labels &= mask

-        if label_keys is not None:
-            if isinstance(label_keys, list):
-                label_keys = np.array(
-                    [self.label_key_to_index[key] for key in label_keys]
-                )
-            label_indices = np.where(
-                np.isclose(self._label_metadata[:, 2], label_keys)
-            )[0]
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._detailed_pairs[:, 1].astype(int), label_indices)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[label_indices] = True
-            mask_labels &= mask
-
         mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
         label_metadata_per_datum[:, ~mask] = 0

         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
-        label_metadata
+        label_metadata = np.transpose(
             np.sum(
                 label_metadata_per_datum,
                 axis=1,
             )
         )
-
+
         n_datums = int(np.sum(label_metadata[:, 0]))

         return Filter(
@@ -288,10 +262,8 @@ class Evaluator:

         metrics[MetricType.mROCAUC] = [
             mROCAUC(
-                value=mean_rocauc
-                label_key=self.index_to_label_key[label_key_idx],
+                value=mean_rocauc,
             )
-            for label_key_idx in range(len(self.label_key_to_index))
         ]

         for label_idx, label in self.index_to_label.items():
@@ -366,8 +338,7 @@ class Evaluator:

     def _unpack_confusion_matrix(
         self,
-        confusion_matrix: NDArray[np.
-        label_key_idx: int,
+        confusion_matrix: NDArray[np.float64],
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[
@@ -407,8 +378,8 @@ class Evaluator:
         )

         return {
-            self.index_to_label[gt_label_idx]
-                self.index_to_label[pd_label_idx]
+            self.index_to_label[gt_label_idx]: {
+                self.index_to_label[pd_label_idx]: {
                     "count": max(
                         int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
                         0,
@@ -430,22 +401,13 @@ class Evaluator:
                     ],
                 }
                 for pd_label_idx in range(number_of_labels)
-                if (
-                    self.label_index_to_label_key_index[pd_label_idx]
-                    == label_key_idx
-                )
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }

     def _unpack_missing_predictions(
         self,
         missing_predictions: NDArray[np.int32],
-        label_key_idx: int,
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
@@ -463,7 +425,7 @@ class Evaluator:
         )

         return {
-            self.index_to_label[gt_label_idx]
+            self.index_to_label[gt_label_idx]: {
                 "count": max(
                     int(missing_predictions[gt_label_idx, 0]),
                     0,
@@ -479,15 +441,11 @@ class Evaluator:
                 ],
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }

     def _compute_confusion_matrix(
         self,
-        data: NDArray[np.
+        data: NDArray[np.float64],
         label_metadata: NDArray[np.int32],
         score_thresholds: list[float],
         hardmax: bool,
@@ -498,7 +456,7 @@ class Evaluator:

         Parameters
         ----------
-        data : NDArray[np.
+        data : NDArray[np.float64]
             A data array containing classification pairs.
         label_metadata : NDArray[np.int32]
             An integer array containing label metadata.
@@ -512,7 +470,7 @@ class Evaluator:
         Returns
         -------
         list[ConfusionMatrix]
-            A list of ConfusionMatrix
+            A list of ConfusionMatrix objects.
         """

         if data.size == 0:
@@ -530,22 +488,18 @@ class Evaluator:
         return [
             ConfusionMatrix(
                 score_threshold=score_thresholds[score_idx],
-                label_key=label_key,
                 number_of_examples=number_of_examples,
                 confusion_matrix=self._unpack_confusion_matrix(
                     confusion_matrix=confusion_matrix[score_idx, :, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
                 missing_predictions=self._unpack_missing_predictions(
                     missing_predictions=missing_predictions[score_idx, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
             )
-            for label_key_idx, label_key in self.index_to_label_key.items()
             for score_idx in range(n_scores)
         ]
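Taken together, the Evaluator changes above remove the label-key dimension: labels are plain strings, metrics are no longer grouped per key, and `create_filter` loses its `label_keys` argument. A hedged usage sketch (the `evaluator` object and its data are assumed, not shown in this diff):

    # assuming `evaluator` is an Evaluator returned by DataLoader.finalize()
    filter_ = evaluator.create_filter(
        datum_uids=["uid0", "uid1"],  # hypothetical datum uids
        labels=["dog", "cat"],        # plain string labels; no label_keys in 0.33.8
    )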
@@ -580,77 +534,50 @@ class DataLoader:
             self._evaluator.index_to_uid[index] = uid
         return self._evaluator.uid_to_index[uid]

-    def _add_label(self, label:
+    def _add_label(self, label: str) -> int:
         """
         Helper function for adding a label to the cache.

         Parameters
         ----------
-        label :
-
+        label : str
+            A string representing a label.

         Returns
         -------
         int
             Label index.
-        int
-            Label key index.
         """
         label_id = len(self._evaluator.index_to_label)
-        label_key_id = len(self._evaluator.index_to_label_key)
         if label not in self._evaluator.label_to_index:
             self._evaluator.label_to_index[label] = label_id
             self._evaluator.index_to_label[label_id] = label

-            # update label key index
-            if label[0] not in self._evaluator.label_key_to_index:
-                self._evaluator.label_key_to_index[label[0]] = label_key_id
-                self._evaluator.index_to_label_key[label_key_id] = label[0]
-                label_key_id += 1
-
-            self._evaluator.label_index_to_label_key_index[
-                label_id
-            ] = self._evaluator.label_key_to_index[label[0]]
             label_id += 1

-        return
-            self._evaluator.label_to_index[label],
-            self._evaluator.label_key_to_index[label[0]],
-        )
+        return self._evaluator.label_to_index[label]

     def _add_data(
         self,
         uid_index: int,
-
-
+        groundtruth: int,
+        predictions: list[tuple[int, float]],
     ):
-        gt_keys = set(keyed_groundtruths.keys())
-        pd_keys = set(keyed_predictions.keys())
-        joint_keys = gt_keys.intersection(pd_keys)
-
-        gt_unique_keys = gt_keys - pd_keys
-        pd_unique_keys = pd_keys - gt_keys
-        if gt_unique_keys or pd_unique_keys:
-            raise ValueError(
-                "Label keys must match between ground truths and predictions."
-            )

         pairs = list()
-        for
-
-
-
-
-
-            (
-
-
-
-                float(score),
-                float(max_score_idx == idx),
-            )
+        scores = np.array([score for _, score in predictions])
+        max_score_idx = np.argmax(scores)
+
+        for idx, (plabel, score) in enumerate(predictions):
+            pairs.append(
+                (
+                    float(uid_index),
+                    float(groundtruth),
+                    float(plabel),
+                    float(score),
+                    float(max_score_idx == idx),
                 )
+            )

         if self._evaluator._detailed_pairs.size == 0:
             self._evaluator._detailed_pairs = np.array(pairs)
@@ -682,27 +609,29 @@ class DataLoader:
         disable_tqdm = not show_progress
         for classification in tqdm(classifications, disable=disable_tqdm):

+            if len(classification.predictions) == 0:
+                raise ValueError(
+                    "Classifications must contain at least one prediction."
+                )
             # update metadata
             self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths +=
+            self._evaluator.n_groundtruths += 1
             self._evaluator.n_predictions += len(classification.predictions)

             # update datum uid index
             uid_index = self._add_datum(uid=classification.uid)

             # cache labels and annotations
-
-
-
-
-
-
-            for idx, (plabel, pscore) in enumerate(
-                zip(classification.predictions, classification.scores)
+            groundtruth = self._add_label(classification.groundtruth)
+            self.groundtruth_count[groundtruth][uid_index] += 1
+
+            predictions = list()
+            for plabel, pscore in zip(
+                classification.predictions, classification.scores
             ):
-                label_idx
+                label_idx = self._add_label(plabel)
                 self.prediction_count[label_idx][uid_index] += 1
-
+                predictions.append(
                     (
                         label_idx,
                         pscore,
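The reworked `_add_data` above flattens each classification into one detailed-pair row per prediction: (datum index, ground-truth label index, prediction label index, score, hardmax flag), where the flag marks the highest-scoring prediction. An illustrative sketch with made-up indices:

    # hypothetical: datum 0, ground truth "dog" -> label index 0, predictions "dog"/"cat" -> 0/1
    pairs = [
        (0.0, 0.0, 0.0, 0.7, 1.0),  # "dog" prediction carries the max score -> hardmax flag 1
        (0.0, 0.0, 1.0, 0.3, 0.0),  # "cat" prediction
    ]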
@@ -711,8 +640,8 @@ class DataLoader:

             self._add_data(
                 uid_index=uid_index,
-
-
+                groundtruth=groundtruth,
+                predictions=predictions,
             )

     def add_data_from_valor_dict(
@@ -745,31 +674,38 @@ class DataLoader:
             uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])

             # cache labels and annotations
-
-
+            predictions = list()
+            groundtruths = None
             for gann in groundtruth["annotations"]:
                 for valor_label in gann["labels"]:
-                    glabel =
-                    label_idx
+                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
+                    label_idx = self._add_label(glabel)
                     self.groundtruth_count[label_idx][uid_index] += 1
-
+                    groundtruths = label_idx
             for pann in prediction["annotations"]:
                 for valor_label in pann["labels"]:
-                    plabel =
+                    plabel = f'{valor_label["key"]}_{valor_label["value"]}'
                     pscore = valor_label["score"]
-                    label_idx
+                    label_idx = self._add_label(plabel)
                     self.prediction_count[label_idx][uid_index] += 1
-
+                    predictions.append(
                         (
                             label_idx,
                             pscore,
                         )
                     )

+            # fix type error where groundtruths can possibly be unbound now that it's a float
+            # in practice, this error should never be hit since groundtruths can't be empty without throwing a ValueError earlier in the flow
+            if groundtruths is None:
+                raise ValueError(
+                    "Expected a value for groundtruths, but got None."
+                )
+
             self._add_data(
                 uid_index=uid_index,
-
-
+                groundtruth=groundtruths,
+                predictions=predictions,
             )

     def finalize(self) -> Evaluator:
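In `add_data_from_valor_dict` above, the key/value fields of Valor label dictionaries are now flattened into single label strings of the form `f'{key}_{value}'`. A small sketch of that mapping, using a hypothetical label dict:

    valor_label = {"key": "class", "value": "dog", "score": 0.7}  # hypothetical Valor label
    label = f'{valor_label["key"]}_{valor_label["value"]}'        # -> "class_dog"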
@@ -822,7 +758,6 @@ class DataLoader:
                         1, :, label_idx
                     ]
                 ),
-                self._evaluator.label_index_to_label_key_index[label_idx],
             ]
             for label_idx in range(n_labels)
         ],