valor-lite 0.33.5__py3-none-any.whl → 0.33.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/classification/__init__.py +30 -0
- valor_lite/classification/annotation.py +13 -0
- valor_lite/classification/computation.py +411 -0
- valor_lite/classification/manager.py +842 -0
- valor_lite/classification/metric.py +191 -0
- valor_lite/detection/manager.py +10 -8
- {valor_lite-0.33.5.dist-info → valor_lite-0.33.6.dist-info}/METADATA +1 -1
- valor_lite-0.33.6.dist-info/RECORD +17 -0
- valor_lite-0.33.5.dist-info/RECORD +0 -12
- {valor_lite-0.33.5.dist-info → valor_lite-0.33.6.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.5.dist-info → valor_lite-0.33.6.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.5.dist-info → valor_lite-0.33.6.dist-info}/top_level.txt +0 -0
valor_lite/classification/__init__.py

@@ -0,0 +1,30 @@
+from .annotation import Classification
+from .computation import compute_metrics
+from .manager import DataLoader, Evaluator
+from .metric import (
+    F1,
+    ROCAUC,
+    Accuracy,
+    ConfusionMatrix,
+    Counts,
+    MetricType,
+    Precision,
+    Recall,
+    mROCAUC,
+)
+
+__all__ = [
+    "Classification",
+    "compute_metrics",
+    "MetricType",
+    "Counts",
+    "Precision",
+    "Recall",
+    "Accuracy",
+    "F1",
+    "ROCAUC",
+    "mROCAUC",
+    "ConfusionMatrix",
+    "DataLoader",
+    "Evaluator",
+]
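Everything in `__all__` is re-exported at the subpackage root, so downstream code can import from `valor_lite.classification` directly; `DataLoader` and `Evaluator` are defined in the new `manager.py` (+842 lines). A quick import sketch:

    from valor_lite.classification import Classification, DataLoader, Evaluator, MetricType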
valor_lite/classification/annotation.py

@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+
+
+@dataclass
+class Classification:
+    uid: str
+    groundtruths: list[tuple[str, str]]
+    predictions: list[tuple[str, str]]
+    scores: list[float]
+
+    def __post_init__(self):
+        if len(self.predictions) != len(self.scores):
+            raise ValueError("There must be a score per prediction label.")
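`Classification` is the input record for the new module: one datum, its ground-truth labels, and a score per predicted label. A minimal construction sketch, assuming the `(str, str)` tuples are `(label_key, label_value)` pairs as elsewhere in valor (the diff itself does not spell this out):

    from valor_lite.classification import Classification

    clf = Classification(
        uid="datum_0",
        groundtruths=[("class", "dog")],
        predictions=[("class", "dog"), ("class", "cat")],
        scores=[0.8, 0.2],
    )

    # Mismatched lengths trip the __post_init__ check and raise ValueError:
    # Classification(uid="bad", groundtruths=[("class", "dog")],
    #                predictions=[("class", "dog")], scores=[])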
valor_lite/classification/computation.py

@@ -0,0 +1,411 @@
+import numpy as np
+from numpy.typing import NDArray
+
+
+def _compute_rocauc(
+    data: NDArray[np.floating],
+    label_metadata: NDArray[np.int32],
+    n_datums: int,
+    n_labels: int,
+    n_label_keys: int,
+    mask_matching_labels: NDArray[np.bool_],
+    pd_labels: NDArray[np.int32],
+):
+    """
+    Compute ROCAUC and mean ROCAUC.
+    """
+    count_labels_per_key = np.bincount(label_metadata[:, 2])
+    count_groundtruths_per_key = np.bincount(
+        label_metadata[:, 2],
+        weights=label_metadata[:, 0],
+        minlength=n_label_keys,
+    )
+
+    positive_count = label_metadata[:, 0]
+    negative_count = (
+        count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
+    )
+
+    true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
+    false_positives = np.zeros_like(true_positives)
+    scores = np.zeros_like(true_positives, dtype=np.float64)
+
+    for label_idx in range(n_labels):
+        if label_metadata[label_idx, 1] == 0:
+            continue
+
+        mask_pds = pd_labels == label_idx
+
+        true_positives[label_idx] = mask_matching_labels[mask_pds]
+        false_positives[label_idx] = ~mask_matching_labels[mask_pds]
+        scores[label_idx] = data[mask_pds, 3]
+
+    cumulative_fp = np.cumsum(false_positives, axis=1)
+    cumulative_tp = np.cumsum(true_positives, axis=1)
+
+    fpr = np.zeros_like(true_positives, dtype=np.float64)
+    np.divide(
+        cumulative_fp,
+        negative_count[:, np.newaxis],
+        where=negative_count[:, np.newaxis] > 1e-9,
+        out=fpr,
+    )
+    tpr = np.zeros_like(true_positives, dtype=np.float64)
+    np.divide(
+        cumulative_tp,
+        positive_count[:, np.newaxis],
+        where=positive_count[:, np.newaxis] > 1e-9,
+        out=tpr,
+    )
+
+    # sort by -tpr, -score
+    indices = np.lexsort((-tpr, -scores), axis=1)
+    fpr = np.take_along_axis(fpr, indices, axis=1)
+    tpr = np.take_along_axis(tpr, indices, axis=1)
+
+    # running max of tpr
+    np.maximum.accumulate(tpr, axis=1, out=tpr)
+
+    # compute rocauc
+    rocauc = np.trapz(x=fpr, y=tpr, axis=1)  # type: ignore - numpy will be switching to `trapezoid` in the future.
+
+    # compute mean rocauc
+    summed_rocauc = np.bincount(label_metadata[:, 2], weights=rocauc)
+    mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
+    np.divide(
+        summed_rocauc,
+        count_labels_per_key,
+        where=count_labels_per_key > 1e-9,
+        out=mean_rocauc,
+    )
+
+    return rocauc, mean_rocauc
+
+
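As a sanity check on `_compute_rocauc`: with predictions already sorted by descending score, the cumulative TP/FP counts normalized by the positive/negative totals trace the ROC curve, and the trapezoidal rule integrates it. A single-label toy sketch with made-up numbers (the real function additionally re-sorts and takes a running max of TPR before integrating):

    import numpy as np

    # One label, 2 positives and 2 negatives, sorted by descending score.
    matches = np.array([True, False, True, False])
    tpr = np.cumsum(matches) / 2.0   # [0.5, 0.5, 1.0, 1.0]
    fpr = np.cumsum(~matches) / 2.0  # [0.0, 0.5, 0.5, 1.0]
    auc = np.trapz(y=tpr, x=fpr)     # 0.75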
+def compute_metrics(
+    data: NDArray[np.floating],
+    label_metadata: NDArray[np.int32],
+    score_thresholds: NDArray[np.floating],
+    hardmax: bool,
+    n_datums: int,
+) -> tuple[
+    NDArray[np.int32],
+    NDArray[np.floating],
+    NDArray[np.floating],
+    NDArray[np.floating],
+    NDArray[np.floating],
+    NDArray[np.floating],
+    NDArray[np.floating],
+]:
+    """
+    Computes classification metrics.
+
+    Takes data with shape (N, 5):
+
+    Index 0 - Datum Index
+    Index 1 - GroundTruth Label Index
+    Index 2 - Prediction Label Index
+    Index 3 - Score
+    Index 4 - Hard-Max Score
+
+    Parameters
+    ----------
+    data : NDArray[np.floating]
+        A sorted array of classification pairs.
+    label_metadata : NDArray[np.int32]
+        An array containing metadata related to labels.
+    score_thresholds : NDArray[np.floating]
+        A 1-D array containing score thresholds to compute metrics over.
+    hardmax : bool
+        Option to only allow a single positive prediction per label key.
+    n_datums : int
+        The number of datums being operated over.
+
+    Returns
+    -------
+    NDArray[np.int32]
+        TP, FP, FN, TN counts.
+    NDArray[np.floating]
+        Precision.
+    NDArray[np.floating]
+        Recall.
+    NDArray[np.floating]
+        Accuracy.
+    NDArray[np.floating]
+        F1 Score.
+    NDArray[np.floating]
+        ROCAUC.
+    NDArray[np.floating]
+        mROCAUC.
+    """
+
+    n_labels = label_metadata.shape[0]
+    n_label_keys = np.unique(label_metadata[:, 2]).size
+    n_scores = score_thresholds.shape[0]
+
+    pd_labels = data[:, 2].astype(int)
+
+    mask_matching_labels = np.isclose(data[:, 1], data[:, 2])
+    mask_score_nonzero = ~np.isclose(data[:, 3], 0.0)
+    mask_hardmax = data[:, 4] > 0.5
+
+    # calculate ROCAUC
+    rocauc, mean_rocauc = _compute_rocauc(
+        data=data,
+        label_metadata=label_metadata,
+        n_datums=n_datums,
+        n_labels=n_labels,
+        n_label_keys=n_label_keys,
+        mask_matching_labels=mask_matching_labels,
+        pd_labels=pd_labels,
+    )
+
+    # calculate metrics at various score thresholds
+    counts = np.zeros((n_scores, n_labels, 4), dtype=np.int32)
+    for score_idx in range(n_scores):
+        mask_score_threshold = data[:, 3] >= score_thresholds[score_idx]
+        mask_score = mask_score_nonzero & mask_score_threshold
+
+        if hardmax:
+            mask_score &= mask_hardmax
+
+        mask_tp = mask_matching_labels & mask_score
+        mask_fp = ~mask_matching_labels & mask_score
+        mask_fn = (mask_matching_labels & ~mask_score) | mask_fp
+        mask_tn = ~mask_matching_labels & ~mask_score
+
+        fn = np.unique(data[mask_fn][:, [0, 1]].astype(int), axis=0)
+        tn = np.unique(data[mask_tn][:, [0, 2]].astype(int), axis=0)
+
+        counts[score_idx, :, 0] = np.bincount(
+            pd_labels[mask_tp], minlength=n_labels
+        )
+        counts[score_idx, :, 1] = np.bincount(
+            pd_labels[mask_fp], minlength=n_labels
+        )
+        counts[score_idx, :, 2] = np.bincount(fn[:, 1], minlength=n_labels)
+        counts[score_idx, :, 3] = np.bincount(tn[:, 1], minlength=n_labels)
+
+    recall = np.zeros((n_scores, n_labels), dtype=np.float64)
+    np.divide(
+        counts[:, :, 0],
+        (counts[:, :, 0] + counts[:, :, 2]),
+        where=(counts[:, :, 0] + counts[:, :, 2]) > 1e-9,
+        out=recall,
+    )
+
+    precision = np.zeros_like(recall)
+    np.divide(
+        counts[:, :, 0],
+        (counts[:, :, 0] + counts[:, :, 1]),
+        where=(counts[:, :, 0] + counts[:, :, 1]) > 1e-9,
+        out=precision,
+    )
+
+    accuracy = np.zeros_like(recall)
+    np.divide(
+        (counts[:, :, 0] + counts[:, :, 3]),
+        float(n_datums),
+        out=accuracy,
+    )
+
+    f1_score = np.zeros_like(recall)
+    np.divide(
+        (2 * precision * recall),
+        (precision + recall),
+        where=(precision + recall) > 1e-9,
+        out=f1_score,
+    )
+
+    return (
+        counts,
+        precision,
+        recall,
+        accuracy,
+        f1_score,
+        rocauc,
+        mean_rocauc,
+    )
+
+
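A worked toy call to make the expected layouts concrete. The `label_metadata` column order used below (ground-truth count, prediction count, label-key index) is inferred from how the function indexes it, so treat this as a sketch rather than a documented contract:

    import numpy as np
    from valor_lite.classification import compute_metrics

    # Two datums, two labels ("dog"=0, "cat"=1) under a single label key.
    # Rows are [datum, gt label, pd label, score, hardmax], sorted by descending score.
    data = np.array(
        [
            [0.0, 0.0, 0.0, 0.8, 1.0],  # datum 0: gt=dog, dog @ 0.8 (argmax)
            [1.0, 1.0, 1.0, 0.6, 1.0],  # datum 1: gt=cat, cat @ 0.6 (argmax)
            [1.0, 1.0, 0.0, 0.4, 0.0],  # datum 1: dog @ 0.4
            [0.0, 0.0, 1.0, 0.2, 0.0],  # datum 0: cat @ 0.2
        ]
    )
    label_metadata = np.array([[1, 2, 0], [1, 2, 0]], dtype=np.int32)

    counts, precision, recall, accuracy, f1, rocauc, mean_rocauc = compute_metrics(
        data=data,
        label_metadata=label_metadata,
        score_thresholds=np.array([0.5]),
        hardmax=True,
        n_datums=2,
    )
    # Both datums are classified correctly, so precision, recall, accuracy,
    # and F1 should all be 1.0 per label at the 0.5 threshold, and ROCAUC 1.0.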
+def _count_with_examples(
+    data: NDArray[np.floating],
+    unique_idx: int | list[int],
+    label_idx: int | list[int],
+) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
+    """
+    Helper function for counting occurrences of unique detailed pairs.
+
+    Parameters
+    ----------
+    data : NDArray[np.floating]
+        A masked portion of a detailed pairs array.
+    unique_idx : int | list[int]
+        The index or indices upon which uniqueness is constrained.
+    label_idx : int | list[int]
+        The index or indices within the unique index or indices that encode labels.
+
+    Returns
+    -------
+    NDArray[np.floating]
+        Examples drawn from the data input.
+    NDArray[np.int32]
+        Unique label indices.
+    NDArray[np.int32]
+        Counts for each unique label index.
+    """
+    unique_rows, indices = np.unique(
+        data.astype(int)[:, unique_idx],
+        return_index=True,
+        axis=0,
+    )
+    examples = data[indices]
+    labels, counts = np.unique(
+        unique_rows[:, label_idx], return_counts=True, axis=0
+    )
+    return examples, labels, counts
+
+
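To see what the helper returns, a small sketch using toy rows in the same [datum, gt label, pd label, score, hardmax] layout; `unique_idx=[0, 2]` deduplicates (datum, prediction) pairs, and `label_idx=1` counts over column 1 of the selected pair, i.e. the prediction label:

    import numpy as np

    rows = np.array(
        [
            [0.0, 1.0, 1.0, 0.9, 1.0],
            [0.0, 1.0, 1.0, 0.9, 1.0],  # duplicate (datum 0, label 1) pair
            [1.0, 0.0, 0.0, 0.8, 1.0],
        ]
    )
    examples, labels, counts = _count_with_examples(rows, unique_idx=[0, 2], label_idx=1)
    # examples -> the first row of each unique pair (rows 0 and 2)
    # labels   -> array([0, 1]); counts -> array([1, 1])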
+def compute_confusion_matrix(
+    data: NDArray[np.floating],
+    label_metadata: NDArray[np.int32],
+    score_thresholds: NDArray[np.floating],
+    hardmax: bool,
+    n_examples: int,
+) -> tuple[NDArray[np.floating], NDArray[np.int32]]:
+
+    """
+    Compute detailed confusion matrix.
+
+    Takes data with shape (N, 5):
+
+    Index 0 - Datum Index
+    Index 1 - GroundTruth Label Index
+    Index 2 - Prediction Label Index
+    Index 3 - Score
+    Index 4 - Hard Max Score
+
+    Parameters
+    ----------
+    data : NDArray[np.floating]
+        A sorted array of classification pairs.
+    label_metadata : NDArray[np.int32]
+        An array containing metadata related to labels.
+    score_thresholds : NDArray[np.floating]
+        A 1-D array containing score thresholds.
+    hardmax : bool
+        Option to only allow a single positive prediction per label key.
+    n_examples : int
+        The maximum number of examples to return per count.
+
+    Returns
+    -------
+    NDArray[np.floating]
+        Confusion matrix.
+    NDArray[np.int32]
+        Ground truths with missing predictions.
+    """
+
+    n_labels = label_metadata.shape[0]
+    n_scores = score_thresholds.shape[0]
+
+    confusion_matrix = -1 * np.ones(
+        (n_scores, n_labels, n_labels, 2 * n_examples + 1),
+        dtype=np.float32,
+    )
+    missing_predictions = -1 * np.ones(
+        (n_scores, n_labels, n_examples + 1),
+        dtype=np.int32,
+    )
+
+    mask_label_match = np.isclose(data[:, 1], data[:, 2])
+    mask_score = data[:, 3] > 1e-9
+
+    groundtruths = data[:, [0, 1]].astype(int)
+
+    for score_idx in range(n_scores):
+        mask_score &= data[:, 3] >= score_thresholds[score_idx]
+        if hardmax:
+            mask_score &= data[:, 4] > 0.5
+
+        mask_tp = mask_label_match & mask_score
+        mask_misclf = ~mask_label_match & mask_score
+        mask_misprd = ~(
+            (
+                groundtruths.reshape(-1, 1, 2)
+                == groundtruths[mask_score].reshape(1, -1, 2)
+            )
+            .all(axis=2)
+            .any(axis=1)
+        )
+
+        tp_examples, tp_labels, tp_counts = _count_with_examples(
+            data=data[mask_tp],
+            unique_idx=[0, 2],
+            label_idx=1,
+        )
+        misclf_examples, misclf_labels, misclf_counts = _count_with_examples(
+            data=data[mask_misclf],
+            unique_idx=[0, 1, 2],
+            label_idx=[1, 2],
+        )
+        misprd_examples, misprd_labels, misprd_counts = _count_with_examples(
+            data=data[mask_misprd],
+            unique_idx=[0, 1],
+            label_idx=1,
+        )
+
+        confusion_matrix[score_idx, tp_labels, tp_labels, 0] = tp_counts
+        confusion_matrix[
+            score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0
+        ] = misclf_counts
+
+        missing_predictions[score_idx, misprd_labels, 0] = misprd_counts
+
+        if n_examples > 0:
+            for label_idx in range(n_labels):
+                # true-positive examples
+                mask_tp_label = tp_examples[:, 2] == label_idx
+                if mask_tp_label.sum() > 0:
+                    tp_label_examples = tp_examples[mask_tp_label][:n_examples]
+                    confusion_matrix[
+                        score_idx,
+                        label_idx,
+                        label_idx,
+                        1 : 2 * tp_label_examples.shape[0] + 1,
+                    ] = tp_label_examples[:, [0, 3]].flatten()
+
+                # misclassification examples
+                mask_misclf_gt_label = misclf_examples[:, 1] == label_idx
+                if mask_misclf_gt_label.sum() > 0:
+                    for pd_label_idx in range(n_labels):
+                        mask_misclf_pd_label = (
+                            misclf_examples[:, 2] == pd_label_idx
+                        )
+                        mask_misclf_label_combo = (
+                            mask_misclf_gt_label & mask_misclf_pd_label
+                        )
+                        if mask_misclf_label_combo.sum() > 0:
+                            misclf_label_examples = misclf_examples[
+                                mask_misclf_label_combo
+                            ][:n_examples]
+                            confusion_matrix[
+                                score_idx,
+                                label_idx,
+                                pd_label_idx,
+                                1 : 2 * misclf_label_examples.shape[0] + 1,
+                            ] = misclf_label_examples[:, [0, 3]].flatten()
+
+                # missing prediction examples
+                mask_misprd_label = misprd_examples[:, 1] == label_idx
+                if misprd_examples.size > 0:
+                    misprd_label_examples = misprd_examples[mask_misprd_label][
+                        :n_examples
+                    ]
+                    missing_predictions[
+                        score_idx,
+                        label_idx,
+                        1 : misprd_label_examples.shape[0] + 1,
+                    ] = misprd_label_examples[:, 0].flatten()
+
+    return confusion_matrix, missing_predictions
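Reading the code above, each confusion-matrix cell is packed as [count, datum, score, datum, score, ...] with -1 as the fill value, and each missing-prediction cell as [count, datum, datum, ...]. A small decoding sketch (the helper name is ours, not part of the package):

    def unpack_cell(confusion_matrix, score_idx, gt_idx, pd_idx, n_examples):
        # Slot 0 is the pair count (-1 if the cell was never written).
        cell = confusion_matrix[score_idx, gt_idx, pd_idx]
        count = int(cell[0])
        pairs = cell[1 : 2 * n_examples + 1].reshape(-1, 2)
        pairs = pairs[pairs[:, 0] > -0.5]  # drop the -1.0 padding
        return count, [(int(d), float(s)) for d, s in pairs]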