valor-lite 0.33.5__py3-none-any.whl → 0.33.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



@@ -0,0 +1,30 @@
+ from .annotation import Classification
+ from .computation import compute_metrics
+ from .manager import DataLoader, Evaluator
+ from .metric import (
+     F1,
+     ROCAUC,
+     Accuracy,
+     ConfusionMatrix,
+     Counts,
+     MetricType,
+     Precision,
+     Recall,
+     mROCAUC,
+ )
+
+ __all__ = [
+     "Classification",
+     "compute_metrics",
+     "MetricType",
+     "Counts",
+     "Precision",
+     "Recall",
+     "Accuracy",
+     "F1",
+     "ROCAUC",
+     "mROCAUC",
+     "ConfusionMatrix",
+     "DataLoader",
+     "Evaluator",
+ ]
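
This first hunk appears to add the classification package's __init__ module, re-exporting the names listed in __all__. A minimal import sketch follows; the module path valor_lite.classification is an assumption based on the relative imports above and is not shown in the diff itself:

    # Hypothetical import path; only the exported names are confirmed by the hunk above.
    from valor_lite.classification import (
        Classification,
        DataLoader,
        Evaluator,
        MetricType,
        compute_metrics,
    )
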
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class Classification:
+     uid: str
+     groundtruths: list[tuple[str, str]]
+     predictions: list[tuple[str, str]]
+     scores: list[float]
+
+     def __post_init__(self):
+         if len(self.predictions) != len(self.scores):
+             raise ValueError("There must be a score per prediction label.")
@@ -0,0 +1,411 @@
+ import numpy as np
+ from numpy.typing import NDArray
+
+
+ def _compute_rocauc(
+     data: NDArray[np.floating],
+     label_metadata: NDArray[np.int32],
+     n_datums: int,
+     n_labels: int,
+     n_label_keys: int,
+     mask_matching_labels: NDArray[np.bool_],
+     pd_labels: NDArray[np.int32],
+ ):
+     """
+     Compute ROCAUC and mean ROCAUC.
+     """
+     count_labels_per_key = np.bincount(label_metadata[:, 2])
+     count_groundtruths_per_key = np.bincount(
+         label_metadata[:, 2],
+         weights=label_metadata[:, 0],
+         minlength=n_label_keys,
+     )
+
+     positive_count = label_metadata[:, 0]
+     negative_count = (
+         count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
+     )
+
+     true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
+     false_positives = np.zeros_like(true_positives)
+     scores = np.zeros_like(true_positives, dtype=np.float64)
+
+     for label_idx in range(n_labels):
+         if label_metadata[label_idx, 1] == 0:
+             continue
+
+         mask_pds = pd_labels == label_idx
+
+         true_positives[label_idx] = mask_matching_labels[mask_pds]
+         false_positives[label_idx] = ~mask_matching_labels[mask_pds]
+         scores[label_idx] = data[mask_pds, 3]
+
+     cumulative_fp = np.cumsum(false_positives, axis=1)
+     cumulative_tp = np.cumsum(true_positives, axis=1)
+
+     fpr = np.zeros_like(true_positives, dtype=np.float64)
+     np.divide(
+         cumulative_fp,
+         negative_count[:, np.newaxis],
+         where=negative_count[:, np.newaxis] > 1e-9,
+         out=fpr,
+     )
+     tpr = np.zeros_like(true_positives, dtype=np.float64)
+     np.divide(
+         cumulative_tp,
+         positive_count[:, np.newaxis],
+         where=positive_count[:, np.newaxis] > 1e-9,
+         out=tpr,
+     )
+
+     # sort by -tpr, -score
+     indices = np.lexsort((-tpr, -scores), axis=1)
+     fpr = np.take_along_axis(fpr, indices, axis=1)
+     tpr = np.take_along_axis(tpr, indices, axis=1)
+
+     # running max of tpr
+     np.maximum.accumulate(tpr, axis=1, out=tpr)
+
+     # compute rocauc
+     rocauc = np.trapz(x=fpr, y=tpr, axis=1)  # type: ignore - numpy will be switching to `trapezoid` in the future.
+
+     # compute mean rocauc
+     summed_rocauc = np.bincount(label_metadata[:, 2], weights=rocauc)
+     mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
+     np.divide(
+         summed_rocauc,
+         count_labels_per_key,
+         where=count_labels_per_key > 1e-9,
+         out=mean_rocauc,
+     )
+
+     return rocauc, mean_rocauc
+
+
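
_compute_rocauc builds per-label ROC curves from cumulative true/false-positive counts and integrates them with the trapezoid rule. A stripped-down sketch of that idea for a single label, using illustrative numbers rather than package data:

    import numpy as np

    scores   = np.array([0.9, 0.8, 0.4, 0.2])        # predictions, descending score
    is_match = np.array([True, False, True, False])  # does the prediction match the ground truth?

    tp = np.cumsum(is_match)        # cumulative true positives
    fp = np.cumsum(~is_match)       # cumulative false positives
    tpr = tp / is_match.sum()       # true-positive rate
    fpr = fp / (~is_match).sum()    # false-positive rate
    print(np.trapz(y=tpr, x=fpr))   # 0.75 -> area under the ROC curve
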
+ def compute_metrics(
+     data: NDArray[np.floating],
+     label_metadata: NDArray[np.int32],
+     score_thresholds: NDArray[np.floating],
+     hardmax: bool,
+     n_datums: int,
+ ) -> tuple[
+     NDArray[np.int32],
+     NDArray[np.floating],
+     NDArray[np.floating],
+     NDArray[np.floating],
+     NDArray[np.floating],
+     NDArray[np.floating],
+     NDArray[np.floating],
+ ]:
+     """
+     Computes classification metrics.
+
+     Takes data with shape (N, 5):
+
+     Index 0 - Datum Index
+     Index 1 - GroundTruth Label Index
+     Index 2 - Prediction Label Index
+     Index 3 - Score
+     Index 4 - Hard-Max Score
+
+     Parameters
+     ----------
+     data : NDArray[np.floating]
+         A sorted array of classification pairs.
+     label_metadata : NDArray[np.int32]
+         An array containing metadata related to labels.
+     score_thresholds : NDArray[np.floating]
+         A 1-D array containing score thresholds to compute metrics over.
+     hardmax : bool
+         Option to only allow a single positive prediction per label key.
+     n_datums : int
+         The number of datums being operated over.
+
+     Returns
+     -------
+     NDArray[np.int32]
+         TP, FP, FN, TN counts.
+     NDArray[np.floating]
+         Precision.
+     NDArray[np.floating]
+         Recall.
+     NDArray[np.floating]
+         Accuracy.
+     NDArray[np.floating]
+         F1 score.
+     NDArray[np.floating]
+         ROCAUC.
+     NDArray[np.floating]
+         mROCAUC.
+     """
+
+     n_labels = label_metadata.shape[0]
+     n_label_keys = np.unique(label_metadata[:, 2]).size
+     n_scores = score_thresholds.shape[0]
+
+     pd_labels = data[:, 2].astype(int)
+
+     mask_matching_labels = np.isclose(data[:, 1], data[:, 2])
+     mask_score_nonzero = ~np.isclose(data[:, 3], 0.0)
+     mask_hardmax = data[:, 4] > 0.5
+
+     # calculate ROCAUC
+     rocauc, mean_rocauc = _compute_rocauc(
+         data=data,
+         label_metadata=label_metadata,
+         n_datums=n_datums,
+         n_labels=n_labels,
+         n_label_keys=n_label_keys,
+         mask_matching_labels=mask_matching_labels,
+         pd_labels=pd_labels,
+     )
+
+     # calculate metrics at various score thresholds
+     counts = np.zeros((n_scores, n_labels, 4), dtype=np.int32)
+     for score_idx in range(n_scores):
+         mask_score_threshold = data[:, 3] >= score_thresholds[score_idx]
+         mask_score = mask_score_nonzero & mask_score_threshold
+
+         if hardmax:
+             mask_score &= mask_hardmax
+
+         mask_tp = mask_matching_labels & mask_score
+         mask_fp = ~mask_matching_labels & mask_score
+         mask_fn = (mask_matching_labels & ~mask_score) | mask_fp
+         mask_tn = ~mask_matching_labels & ~mask_score
+
+         fn = np.unique(data[mask_fn][:, [0, 1]].astype(int), axis=0)
+         tn = np.unique(data[mask_tn][:, [0, 2]].astype(int), axis=0)
+
+         counts[score_idx, :, 0] = np.bincount(
+             pd_labels[mask_tp], minlength=n_labels
+         )
+         counts[score_idx, :, 1] = np.bincount(
+             pd_labels[mask_fp], minlength=n_labels
+         )
+         counts[score_idx, :, 2] = np.bincount(fn[:, 1], minlength=n_labels)
+         counts[score_idx, :, 3] = np.bincount(tn[:, 1], minlength=n_labels)
+
+     recall = np.zeros((n_scores, n_labels), dtype=np.float64)
+     np.divide(
+         counts[:, :, 0],
+         (counts[:, :, 0] + counts[:, :, 2]),
+         where=(counts[:, :, 0] + counts[:, :, 2]) > 1e-9,
+         out=recall,
+     )
+
+     precision = np.zeros_like(recall)
+     np.divide(
+         counts[:, :, 0],
+         (counts[:, :, 0] + counts[:, :, 1]),
+         where=(counts[:, :, 0] + counts[:, :, 1]) > 1e-9,
+         out=precision,
+     )
+
+     accuracy = np.zeros_like(recall)
+     np.divide(
+         (counts[:, :, 0] + counts[:, :, 3]),
+         float(n_datums),
+         out=accuracy,
+     )
+
+     f1_score = np.zeros_like(recall)
+     np.divide(
+         (2 * precision * recall),
+         (precision + recall),
+         where=(precision + recall) > 1e-9,
+         out=f1_score,
+     )
+
+     return (
+         counts,
+         precision,
+         recall,
+         accuracy,
+         f1_score,
+         rocauc,
+         mean_rocauc,
+     )
+
+
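
A hand-built call to compute_metrics, sketched from the docstring above and assuming the function is imported from the module added in this hunk. The data columns follow the documented layout; the label_metadata columns (ground-truth count, prediction count, label-key index per label) are inferred from how the array is indexed in the code, so treat this as illustrative rather than canonical:

    import numpy as np

    # Two datums, two labels sharing one label key. Each datum carries a score
    # for every label; the hard-max column flags the top-scoring label per datum.
    data = np.array(
        [
            # datum, gt label, pd label, score, hard-max
            [0.0, 0.0, 0.0, 0.9, 1.0],
            [0.0, 0.0, 1.0, 0.1, 0.0],
            [1.0, 1.0, 0.0, 0.4, 0.0],
            [1.0, 1.0, 1.0, 0.6, 1.0],
        ]
    )
    label_metadata = np.array([[1, 2, 0], [1, 2, 0]], dtype=np.int32)

    counts, precision, recall, accuracy, f1, rocauc, mean_rocauc = compute_metrics(
        data=data,
        label_metadata=label_metadata,
        score_thresholds=np.array([0.5]),
        hardmax=True,
        n_datums=2,
    )
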
+ def _count_with_examples(
+     data: NDArray[np.floating],
+     unique_idx: int | list[int],
+     label_idx: int | list[int],
+ ) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
+     """
+     Helper function for counting occurrences of unique detailed pairs.
+
+     Parameters
+     ----------
+     data : NDArray[np.floating]
+         A masked portion of a detailed pairs array.
+     unique_idx : int | list[int]
+         The index or indices upon which uniqueness is constrained.
+     label_idx : int | list[int]
+         The index or indices within the unique index or indices that encode labels.
+
+     Returns
+     -------
+     NDArray[np.floating]
+         Examples drawn from the data input.
+     NDArray[np.int32]
+         Unique label indices.
+     NDArray[np.int32]
+         Counts for each unique label index.
+     """
+     unique_rows, indices = np.unique(
+         data.astype(int)[:, unique_idx],
+         return_index=True,
+         axis=0,
+     )
+     examples = data[indices]
+     labels, counts = np.unique(
+         unique_rows[:, label_idx], return_counts=True, axis=0
+     )
+     return examples, labels, counts
+
+
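
_count_with_examples leans on np.unique over selected columns to deduplicate (datum, label) pairs while keeping one representative row per pair. The same trick in isolation, with toy rows rather than package data:

    import numpy as np

    rows = np.array(
        [
            [0, 1, 0.9],   # datum 0, label 1
            [0, 1, 0.4],   # duplicate (datum 0, label 1) pair
            [1, 2, 0.7],   # datum 1, label 2
        ]
    )
    unique_pairs, first_idx = np.unique(
        rows.astype(int)[:, [0, 1]], return_index=True, axis=0
    )
    examples = rows[first_idx]                          # one row per unique pair
    labels, counts = np.unique(unique_pairs[:, 1], return_counts=True)
    print(labels, counts)                               # [1 2] [1 1]
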
+ def compute_confusion_matrix(
+     data: NDArray[np.floating],
+     label_metadata: NDArray[np.int32],
+     score_thresholds: NDArray[np.floating],
+     hardmax: bool,
+     n_examples: int,
+ ) -> tuple[NDArray[np.floating], NDArray[np.int32]]:
+
+     """
+     Compute detailed confusion matrix.
+
+     Takes data with shape (N, 5):
+
+     Index 0 - Datum Index
+     Index 1 - GroundTruth Label Index
+     Index 2 - Prediction Label Index
+     Index 3 - Score
+     Index 4 - Hard-Max Score
+
+     Parameters
+     ----------
+     data : NDArray[np.floating]
+         A sorted array of classification pairs.
+     label_metadata : NDArray[np.int32]
+         An array containing metadata related to labels.
+     score_thresholds : NDArray[np.floating]
+         A 1-D array containing score thresholds.
+     hardmax : bool
+         Option to only allow a single positive prediction per label key.
+     n_examples : int
+         The maximum number of examples to return per count.
+
+     Returns
+     -------
+     NDArray[np.floating]
+         Confusion matrix.
+     NDArray[np.int32]
+         Ground truths with missing predictions.
+     """
+
+     n_labels = label_metadata.shape[0]
+     n_scores = score_thresholds.shape[0]
+
+     confusion_matrix = -1 * np.ones(
+         (n_scores, n_labels, n_labels, 2 * n_examples + 1),
+         dtype=np.float32,
+     )
+     missing_predictions = -1 * np.ones(
+         (n_scores, n_labels, n_examples + 1),
+         dtype=np.int32,
+     )
+
+     mask_label_match = np.isclose(data[:, 1], data[:, 2])
+     mask_score = data[:, 3] > 1e-9
+
+     groundtruths = data[:, [0, 1]].astype(int)
+
+     for score_idx in range(n_scores):
+         mask_score &= data[:, 3] >= score_thresholds[score_idx]
+         if hardmax:
+             mask_score &= data[:, 4] > 0.5
+
+         mask_tp = mask_label_match & mask_score
+         mask_misclf = ~mask_label_match & mask_score
+         mask_misprd = ~(
+             (
+                 groundtruths.reshape(-1, 1, 2)
+                 == groundtruths[mask_score].reshape(1, -1, 2)
+             )
+             .all(axis=2)
+             .any(axis=1)
+         )
+
+         tp_examples, tp_labels, tp_counts = _count_with_examples(
+             data=data[mask_tp],
+             unique_idx=[0, 2],
+             label_idx=1,
+         )
+         misclf_examples, misclf_labels, misclf_counts = _count_with_examples(
+             data=data[mask_misclf],
+             unique_idx=[0, 1, 2],
+             label_idx=[1, 2],
+         )
+         misprd_examples, misprd_labels, misprd_counts = _count_with_examples(
+             data=data[mask_misprd],
+             unique_idx=[0, 1],
+             label_idx=1,
+         )
+
+         confusion_matrix[score_idx, tp_labels, tp_labels, 0] = tp_counts
+         confusion_matrix[
+             score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0
+         ] = misclf_counts
+
+         missing_predictions[score_idx, misprd_labels, 0] = misprd_counts
+
+         if n_examples > 0:
+             for label_idx in range(n_labels):
+                 # true-positive examples
+                 mask_tp_label = tp_examples[:, 2] == label_idx
+                 if mask_tp_label.sum() > 0:
+                     tp_label_examples = tp_examples[mask_tp_label][:n_examples]
+                     confusion_matrix[
+                         score_idx,
+                         label_idx,
+                         label_idx,
+                         1 : 2 * tp_label_examples.shape[0] + 1,
+                     ] = tp_label_examples[:, [0, 3]].flatten()
+
+                 # misclassification examples
+                 mask_misclf_gt_label = misclf_examples[:, 1] == label_idx
+                 if mask_misclf_gt_label.sum() > 0:
+                     for pd_label_idx in range(n_labels):
+                         mask_misclf_pd_label = (
+                             misclf_examples[:, 2] == pd_label_idx
+                         )
+                         mask_misclf_label_combo = (
+                             mask_misclf_gt_label & mask_misclf_pd_label
+                         )
+                         if mask_misclf_label_combo.sum() > 0:
+                             misclf_label_examples = misclf_examples[
+                                 mask_misclf_label_combo
+                             ][:n_examples]
+                             confusion_matrix[
+                                 score_idx,
+                                 label_idx,
+                                 pd_label_idx,
+                                 1 : 2 * misclf_label_examples.shape[0] + 1,
+                             ] = misclf_label_examples[:, [0, 3]].flatten()
+
+                 # missing prediction examples
+                 mask_misprd_label = misprd_examples[:, 1] == label_idx
+                 if misprd_examples.size > 0:
+                     misprd_label_examples = misprd_examples[mask_misprd_label][
+                         :n_examples
+                     ]
+                     missing_predictions[
+                         score_idx,
+                         label_idx,
+                         1 : misprd_label_examples.shape[0] + 1,
+                     ] = misprd_label_examples[:, 0].flatten()
+
+     return confusion_matrix, missing_predictions
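
The returned arrays use slot 0 of the last axis for the count and the remaining slots for flattened example entries, with -1 marking unused slots and empty cells. A sketch of decoding one cell, assuming confusion_matrix and missing_predictions were obtained from a call to compute_confusion_matrix above; the index values here are hypothetical and the layout is inferred from the code:

    score_idx, gt_idx, pd_idx = 0, 0, 1      # hypothetical threshold / label indices

    cell = confusion_matrix[score_idx, gt_idx, pd_idx]
    count = int(cell[0])                     # pairs predicting pd_idx for ground truth gt_idx (-1 if none)
    pairs = cell[1:].reshape(-1, 2)          # (datum index, prediction score) examples
    pairs = pairs[pairs[:, 0] >= 0]          # drop the -1 padding

    row = missing_predictions[score_idx, gt_idx]
    n_missing = int(row[0])                  # ground truths of gt_idx with no qualifying prediction
    missing_datums = row[1:][row[1:] >= 0]   # example datum indices
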