valor-lite 0.33.13-py3-none-any.whl → 0.33.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite might be problematic.
- valor_lite/classification/__init__.py +8 -21
- valor_lite/classification/computation.py +2 -2
- valor_lite/classification/manager.py +32 -244
- valor_lite/classification/metric.py +331 -372
- valor_lite/classification/utilities.py +222 -0
- valor_lite/object_detection/__init__.py +4 -35
- valor_lite/object_detection/computation.py +20 -20
- valor_lite/object_detection/manager.py +38 -497
- valor_lite/object_detection/metric.py +633 -706
- valor_lite/object_detection/utilities.py +505 -0
- valor_lite/schemas.py +10 -8
- valor_lite/semantic_segmentation/__init__.py +2 -17
- valor_lite/semantic_segmentation/computation.py +1 -1
- valor_lite/semantic_segmentation/manager.py +13 -116
- valor_lite/semantic_segmentation/metric.py +216 -239
- valor_lite/semantic_segmentation/utilities.py +104 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.14.dist-info}/METADATA +1 -1
- valor_lite-0.33.14.dist-info/RECORD +27 -0
- valor_lite-0.33.13.dist-info/RECORD +0 -24
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.14.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.14.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.14.dist-info}/top_level.txt +0 -0
@@ -1,30 +1,17 @@
 from .annotation import Classification
-from .computation import
-
-
-    F1,
-    ROCAUC,
-    Accuracy,
-    ConfusionMatrix,
-    Counts,
-    MetricType,
-    Precision,
-    Recall,
-    mROCAUC,
+from .computation import (
+    compute_confusion_matrix,
+    compute_precision_recall_rocauc,
 )
+from .manager import DataLoader, Evaluator
+from .metric import Metric, MetricType
 
 __all__ = [
     "Classification",
-    "
+    "compute_precision_recall_rocauc",
+    "compute_confusion_matrix",
     "MetricType",
-    "Counts",
-    "Precision",
-    "Recall",
-    "Accuracy",
-    "F1",
-    "ROCAUC",
-    "mROCAUC",
-    "ConfusionMatrix",
     "DataLoader",
     "Evaluator",
+    "Metric",
 ]
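The rewritten `__init__.py` above narrows the classification package's public surface to the two computation entry points plus the generic `Metric`/`MetricType` pair, while the per-metric classes are no longer re-exported. A minimal import sketch against 0.33.14, assuming only the names shown in the new `__all__`:

```python
# Sketch of the 0.33.14 public surface, based on the __all__ shown above.
from valor_lite.classification import (
    Classification,
    DataLoader,
    Evaluator,
    Metric,
    MetricType,
    compute_confusion_matrix,
    compute_precision_recall_rocauc,
)

# The 0.33.13 per-metric classes (Counts, Precision, Recall, Accuracy, F1,
# ROCAUC, mROCAUC, ConfusionMatrix) are no longer exported from this package.
```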
@@ -64,7 +64,7 @@ def _compute_rocauc(
     return rocauc, mean_rocauc
 
 
-def
+def compute_precision_recall_rocauc(
     data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
     score_thresholds: NDArray[np.float64],
@@ -271,7 +271,7 @@ def compute_confusion_matrix(
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
     iou_thresholds : NDArray[np.float64]
-        A 1-D array containing
+        A 1-D array containing IOU thresholds.
     score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
     n_examples : int
@@ -7,18 +7,12 @@ from tqdm import tqdm
 from valor_lite.classification.annotation import Classification
 from valor_lite.classification.computation import (
     compute_confusion_matrix,
-
+    compute_precision_recall_rocauc,
 )
-from valor_lite.classification.metric import
-
-
-
-    ConfusionMatrix,
-    Counts,
-    MetricType,
-    Precision,
-    Recall,
-    mROCAUC,
+from valor_lite.classification.metric import Metric, MetricType
+from valor_lite.classification.utilities import (
+    unpack_confusion_matrix_into_metric_list,
+    unpack_precision_recall_rocauc_into_metric_lists,
 )
 
 """
@@ -191,119 +185,11 @@ class Evaluator:
             n_datums=n_datums,
         )
 
-    def
-        self,
-        confusion_matrix: NDArray[np.float64],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[
-        str,
-        dict[
-            str,
-            dict[
-                str,
-                int
-                | list[
-                    dict[
-                        str,
-                        str | float,
-                    ]
-                ],
-            ],
-        ],
-    ]:
-        """
-        Unpacks a numpy array of confusion matrix counts and examples.
-        """
-
-        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 2 + 1,
-            ]
-        )
-
-        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 2 + 2,
-            ]
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                self.index_to_label[pd_label_idx]: {
-                    "count": max(
-                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
-                        0,
-                    ),
-                    "examples": [
-                        {
-                            "datum": self.index_to_uid[
-                                datum_idx(
-                                    gt_label_idx, pd_label_idx, example_idx
-                                )
-                            ],
-                            "score": score_idx(
-                                gt_label_idx, pd_label_idx, example_idx
-                            ),
-                        }
-                        for example_idx in range(number_of_examples)
-                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
-                        >= 0
-                    ],
-                }
-                for pd_label_idx in range(number_of_labels)
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
-    def _unpack_missing_predictions(
-        self,
-        missing_predictions: NDArray[np.int32],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
-        """
-        Unpacks a numpy array of missing prediction counts and examples.
-        """
-
-        datum_idx = (
-            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                missing_predictions[
-                    gt_label_idx,
-                    example_idx + 1,
-                ]
-            )
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                "count": max(
-                    int(missing_predictions[gt_label_idx, 0]),
-                    0,
-                ),
-                "examples": [
-                    {
-                        "datum": self.index_to_uid[
-                            datum_idx(gt_label_idx, example_idx)
-                        ]
-                    }
-                    for example_idx in range(number_of_examples)
-                    if datum_idx(gt_label_idx, example_idx) >= 0
-                ],
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
-    def compute_precision_recall(
+    def compute_precision_recall_rocauc(
         self,
         score_thresholds: list[float] = [0.0],
         hardmax: bool = True,
         filter_: Filter | None = None,
-        as_dict: bool = False,
     ) -> dict[MetricType, list]:
         """
         Performs an evaluation and returns metrics.
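The deleted `_unpack_confusion_matrix` helper documents the array layout that the new `valor_lite.classification.utilities` functions presumably consume: for each (ground-truth label, predicted label) cell, index 0 holds the count, and each example occupies two further slots, the datum index at `example_idx * 2 + 1` and the score at `example_idx * 2 + 2`, with negative datum indices marking unused slots. A standalone sketch of that unpacking under the same layout (`unpack_cell` is an illustrative name, not part of the library):

```python
import numpy as np
from numpy.typing import NDArray


def unpack_cell(
    confusion_matrix: NDArray[np.float64],
    gt_label_idx: int,
    pd_label_idx: int,
    n_examples: int,
) -> dict:
    """Unpack one (ground truth, prediction) cell using the layout implied by
    the removed helper: channel 0 is the count, followed by (datum index,
    score) pairs per example, with negative datum indices as padding."""
    cell = confusion_matrix[gt_label_idx, pd_label_idx]
    examples = []
    for example_idx in range(n_examples):
        datum_idx = int(cell[example_idx * 2 + 1])
        if datum_idx < 0:  # unused slot
            continue
        examples.append(
            {"datum_index": datum_idx, "score": float(cell[example_idx * 2 + 2])}
        )
    return {"count": max(int(cell[0]), 0), "examples": examples}
```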
@@ -316,8 +202,6 @@ class Evaluator:
             Toggles whether a hardmax is applied to predictions.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
@@ -334,15 +218,7 @@
         label_metadata = filter_.label_metadata
         n_datums = filter_.n_datums
 
-        (
-            counts,
-            precision,
-            recall,
-            accuracy,
-            f1_score,
-            rocauc,
-            mean_rocauc,
-        ) = compute_metrics(
+        results = compute_precision_recall_rocauc(
             data=data,
             label_metadata=label_metadata,
             score_thresholds=np.array(score_thresholds),
@@ -350,79 +226,13 @@
             n_datums=n_datums,
         )
 
-
-
-
-
-
-
-
-            for label_idx in range(label_metadata.shape[0])
-            if label_metadata[label_idx, 0] > 0
-        ]
-
-        metrics[MetricType.mROCAUC] = [
-            mROCAUC(
-                value=float(mean_rocauc),
-            )
-        ]
-
-        metrics[MetricType.Accuracy] = [
-            Accuracy(
-                value=accuracy.astype(float).tolist(),
-                score_thresholds=score_thresholds,
-                hardmax=hardmax,
-            )
-        ]
-
-        for label_idx, label in self.index_to_label.items():
-
-            kwargs = {
-                "label": label,
-                "score_thresholds": score_thresholds,
-                "hardmax": hardmax,
-            }
-            row = counts[:, label_idx]
-            metrics[MetricType.Counts].append(
-                Counts(
-                    tp=row[:, 0].astype(int).tolist(),
-                    fp=row[:, 1].astype(int).tolist(),
-                    fn=row[:, 2].astype(int).tolist(),
-                    tn=row[:, 3].astype(int).tolist(),
-                    **kwargs,
-                )
-            )
-
-            # if no groundtruths exists for a label, skip it.
-            if label_metadata[label_idx, 0] == 0:
-                continue
-
-            metrics[MetricType.Precision].append(
-                Precision(
-                    value=precision[:, label_idx].astype(float).tolist(),
-                    **kwargs,
-                )
-            )
-            metrics[MetricType.Recall].append(
-                Recall(
-                    value=recall[:, label_idx].astype(float).tolist(),
-                    **kwargs,
-                )
-            )
-            metrics[MetricType.F1].append(
-                F1(
-                    value=f1_score[:, label_idx].astype(float).tolist(),
-                    **kwargs,
-                )
-            )
-
-        if as_dict:
-            return {
-                mtype: [metric.to_dict() for metric in mvalues]
-                for mtype, mvalues in metrics.items()
-            }
-
-        return metrics
+        return unpack_precision_recall_rocauc_into_metric_lists(
+            results=results,
+            score_thresholds=score_thresholds,
+            hardmax=hardmax,
+            label_metadata=label_metadata,
+            index_to_label=self.index_to_label,
+        )
 
     def compute_confusion_matrix(
         self,
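The removed block shows that the `as_dict` flag is gone: metric objects are now always returned, and the old `to_dict()` branch moves to the caller. A hedged sketch of the replacement pattern, assuming `evaluator` is an already-populated `Evaluator`:

```python
from valor_lite.classification import Evaluator


def metrics_as_dicts(evaluator: Evaluator) -> dict:
    # Equivalent of the removed `as_dict=True` path: serialize each returned
    # Metric with to_dict() on the caller side.
    metrics = evaluator.compute_precision_recall_rocauc(
        score_thresholds=[0.25, 0.75],
        hardmax=True,
    )
    return {
        mtype: [metric.to_dict() for metric in mvalues]
        for mtype, mvalues in metrics.items()
    }
```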
@@ -430,8 +240,7 @@
         hardmax: bool = True,
         number_of_examples: int = 0,
         filter_: Filter | None = None,
-
-    ) -> list:
+    ) -> list[Metric]:
         """
         Computes a detailed confusion matrix..
 
@@ -445,12 +254,10 @@
             The number of examples to return per count.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
-        list[
+        list[Metric]
             A list of confusion matrices.
         """
 
@@ -464,7 +271,7 @@
         if data.size == 0:
             return list()
 
-
+        results = compute_confusion_matrix(
             data=data,
             label_metadata=label_metadata,
             score_thresholds=np.array(score_thresholds),
@@ -472,29 +279,13 @@
             n_examples=number_of_examples,
         )
 
-
-
-
-
-
-
-
-                    number_of_labels=n_labels,
-                    number_of_examples=number_of_examples,
-                ),
-                missing_predictions=self._unpack_missing_predictions(
-                    missing_predictions=missing_predictions[score_idx, :, :],
-                    number_of_labels=n_labels,
-                    number_of_examples=number_of_examples,
-                ),
-            )
-            for score_idx in range(n_scores)
-        ]
-
-        if as_dict:
-            return [m.to_dict() for m in results]
-
-        return results
+        return unpack_confusion_matrix_into_metric_list(
+            results=results,
+            score_thresholds=score_thresholds,
+            number_of_examples=number_of_examples,
+            index_to_uid=self.index_to_uid,
+            index_to_label=self.index_to_label,
+        )
 
     def evaluate(
         self,
@@ -502,8 +293,7 @@
         hardmax: bool = True,
         number_of_examples: int = 0,
         filter_: Filter | None = None,
-
-    ) -> dict[MetricType, list]:
+    ) -> dict[MetricType, list[Metric]]:
         """
         Computes a detailed confusion matrix..
 
@@ -517,29 +307,27 @@
             The number of examples to return per count.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
-
-
+        dict[MetricType, list[Metric]]
+            Lists of metrics organized by metric type.
         """
 
-
+        metrics = self.compute_precision_recall_rocauc(
             score_thresholds=score_thresholds,
             hardmax=hardmax,
             filter_=filter_,
-            as_dict=as_dict,
         )
-
+
+        metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
             score_thresholds=score_thresholds,
             hardmax=hardmax,
             number_of_examples=number_of_examples,
             filter_=filter_,
-            as_dict=as_dict,
         )
-
+
+        return metrics
 
 
 class DataLoader:
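After this hunk, `evaluate()` composes the two public methods and returns `dict[MetricType, list[Metric]]`, with confusion matrices stored under `MetricType.ConfusionMatrix`. A brief usage sketch, assuming the evaluator has already been built by a `DataLoader` (setup not shown in this diff):

```python
from valor_lite.classification import Evaluator, Metric, MetricType


def print_summary(evaluator: Evaluator) -> None:
    # evaluate() now returns metric objects grouped by MetricType; the
    # confusion matrices are included under MetricType.ConfusionMatrix.
    metrics: dict[MetricType, list[Metric]] = evaluator.evaluate(
        score_thresholds=[0.5],
        hardmax=True,
        number_of_examples=2,
    )
    for mtype, mvalues in metrics.items():
        print(mtype, [m.to_dict() for m in mvalues])
```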