valor-lite 0.36.1__py3-none-any.whl → 0.36.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic. Click here for more details.
- valor_lite/classification/__init__.py +3 -1
- valor_lite/classification/computation.py +25 -132
- valor_lite/classification/manager.py +18 -22
- valor_lite/classification/metric.py +0 -4
- valor_lite/classification/utilities.py +85 -103
- valor_lite/exceptions.py +3 -3
- valor_lite/object_detection/__init__.py +2 -1
- valor_lite/object_detection/manager.py +13 -15
- valor_lite/semantic_segmentation/__init__.py +2 -1
- valor_lite/semantic_segmentation/manager.py +4 -4
- {valor_lite-0.36.1.dist-info → valor_lite-0.36.3.dist-info}/METADATA +1 -1
- {valor_lite-0.36.1.dist-info → valor_lite-0.36.3.dist-info}/RECORD +14 -14
- {valor_lite-0.36.1.dist-info → valor_lite-0.36.3.dist-info}/WHEEL +0 -0
- {valor_lite-0.36.1.dist-info → valor_lite-0.36.3.dist-info}/top_level.txt +0 -0
|
@@ -3,7 +3,7 @@ from .computation import (
|
|
|
3
3
|
compute_confusion_matrix,
|
|
4
4
|
compute_precision_recall_rocauc,
|
|
5
5
|
)
|
|
6
|
-
from .manager import DataLoader, Evaluator
|
|
6
|
+
from .manager import DataLoader, Evaluator, Filter, Metadata
|
|
7
7
|
from .metric import Metric, MetricType
|
|
8
8
|
|
|
9
9
|
__all__ = [
|
|
@@ -14,4 +14,6 @@ __all__ = [
|
|
|
14
14
|
"DataLoader",
|
|
15
15
|
"Evaluator",
|
|
16
16
|
"Metric",
|
|
17
|
+
"Metadata",
|
|
18
|
+
"Filter",
|
|
17
19
|
]
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from enum import IntFlag, auto
|
|
2
|
+
|
|
1
3
|
import numpy as np
|
|
2
4
|
from numpy.typing import NDArray
|
|
3
5
|
|
|
@@ -318,56 +320,20 @@ def compute_precision_recall_rocauc(
|
|
|
318
320
|
)
|
|
319
321
|
|
|
320
322
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.intp]]:
|
|
326
|
-
"""
|
|
327
|
-
Helper function for counting occurences of unique detailed pairs.
|
|
328
|
-
|
|
329
|
-
Parameters
|
|
330
|
-
----------
|
|
331
|
-
data : NDArray[np.float64]
|
|
332
|
-
A masked portion of a detailed pairs array.
|
|
333
|
-
unique_idx : int | list[int]
|
|
334
|
-
The index or indices upon which uniqueness is constrained.
|
|
335
|
-
label_idx : int | list[int]
|
|
336
|
-
The index or indices within the unique index or indices that encode labels.
|
|
337
|
-
|
|
338
|
-
Returns
|
|
339
|
-
-------
|
|
340
|
-
NDArray[np.float64]
|
|
341
|
-
Examples drawn from the data input.
|
|
342
|
-
NDArray[np.int32]
|
|
343
|
-
Unique label indices.
|
|
344
|
-
NDArray[np.intp]
|
|
345
|
-
Counts for each unique label index.
|
|
346
|
-
"""
|
|
347
|
-
unique_rows, indices = np.unique(
|
|
348
|
-
data.astype(int)[:, unique_idx],
|
|
349
|
-
return_index=True,
|
|
350
|
-
axis=0,
|
|
351
|
-
)
|
|
352
|
-
examples = data[indices]
|
|
353
|
-
labels, counts = np.unique(
|
|
354
|
-
unique_rows[:, label_idx], return_counts=True, axis=0
|
|
355
|
-
)
|
|
356
|
-
return examples, labels, counts
|
|
323
|
+
class PairClassification(IntFlag):
|
|
324
|
+
TP = auto()
|
|
325
|
+
FP_FN_MISCLF = auto()
|
|
326
|
+
FN_UNMATCHED = auto()
|
|
357
327
|
|
|
358
328
|
|
|
359
329
|
def compute_confusion_matrix(
|
|
360
330
|
detailed_pairs: NDArray[np.float64],
|
|
361
|
-
label_metadata: NDArray[np.int32],
|
|
362
331
|
score_thresholds: NDArray[np.float64],
|
|
363
332
|
hardmax: bool,
|
|
364
|
-
|
|
365
|
-
) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
|
|
333
|
+
) -> NDArray[np.uint8]:
|
|
366
334
|
"""
|
|
367
335
|
Compute detailed confusion matrix.
|
|
368
336
|
|
|
369
|
-
Takes data with shape (N, 5):
|
|
370
|
-
|
|
371
337
|
Parameters
|
|
372
338
|
----------
|
|
373
339
|
detailed_pairs : NDArray[np.float64]
|
|
@@ -377,37 +343,22 @@ def compute_confusion_matrix(
|
|
|
377
343
|
Index 2 - Prediction Label Index
|
|
378
344
|
Index 3 - Score
|
|
379
345
|
Index 4 - Hard Max Score
|
|
380
|
-
label_metadata : NDArray[np.int32]
|
|
381
|
-
A 2-D array containing metadata related to labels with shape (n_labels, 2).
|
|
382
|
-
Index 0 - GroundTruth Label Count
|
|
383
|
-
Index 1 - Prediction Label Count
|
|
384
346
|
iou_thresholds : NDArray[np.float64]
|
|
385
347
|
A 1-D array containing IOU thresholds.
|
|
386
348
|
score_thresholds : NDArray[np.float64]
|
|
387
349
|
A 1-D array containing score thresholds.
|
|
388
|
-
n_examples : int
|
|
389
|
-
The maximum number of examples to return per count.
|
|
390
350
|
|
|
391
351
|
Returns
|
|
392
352
|
-------
|
|
393
|
-
NDArray[
|
|
394
|
-
|
|
395
|
-
NDArray[np.int32]
|
|
396
|
-
Unmatched Ground Truths.
|
|
353
|
+
NDArray[uint8]
|
|
354
|
+
Row-wise classification of pairs.
|
|
397
355
|
"""
|
|
398
|
-
|
|
399
|
-
n_labels = label_metadata.shape[0]
|
|
356
|
+
n_pairs = detailed_pairs.shape[0]
|
|
400
357
|
n_scores = score_thresholds.shape[0]
|
|
401
358
|
|
|
402
|
-
|
|
403
|
-
(n_scores,
|
|
404
|
-
|
|
405
|
-
dtype=np.float32,
|
|
406
|
-
)
|
|
407
|
-
unmatched_ground_truths = np.full(
|
|
408
|
-
(n_scores, n_labels, n_examples + 1),
|
|
409
|
-
fill_value=-1,
|
|
410
|
-
dtype=np.int32,
|
|
359
|
+
pair_classifications = np.zeros(
|
|
360
|
+
(n_scores, n_pairs),
|
|
361
|
+
dtype=np.uint8,
|
|
411
362
|
)
|
|
412
363
|
|
|
413
364
|
mask_label_match = np.isclose(detailed_pairs[:, 1], detailed_pairs[:, 2])
|
|
@@ -420,9 +371,9 @@ def compute_confusion_matrix(
|
|
|
420
371
|
if hardmax:
|
|
421
372
|
mask_score &= detailed_pairs[:, 4] > 0.5
|
|
422
373
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
374
|
+
mask_true_positives = mask_label_match & mask_score
|
|
375
|
+
mask_misclassifications = ~mask_label_match & mask_score
|
|
376
|
+
mask_unmatched_groundtruths = ~(
|
|
426
377
|
(
|
|
427
378
|
groundtruths.reshape(-1, 1, 2)
|
|
428
379
|
== groundtruths[mask_score].reshape(1, -1, 2)
|
|
@@ -431,73 +382,15 @@ def compute_confusion_matrix(
|
|
|
431
382
|
.any(axis=1)
|
|
432
383
|
)
|
|
433
384
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
label_idx=1,
|
|
438
|
-
)
|
|
439
|
-
misclf_examples, misclf_labels, misclf_counts = _count_with_examples(
|
|
440
|
-
data=detailed_pairs[mask_misclf],
|
|
441
|
-
unique_idx=[0, 1, 2],
|
|
442
|
-
label_idx=[1, 2],
|
|
385
|
+
# classify pairings
|
|
386
|
+
pair_classifications[score_idx, mask_true_positives] |= np.uint8(
|
|
387
|
+
PairClassification.TP
|
|
443
388
|
)
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
unique_idx=[0, 1],
|
|
447
|
-
label_idx=1,
|
|
389
|
+
pair_classifications[score_idx, mask_misclassifications] |= np.uint8(
|
|
390
|
+
PairClassification.FP_FN_MISCLF
|
|
448
391
|
)
|
|
392
|
+
pair_classifications[
|
|
393
|
+
score_idx, mask_unmatched_groundtruths
|
|
394
|
+
] |= np.uint8(PairClassification.FN_UNMATCHED)
|
|
449
395
|
|
|
450
|
-
|
|
451
|
-
confusion_matrix[
|
|
452
|
-
score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0
|
|
453
|
-
] = misclf_counts
|
|
454
|
-
|
|
455
|
-
unmatched_ground_truths[score_idx, misprd_labels, 0] = misprd_counts
|
|
456
|
-
|
|
457
|
-
if n_examples > 0:
|
|
458
|
-
for label_idx in range(n_labels):
|
|
459
|
-
# true-positive examples
|
|
460
|
-
mask_tp_label = tp_examples[:, 2] == label_idx
|
|
461
|
-
if mask_tp_label.sum() > 0:
|
|
462
|
-
tp_label_examples = tp_examples[mask_tp_label][:n_examples]
|
|
463
|
-
confusion_matrix[
|
|
464
|
-
score_idx,
|
|
465
|
-
label_idx,
|
|
466
|
-
label_idx,
|
|
467
|
-
1 : 2 * tp_label_examples.shape[0] + 1,
|
|
468
|
-
] = tp_label_examples[:, [0, 3]].flatten()
|
|
469
|
-
|
|
470
|
-
# misclassification examples
|
|
471
|
-
mask_misclf_gt_label = misclf_examples[:, 1] == label_idx
|
|
472
|
-
if mask_misclf_gt_label.sum() > 0:
|
|
473
|
-
for pd_label_idx in range(n_labels):
|
|
474
|
-
mask_misclf_pd_label = (
|
|
475
|
-
misclf_examples[:, 2] == pd_label_idx
|
|
476
|
-
)
|
|
477
|
-
mask_misclf_label_combo = (
|
|
478
|
-
mask_misclf_gt_label & mask_misclf_pd_label
|
|
479
|
-
)
|
|
480
|
-
if mask_misclf_label_combo.sum() > 0:
|
|
481
|
-
misclf_label_examples = misclf_examples[
|
|
482
|
-
mask_misclf_label_combo
|
|
483
|
-
][:n_examples]
|
|
484
|
-
confusion_matrix[
|
|
485
|
-
score_idx,
|
|
486
|
-
label_idx,
|
|
487
|
-
pd_label_idx,
|
|
488
|
-
1 : 2 * misclf_label_examples.shape[0] + 1,
|
|
489
|
-
] = misclf_label_examples[:, [0, 3]].flatten()
|
|
490
|
-
|
|
491
|
-
# unmatched ground truth examples
|
|
492
|
-
mask_misprd_label = misprd_examples[:, 1] == label_idx
|
|
493
|
-
if misprd_examples.size > 0:
|
|
494
|
-
misprd_label_examples = misprd_examples[mask_misprd_label][
|
|
495
|
-
:n_examples
|
|
496
|
-
]
|
|
497
|
-
unmatched_ground_truths[
|
|
498
|
-
score_idx,
|
|
499
|
-
label_idx,
|
|
500
|
-
1 : misprd_label_examples.shape[0] + 1,
|
|
501
|
-
] = misprd_label_examples[:, 0].flatten()
|
|
502
|
-
|
|
503
|
-
return confusion_matrix, unmatched_ground_truths # type: ignore[reportReturnType]
|
|
396
|
+
return pair_classifications
|
|
@@ -16,7 +16,7 @@ from valor_lite.classification.utilities import (
|
|
|
16
16
|
unpack_confusion_matrix_into_metric_list,
|
|
17
17
|
unpack_precision_recall_rocauc_into_metric_lists,
|
|
18
18
|
)
|
|
19
|
-
from valor_lite.exceptions import
|
|
19
|
+
from valor_lite.exceptions import EmptyEvaluatorError, EmptyFilterError
|
|
20
20
|
|
|
21
21
|
"""
|
|
22
22
|
Usage
|
|
@@ -88,14 +88,14 @@ class Filter:
|
|
|
88
88
|
def __post_init__(self):
|
|
89
89
|
# validate datum mask
|
|
90
90
|
if not self.datum_mask.any():
|
|
91
|
-
raise
|
|
91
|
+
raise EmptyFilterError("filter removes all datums")
|
|
92
92
|
|
|
93
93
|
# validate label indices
|
|
94
94
|
if (
|
|
95
95
|
self.valid_label_indices is not None
|
|
96
96
|
and self.valid_label_indices.size == 0
|
|
97
97
|
):
|
|
98
|
-
raise
|
|
98
|
+
raise EmptyFilterError("filter removes all labels")
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
class Evaluator:
|
|
@@ -292,7 +292,6 @@ class Evaluator:
|
|
|
292
292
|
self,
|
|
293
293
|
score_thresholds: list[float] = [0.0],
|
|
294
294
|
hardmax: bool = True,
|
|
295
|
-
number_of_examples: int = 0,
|
|
296
295
|
filter_: Filter | None = None,
|
|
297
296
|
) -> list[Metric]:
|
|
298
297
|
"""
|
|
@@ -304,8 +303,6 @@ class Evaluator:
|
|
|
304
303
|
A list of score thresholds to compute metrics over.
|
|
305
304
|
hardmax : bool
|
|
306
305
|
Toggles whether a hardmax is applied to predictions.
|
|
307
|
-
number_of_examples : int, default=0
|
|
308
|
-
The number of examples to return per count.
|
|
309
306
|
filter_ : Filter, optional
|
|
310
307
|
Applies a filter to the internal cache.
|
|
311
308
|
|
|
@@ -316,25 +313,22 @@ class Evaluator:
|
|
|
316
313
|
"""
|
|
317
314
|
# apply filters
|
|
318
315
|
if filter_ is not None:
|
|
319
|
-
detailed_pairs,
|
|
316
|
+
detailed_pairs, _ = self.filter(filter_=filter_)
|
|
320
317
|
else:
|
|
321
318
|
detailed_pairs = self._detailed_pairs
|
|
322
|
-
label_metadata = self._label_metadata
|
|
323
319
|
|
|
324
320
|
if detailed_pairs.size == 0:
|
|
325
321
|
return list()
|
|
326
322
|
|
|
327
|
-
|
|
323
|
+
result = compute_confusion_matrix(
|
|
328
324
|
detailed_pairs=detailed_pairs,
|
|
329
|
-
label_metadata=label_metadata,
|
|
330
325
|
score_thresholds=np.array(score_thresholds),
|
|
331
326
|
hardmax=hardmax,
|
|
332
|
-
n_examples=number_of_examples,
|
|
333
327
|
)
|
|
334
328
|
return unpack_confusion_matrix_into_metric_list(
|
|
335
|
-
|
|
329
|
+
detailed_pairs=detailed_pairs,
|
|
330
|
+
result=result,
|
|
336
331
|
score_thresholds=score_thresholds,
|
|
337
|
-
number_of_examples=number_of_examples,
|
|
338
332
|
index_to_datum_id=self.index_to_datum_id,
|
|
339
333
|
index_to_label=self.index_to_label,
|
|
340
334
|
)
|
|
@@ -343,7 +337,6 @@ class Evaluator:
|
|
|
343
337
|
self,
|
|
344
338
|
score_thresholds: list[float] = [0.0],
|
|
345
339
|
hardmax: bool = True,
|
|
346
|
-
number_of_examples: int = 0,
|
|
347
340
|
filter_: Filter | None = None,
|
|
348
341
|
) -> dict[MetricType, list[Metric]]:
|
|
349
342
|
"""
|
|
@@ -355,8 +348,6 @@ class Evaluator:
|
|
|
355
348
|
A list of score thresholds to compute metrics over.
|
|
356
349
|
hardmax : bool
|
|
357
350
|
Toggles whether a hardmax is applied to predictions.
|
|
358
|
-
number_of_examples : int, default=0
|
|
359
|
-
The number of examples to return per count.
|
|
360
351
|
filter_ : Filter, optional
|
|
361
352
|
Applies a filter to the internal cache.
|
|
362
353
|
|
|
@@ -373,7 +364,6 @@ class Evaluator:
|
|
|
373
364
|
metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
|
|
374
365
|
score_thresholds=score_thresholds,
|
|
375
366
|
hardmax=hardmax,
|
|
376
|
-
number_of_examples=number_of_examples,
|
|
377
367
|
filter_=filter_,
|
|
378
368
|
)
|
|
379
369
|
return metrics
|
|
@@ -391,11 +381,17 @@ class Evaluator:
|
|
|
391
381
|
-------
|
|
392
382
|
int
|
|
393
383
|
The datum index.
|
|
384
|
+
|
|
385
|
+
Raises
|
|
386
|
+
------
|
|
387
|
+
ValueError
|
|
388
|
+
If datum id already exists.
|
|
394
389
|
"""
|
|
395
|
-
if uid
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
390
|
+
if uid in self.datum_id_to_index:
|
|
391
|
+
raise ValueError("datum with id '{uid}' already exists")
|
|
392
|
+
index = len(self.datum_id_to_index)
|
|
393
|
+
self.datum_id_to_index[uid] = index
|
|
394
|
+
self.index_to_datum_id.append(uid)
|
|
399
395
|
return self.datum_id_to_index[uid]
|
|
400
396
|
|
|
401
397
|
def _add_label(self, label: str) -> int:
|
|
@@ -497,7 +493,7 @@ class Evaluator:
|
|
|
497
493
|
A ready-to-use evaluator object.
|
|
498
494
|
"""
|
|
499
495
|
if self._detailed_pairs.size == 0:
|
|
500
|
-
raise
|
|
496
|
+
raise EmptyEvaluatorError()
|
|
501
497
|
|
|
502
498
|
self._label_metadata = compute_label_metadata(
|
|
503
499
|
ids=self._detailed_pairs[:, :3].astype(np.int32),
|
|
@@ -329,7 +329,6 @@ class Metric(BaseMetric):
|
|
|
329
329
|
],
|
|
330
330
|
],
|
|
331
331
|
score_threshold: float,
|
|
332
|
-
maximum_number_of_examples: int,
|
|
333
332
|
):
|
|
334
333
|
"""
|
|
335
334
|
The confusion matrix and related metrics for the classification task.
|
|
@@ -382,8 +381,6 @@ class Metric(BaseMetric):
|
|
|
382
381
|
Each example includes the datum UID.
|
|
383
382
|
score_threshold : float
|
|
384
383
|
The confidence score threshold used to filter predictions.
|
|
385
|
-
maximum_number_of_examples : int
|
|
386
|
-
The maximum number of examples per element.
|
|
387
384
|
|
|
388
385
|
Returns
|
|
389
386
|
-------
|
|
@@ -397,6 +394,5 @@ class Metric(BaseMetric):
|
|
|
397
394
|
},
|
|
398
395
|
parameters={
|
|
399
396
|
"score_threshold": score_threshold,
|
|
400
|
-
"maximum_number_of_examples": maximum_number_of_examples,
|
|
401
397
|
},
|
|
402
398
|
)
|
|
@@ -3,6 +3,7 @@ from collections import defaultdict
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from numpy.typing import NDArray
|
|
5
5
|
|
|
6
|
+
from valor_lite.classification.computation import PairClassification
|
|
6
7
|
from valor_lite.classification.metric import Metric, MetricType
|
|
7
8
|
|
|
8
9
|
|
|
@@ -101,129 +102,110 @@ def unpack_precision_recall_rocauc_into_metric_lists(
|
|
|
101
102
|
return metrics
|
|
102
103
|
|
|
103
104
|
|
|
104
|
-
def
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
|
|
116
|
-
confusion_matrix[
|
|
117
|
-
gt_label_idx,
|
|
118
|
-
pd_label_idx,
|
|
119
|
-
example_idx * 2 + 1,
|
|
120
|
-
]
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
|
|
124
|
-
confusion_matrix[
|
|
125
|
-
gt_label_idx,
|
|
126
|
-
pd_label_idx,
|
|
127
|
-
example_idx * 2 + 2,
|
|
128
|
-
]
|
|
105
|
+
def _create_empty_confusion_matrix(index_to_labels: list[str]):
|
|
106
|
+
unmatched_ground_truths = dict()
|
|
107
|
+
confusion_matrix = dict()
|
|
108
|
+
for label in index_to_labels:
|
|
109
|
+
unmatched_ground_truths[label] = {"count": 0, "examples": []}
|
|
110
|
+
confusion_matrix[label] = {}
|
|
111
|
+
for plabel in index_to_labels:
|
|
112
|
+
confusion_matrix[label][plabel] = {"count": 0, "examples": []}
|
|
113
|
+
return (
|
|
114
|
+
confusion_matrix,
|
|
115
|
+
unmatched_ground_truths,
|
|
129
116
|
)
|
|
130
117
|
|
|
131
|
-
return {
|
|
132
|
-
index_to_label[gt_label_idx]: {
|
|
133
|
-
index_to_label[pd_label_idx]: {
|
|
134
|
-
"count": max(
|
|
135
|
-
int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
|
|
136
|
-
0,
|
|
137
|
-
),
|
|
138
|
-
"examples": [
|
|
139
|
-
{
|
|
140
|
-
"datum_id": index_to_datum_id[
|
|
141
|
-
datum_idx(gt_label_idx, pd_label_idx, example_idx)
|
|
142
|
-
],
|
|
143
|
-
"score": score_idx(
|
|
144
|
-
gt_label_idx, pd_label_idx, example_idx
|
|
145
|
-
),
|
|
146
|
-
}
|
|
147
|
-
for example_idx in range(number_of_examples)
|
|
148
|
-
if datum_idx(gt_label_idx, pd_label_idx, example_idx) >= 0
|
|
149
|
-
],
|
|
150
|
-
}
|
|
151
|
-
for pd_label_idx in range(number_of_labels)
|
|
152
|
-
}
|
|
153
|
-
for gt_label_idx in range(number_of_labels)
|
|
154
|
-
}
|
|
155
|
-
|
|
156
118
|
|
|
157
|
-
def
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
119
|
+
def _unpack_confusion_matrix(
|
|
120
|
+
ids: NDArray[np.int32],
|
|
121
|
+
scores: NDArray[np.float64],
|
|
122
|
+
mask_matched: NDArray[np.bool_],
|
|
123
|
+
mask_fn_unmatched: NDArray[np.bool_],
|
|
161
124
|
index_to_datum_id: list[str],
|
|
162
125
|
index_to_label: list[str],
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
)
|
|
126
|
+
score_threshold: float,
|
|
127
|
+
):
|
|
128
|
+
(
|
|
129
|
+
confusion_matrix,
|
|
130
|
+
unmatched_ground_truths,
|
|
131
|
+
) = _create_empty_confusion_matrix(index_to_label)
|
|
132
|
+
|
|
133
|
+
unique_matches, unique_match_indices = np.unique(
|
|
134
|
+
ids[np.ix_(mask_matched, (0, 1, 2))], # type: ignore - numpy ix_ typing
|
|
135
|
+
axis=0,
|
|
136
|
+
return_index=True,
|
|
175
137
|
)
|
|
138
|
+
(
|
|
139
|
+
unique_unmatched_groundtruths,
|
|
140
|
+
unique_unmatched_groundtruth_indices,
|
|
141
|
+
) = np.unique(
|
|
142
|
+
ids[np.ix_(mask_fn_unmatched, (0, 1))], # type: ignore - numpy ix_ typing
|
|
143
|
+
axis=0,
|
|
144
|
+
return_index=True,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
n_matched = unique_matches.shape[0]
|
|
148
|
+
n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
|
|
149
|
+
n_max = max(n_matched, n_unmatched_groundtruths)
|
|
176
150
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
151
|
+
for idx in range(n_max):
|
|
152
|
+
if idx < n_matched:
|
|
153
|
+
glabel = index_to_label[unique_matches[idx, 1]]
|
|
154
|
+
plabel = index_to_label[unique_matches[idx, 2]]
|
|
155
|
+
confusion_matrix[glabel][plabel]["count"] += 1
|
|
156
|
+
confusion_matrix[glabel][plabel]["examples"].append(
|
|
157
|
+
{
|
|
158
|
+
"datum_id": index_to_datum_id[unique_matches[idx, 0]],
|
|
159
|
+
"score": float(scores[unique_match_indices[idx]]),
|
|
160
|
+
}
|
|
161
|
+
)
|
|
162
|
+
if idx < n_unmatched_groundtruths:
|
|
163
|
+
label = index_to_label[unique_unmatched_groundtruths[idx, 1]]
|
|
164
|
+
unmatched_ground_truths[label]["count"] += 1
|
|
165
|
+
unmatched_ground_truths[label]["examples"].append(
|
|
184
166
|
{
|
|
185
167
|
"datum_id": index_to_datum_id[
|
|
186
|
-
|
|
187
|
-
]
|
|
168
|
+
unique_unmatched_groundtruths[idx, 0]
|
|
169
|
+
],
|
|
188
170
|
}
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
return Metric.confusion_matrix(
|
|
174
|
+
confusion_matrix=confusion_matrix,
|
|
175
|
+
unmatched_ground_truths=unmatched_ground_truths,
|
|
176
|
+
score_threshold=score_threshold,
|
|
177
|
+
)
|
|
195
178
|
|
|
196
179
|
|
|
197
180
|
def unpack_confusion_matrix_into_metric_list(
|
|
198
|
-
|
|
181
|
+
result: NDArray[np.uint8],
|
|
182
|
+
detailed_pairs: NDArray[np.float64],
|
|
199
183
|
score_thresholds: list[float],
|
|
200
|
-
number_of_examples: int,
|
|
201
184
|
index_to_datum_id: list[str],
|
|
202
185
|
index_to_label: list[str],
|
|
203
186
|
) -> list[Metric]:
|
|
204
187
|
|
|
205
|
-
|
|
206
|
-
|
|
188
|
+
ids = detailed_pairs[:, :3].astype(np.int32)
|
|
189
|
+
|
|
190
|
+
mask_matched = (
|
|
191
|
+
np.bitwise_and(
|
|
192
|
+
result, PairClassification.TP | PairClassification.FP_FN_MISCLF
|
|
193
|
+
)
|
|
194
|
+
> 0
|
|
195
|
+
)
|
|
196
|
+
mask_fn_unmatched = (
|
|
197
|
+
np.bitwise_and(result, PairClassification.FN_UNMATCHED) > 0
|
|
198
|
+
)
|
|
199
|
+
|
|
207
200
|
return [
|
|
208
|
-
|
|
201
|
+
_unpack_confusion_matrix(
|
|
202
|
+
ids=ids,
|
|
203
|
+
scores=detailed_pairs[:, 3],
|
|
204
|
+
mask_matched=mask_matched[score_idx, :],
|
|
205
|
+
mask_fn_unmatched=mask_fn_unmatched[score_idx, :],
|
|
206
|
+
index_to_datum_id=index_to_datum_id,
|
|
207
|
+
index_to_label=index_to_label,
|
|
209
208
|
score_threshold=score_threshold,
|
|
210
|
-
maximum_number_of_examples=number_of_examples,
|
|
211
|
-
confusion_matrix=_unpack_confusion_matrix_value(
|
|
212
|
-
confusion_matrix=confusion_matrix[score_idx, :, :, :],
|
|
213
|
-
number_of_labels=n_labels,
|
|
214
|
-
number_of_examples=number_of_examples,
|
|
215
|
-
index_to_label=index_to_label,
|
|
216
|
-
index_to_datum_id=index_to_datum_id,
|
|
217
|
-
),
|
|
218
|
-
unmatched_ground_truths=_unpack_unmatched_ground_truths_value(
|
|
219
|
-
unmatched_ground_truths=unmatched_ground_truths[
|
|
220
|
-
score_idx, :, :
|
|
221
|
-
],
|
|
222
|
-
number_of_labels=n_labels,
|
|
223
|
-
number_of_examples=number_of_examples,
|
|
224
|
-
index_to_label=index_to_label,
|
|
225
|
-
index_to_datum_id=index_to_datum_id,
|
|
226
|
-
),
|
|
227
209
|
)
|
|
228
210
|
for score_idx, score_threshold in enumerate(score_thresholds)
|
|
229
211
|
]
|
valor_lite/exceptions.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
class
|
|
1
|
+
class EmptyEvaluatorError(Exception):
|
|
2
2
|
def __init__(self):
|
|
3
3
|
super().__init__(
|
|
4
4
|
"evaluator cannot be finalized as it contains no data"
|
|
5
5
|
)
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
class
|
|
8
|
+
class EmptyFilterError(Exception):
|
|
9
9
|
def __init__(self, message: str):
|
|
10
10
|
super().__init__(message)
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class
|
|
13
|
+
class InternalCacheError(Exception):
|
|
14
14
|
def __init__(self, message: str):
|
|
15
15
|
super().__init__(message)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .annotation import Bitmask, BoundingBox, Detection, Polygon
|
|
2
|
-
from .manager import DataLoader, Evaluator, Filter
|
|
2
|
+
from .manager import DataLoader, Evaluator, Filter, Metadata
|
|
3
3
|
from .metric import Metric, MetricType
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
@@ -12,4 +12,5 @@ __all__ = [
|
|
|
12
12
|
"DataLoader",
|
|
13
13
|
"Evaluator",
|
|
14
14
|
"Filter",
|
|
15
|
+
"Metadata",
|
|
15
16
|
]
|
|
@@ -6,9 +6,9 @@ from numpy.typing import NDArray
|
|
|
6
6
|
from tqdm import tqdm
|
|
7
7
|
|
|
8
8
|
from valor_lite.exceptions import (
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
EmptyEvaluatorError,
|
|
10
|
+
EmptyFilterError,
|
|
11
|
+
InternalCacheError,
|
|
12
12
|
)
|
|
13
13
|
from valor_lite.object_detection.annotation import (
|
|
14
14
|
Bitmask,
|
|
@@ -102,13 +102,13 @@ class Filter:
|
|
|
102
102
|
def __post_init__(self):
|
|
103
103
|
# validate datums mask
|
|
104
104
|
if not self.mask_datums.any():
|
|
105
|
-
raise
|
|
105
|
+
raise EmptyFilterError("filter removes all datums")
|
|
106
106
|
|
|
107
107
|
# validate annotation masks
|
|
108
108
|
no_gts = self.mask_groundtruths.all()
|
|
109
109
|
no_pds = self.mask_predictions.all()
|
|
110
110
|
if no_gts and no_pds:
|
|
111
|
-
raise
|
|
111
|
+
raise EmptyFilterError("filter removes all annotations")
|
|
112
112
|
elif no_gts:
|
|
113
113
|
warnings.warn("filter removes all ground truths")
|
|
114
114
|
elif no_pds:
|
|
@@ -197,7 +197,7 @@ class Evaluator:
|
|
|
197
197
|
# filter datums
|
|
198
198
|
if datum_ids is not None:
|
|
199
199
|
if not datum_ids:
|
|
200
|
-
raise
|
|
200
|
+
raise EmptyFilterError("filter removes all datums")
|
|
201
201
|
valid_datum_indices = np.array(
|
|
202
202
|
[self.datum_id_to_index[uid] for uid in datum_ids],
|
|
203
203
|
dtype=np.int32,
|
|
@@ -240,7 +240,7 @@ class Evaluator:
|
|
|
240
240
|
# filter by labels
|
|
241
241
|
if labels is not None:
|
|
242
242
|
if not labels:
|
|
243
|
-
raise
|
|
243
|
+
raise EmptyFilterError("filter removes all labels")
|
|
244
244
|
valid_label_indices = np.array(
|
|
245
245
|
[self.label_to_index[label] for label in labels] + [-1]
|
|
246
246
|
)
|
|
@@ -467,7 +467,7 @@ class DataLoader:
|
|
|
467
467
|
if len(self._evaluator.datum_id_to_index) != len(
|
|
468
468
|
self._evaluator.index_to_datum_id
|
|
469
469
|
):
|
|
470
|
-
raise
|
|
470
|
+
raise InternalCacheError("datum cache size mismatch")
|
|
471
471
|
idx = len(self._evaluator.datum_id_to_index)
|
|
472
472
|
self._evaluator.datum_id_to_index[datum_id] = idx
|
|
473
473
|
self._evaluator.index_to_datum_id.append(datum_id)
|
|
@@ -491,9 +491,7 @@ class DataLoader:
|
|
|
491
491
|
if len(self._evaluator.groundtruth_id_to_index) != len(
|
|
492
492
|
self._evaluator.index_to_groundtruth_id
|
|
493
493
|
):
|
|
494
|
-
raise
|
|
495
|
-
"ground truth cache size mismatch"
|
|
496
|
-
)
|
|
494
|
+
raise InternalCacheError("ground truth cache size mismatch")
|
|
497
495
|
idx = len(self._evaluator.groundtruth_id_to_index)
|
|
498
496
|
self._evaluator.groundtruth_id_to_index[annotation_id] = idx
|
|
499
497
|
self._evaluator.index_to_groundtruth_id.append(annotation_id)
|
|
@@ -517,7 +515,7 @@ class DataLoader:
|
|
|
517
515
|
if len(self._evaluator.prediction_id_to_index) != len(
|
|
518
516
|
self._evaluator.index_to_prediction_id
|
|
519
517
|
):
|
|
520
|
-
raise
|
|
518
|
+
raise InternalCacheError("prediction cache size mismatch")
|
|
521
519
|
idx = len(self._evaluator.prediction_id_to_index)
|
|
522
520
|
self._evaluator.prediction_id_to_index[annotation_id] = idx
|
|
523
521
|
self._evaluator.index_to_prediction_id.append(annotation_id)
|
|
@@ -542,7 +540,7 @@ class DataLoader:
|
|
|
542
540
|
if len(self._evaluator.label_to_index) != len(
|
|
543
541
|
self._evaluator.index_to_label
|
|
544
542
|
):
|
|
545
|
-
raise
|
|
543
|
+
raise InternalCacheError("label cache size mismatch")
|
|
546
544
|
self._evaluator.label_to_index[label] = label_id
|
|
547
545
|
self._evaluator.index_to_label.append(label)
|
|
548
546
|
label_id += 1
|
|
@@ -768,14 +766,14 @@ class DataLoader:
|
|
|
768
766
|
A ready-to-use evaluator object.
|
|
769
767
|
"""
|
|
770
768
|
if not self.pairs:
|
|
771
|
-
raise
|
|
769
|
+
raise EmptyEvaluatorError()
|
|
772
770
|
|
|
773
771
|
n_labels = len(self._evaluator.index_to_label)
|
|
774
772
|
n_datums = len(self._evaluator.index_to_datum_id)
|
|
775
773
|
|
|
776
774
|
self._evaluator._detailed_pairs = np.concatenate(self.pairs, axis=0)
|
|
777
775
|
if self._evaluator._detailed_pairs.size == 0:
|
|
778
|
-
raise
|
|
776
|
+
raise EmptyEvaluatorError()
|
|
779
777
|
|
|
780
778
|
# order pairs by descending score, iou
|
|
781
779
|
indices = np.lexsort(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .annotation import Bitmask, Segmentation
|
|
2
|
-
from .manager import DataLoader, Evaluator, Filter
|
|
2
|
+
from .manager import DataLoader, Evaluator, Filter, Metadata
|
|
3
3
|
from .metric import Metric, MetricType
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
@@ -10,4 +10,5 @@ __all__ = [
|
|
|
10
10
|
"Metric",
|
|
11
11
|
"MetricType",
|
|
12
12
|
"Filter",
|
|
13
|
+
"Metadata",
|
|
13
14
|
]
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
from numpy.typing import NDArray
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
7
|
-
from valor_lite.exceptions import
|
|
7
|
+
from valor_lite.exceptions import EmptyEvaluatorError, EmptyFilterError
|
|
8
8
|
from valor_lite.semantic_segmentation.annotation import Segmentation
|
|
9
9
|
from valor_lite.semantic_segmentation.computation import (
|
|
10
10
|
compute_intermediate_confusion_matrices,
|
|
@@ -74,11 +74,11 @@ class Filter:
|
|
|
74
74
|
def __post_init__(self):
|
|
75
75
|
# validate datum mask
|
|
76
76
|
if not self.datum_mask.any():
|
|
77
|
-
raise
|
|
77
|
+
raise EmptyFilterError("filter removes all datums")
|
|
78
78
|
|
|
79
79
|
# validate label mask
|
|
80
80
|
if self.label_mask.all():
|
|
81
|
-
raise
|
|
81
|
+
raise EmptyFilterError("filter removes all labels")
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
class Evaluator:
|
|
@@ -403,7 +403,7 @@ class DataLoader:
|
|
|
403
403
|
"""
|
|
404
404
|
|
|
405
405
|
if len(self.matrices) == 0:
|
|
406
|
-
raise
|
|
406
|
+
raise EmptyEvaluatorError()
|
|
407
407
|
|
|
408
408
|
n_labels = len(self._evaluator.index_to_label)
|
|
409
409
|
n_datums = len(self._evaluator.index_to_datum_id)
|
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
|
|
2
2
|
valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
valor_lite/exceptions.py,sha256=
|
|
3
|
+
valor_lite/exceptions.py,sha256=Q0PLMu0PnCPBx438iEPzpOQyMOcMOA3lOf5xQZP_yYU,385
|
|
4
4
|
valor_lite/profiling.py,sha256=TLIROA1qccFw9NoEkMeQcrvvGGO75c4K5yTIWoCUix8,11746
|
|
5
5
|
valor_lite/schemas.py,sha256=pB0MrPx5qFLbwBWDiOUUm-vmXdWvbJLFCBmKgbcbI5g,198
|
|
6
|
-
valor_lite/classification/__init__.py,sha256=
|
|
6
|
+
valor_lite/classification/__init__.py,sha256=KXaVwyqAbeeeEq7bzNPyt4GTpbxhrABjV7lR58KR6Y4,440
|
|
7
7
|
valor_lite/classification/annotation.py,sha256=0aUOvcwBAZgiNOJuyh-pXyNTG7vP7r8CUfnU3OmpUwQ,1113
|
|
8
|
-
valor_lite/classification/computation.py,sha256=
|
|
9
|
-
valor_lite/classification/manager.py,sha256=
|
|
10
|
-
valor_lite/classification/metric.py,sha256=
|
|
8
|
+
valor_lite/classification/computation.py,sha256=kB5n-RHzDsKG75Guvgg25xAOeLEQCq1TgjwHwfwbQ60,12010
|
|
9
|
+
valor_lite/classification/manager.py,sha256=OGLtdTkOlhyU5zpjRFy4c3zqZ2Lt3SV38J6hzbAXrJY,15905
|
|
10
|
+
valor_lite/classification/metric.py,sha256=nSNWjoxQ1ou7gxTPOYxLNoUYf7avKQzJq3NHR9jzM48,11693
|
|
11
11
|
valor_lite/classification/numpy_compatibility.py,sha256=roqtTetsm1_HxuaejrthQdydjsRIy-FpXpGb86cLh_E,365
|
|
12
|
-
valor_lite/classification/utilities.py,sha256=
|
|
13
|
-
valor_lite/object_detection/__init__.py,sha256=
|
|
12
|
+
valor_lite/classification/utilities.py,sha256=jAcir7dW-o4I2gk_NEmlRr8j8Iniyyq9QT5j3PMxVHk,6435
|
|
13
|
+
valor_lite/object_detection/__init__.py,sha256=eSrVAOpSykk1CfHXIKy1necplonUGxjyVKyDQ5UZoBQ,343
|
|
14
14
|
valor_lite/object_detection/annotation.py,sha256=LVec-rIk408LuFxcOoIkPk0QZMWSSxbmsady4wapC1s,7007
|
|
15
15
|
valor_lite/object_detection/computation.py,sha256=njLN-1_yql56NSVxY4KGKohxJUIStPYczVTpEpj5geA,24478
|
|
16
|
-
valor_lite/object_detection/manager.py,sha256
|
|
16
|
+
valor_lite/object_detection/manager.py,sha256=-DayHeOirI23pYCp2SqMv4EmsqYuADR95qXt1JMoNo8,27444
|
|
17
17
|
valor_lite/object_detection/metric.py,sha256=sUYSZwXYfIyfmXG6_7Tje1_ZL_QwvecPq85jrGmwOWE,22739
|
|
18
18
|
valor_lite/object_detection/utilities.py,sha256=tNdv5dL7JhzOamGQkZ8x3ocZoTwPI6K8rcRAGMhp2nc,11217
|
|
19
|
-
valor_lite/semantic_segmentation/__init__.py,sha256=
|
|
19
|
+
valor_lite/semantic_segmentation/__init__.py,sha256=3YdItCThY_tW23IChCBm-R0zahnbZ06JDVjs-gQLVes,293
|
|
20
20
|
valor_lite/semantic_segmentation/annotation.py,sha256=XRMV32Sx9A1bAVMFQdBGc3tN5Xz2RfmlyKGXCzdee7A,3705
|
|
21
21
|
valor_lite/semantic_segmentation/benchmark.py,sha256=uxd0SiDY3npsgU5pdeT4HvNP_au9GVRWzoqT6br9DtM,5961
|
|
22
22
|
valor_lite/semantic_segmentation/computation.py,sha256=ZO0qAFmq8lN73UjCyiynSv18qQDtn35FNOmvuXY4rOw,7380
|
|
23
|
-
valor_lite/semantic_segmentation/manager.py,sha256=
|
|
23
|
+
valor_lite/semantic_segmentation/manager.py,sha256=NAm7u1HNwPnBbZBSMX_A51f5jUB578-4ZDmBbpeAe7M,13299
|
|
24
24
|
valor_lite/semantic_segmentation/metric.py,sha256=T9RfPJf4WgqGQTXYvSy08vJG5bjXXJnyYZeW0mlxMa8,7132
|
|
25
25
|
valor_lite/semantic_segmentation/utilities.py,sha256=zgVmV8nyKWQK-T4Ov8cZFQzOmTKc5EL7errKFvc2H0g,2957
|
|
26
26
|
valor_lite/text_generation/__init__.py,sha256=pGhpWCSZjLM0pPHCtPykAfos55B8ie3mi9EzbNxfj-U,356
|
|
@@ -35,7 +35,7 @@ valor_lite/text_generation/llm/instructions.py,sha256=fz2onBZZWcl5W8iy7zEWkPGU9N
|
|
|
35
35
|
valor_lite/text_generation/llm/integrations.py,sha256=-rTfdAjq1zH-4ixwYuMQEOQ80pIFzMTe0BYfroVx3Pg,6974
|
|
36
36
|
valor_lite/text_generation/llm/utilities.py,sha256=bjqatGgtVTcl1PrMwiDKTYPGJXKrBrx7PDtzIblGSys,1178
|
|
37
37
|
valor_lite/text_generation/llm/validators.py,sha256=Wzr5RlfF58_2wOU-uTw7C8skan_fYdhy4Gfn0jSJ8HM,2700
|
|
38
|
-
valor_lite-0.36.
|
|
39
|
-
valor_lite-0.36.
|
|
40
|
-
valor_lite-0.36.
|
|
41
|
-
valor_lite-0.36.
|
|
38
|
+
valor_lite-0.36.3.dist-info/METADATA,sha256=My77v0oKGoLJuhNu26YD_Q2dKJGTLTbmsJtYGwGDomA,5071
|
|
39
|
+
valor_lite-0.36.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
valor_lite-0.36.3.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
|
|
41
|
+
valor_lite-0.36.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|