valor-lite 0.36.5__py3-none-any.whl → 0.37.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +211 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +536 -0
- valor_lite/classification/__init__.py +5 -10
- valor_lite/classification/annotation.py +4 -0
- valor_lite/classification/computation.py +233 -251
- valor_lite/classification/evaluator.py +882 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +141 -4
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +221 -118
- valor_lite/exceptions.py +5 -0
- valor_lite/object_detection/__init__.py +5 -4
- valor_lite/object_detection/annotation.py +13 -1
- valor_lite/object_detection/computation.py +367 -304
- valor_lite/object_detection/evaluator.py +804 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +152 -3
- valor_lite/object_detection/shared.py +206 -0
- valor_lite/object_detection/utilities.py +182 -109
- valor_lite/semantic_segmentation/__init__.py +5 -4
- valor_lite/semantic_segmentation/annotation.py +7 -0
- valor_lite/semantic_segmentation/computation.py +20 -110
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +6 -23
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
- valor_lite-0.37.5.dist-info/RECORD +49 -0
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
- valor_lite/classification/manager.py +0 -545
- valor_lite/object_detection/manager.py +0 -865
- valor_lite/profiling.py +0 -374
- valor_lite/semantic_segmentation/benchmark.py +0 -237
- valor_lite/semantic_segmentation/manager.py +0 -446
- valor_lite-0.36.5.dist-info/RECORD +0 -41
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
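
The listing shows each task's manager.py replaced by evaluator/loader/shared modules plus a new valor_lite.cache package. A minimal sketch of the resulting import surface, using module paths taken from the diff below; whether these names are also re-exported from the package __init__ files is not shown here:

from valor_lite.cache import FileCacheWriter, MemoryCacheWriter
from valor_lite.semantic_segmentation.evaluator import Builder, Evaluator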

valor_lite/semantic_segmentation/computation.py

@@ -2,93 +2,13 @@ import numpy as np
 from numpy.typing import NDArray
 
 
-def compute_label_metadata(
-    confusion_matrices: NDArray[np.int64],
-    n_labels: int,
-) -> NDArray[np.int64]:
-    """
-    Computes label metadata returning a count of annotations per label.
-
-    Parameters
-    ----------
-    confusion_matrices : NDArray[np.int64]
-        Confusion matrices per datum with shape (n_datums, n_labels + 1, n_labels + 1).
-    n_labels : int
-        The total number of unique labels.
-
-    Returns
-    -------
-    NDArray[np.int64]
-        The label metadata array with shape (n_labels, 2).
-        Index 0 - Ground truth label count
-        Index 1 - Prediction label count
-    """
-    label_metadata = np.zeros((n_labels, 2), dtype=np.int64)
-    label_metadata[:, 0] = confusion_matrices[:, 1:, :].sum(axis=(0, 2))
-    label_metadata[:, 1] = confusion_matrices[:, :, 1:].sum(axis=(0, 1))
-    return label_metadata
-
-
-def filter_cache(
-    confusion_matrices: NDArray[np.int64],
-    datum_mask: NDArray[np.bool_],
-    label_mask: NDArray[np.bool_],
-    number_of_labels: int,
-) -> tuple[NDArray[np.int64], NDArray[np.int64]]:
-    """
-    Performs the filter operation over the internal cache.
-
-    Parameters
-    ----------
-    confusion_matrices : NDArray[int64]
-        The internal evaluator cache.
-    datum_mask : NDArray[bool]
-        A mask that filters out datums.
-    datum_mask : NDArray[bool]
-        A mask that filters out labels.
-
-    Returns
-    -------
-    NDArray[int64]
-        Filtered confusion matrices.
-    NDArray[int64]
-        Filtered label metadata.
-    """
-    if label_mask.any():
-        # add filtered labels to background
-        null_predictions = confusion_matrices[:, label_mask, :].sum(
-            axis=(1, 2)
-        )
-        null_groundtruths = confusion_matrices[:, :, label_mask].sum(
-            axis=(1, 2)
-        )
-        null_intersection = (
-            confusion_matrices[:, label_mask, label_mask]
-            .reshape(confusion_matrices.shape[0], -1)
-            .sum(axis=1)
-        )
-        confusion_matrices[:, 0, 0] += (
-            null_groundtruths + null_predictions - null_intersection
-        )
-        confusion_matrices[:, label_mask, :] = 0
-        confusion_matrices[:, :, label_mask] = 0
-
-    confusion_matrices = confusion_matrices[datum_mask]
-
-    label_metadata = compute_label_metadata(
-        confusion_matrices=confusion_matrices,
-        n_labels=number_of_labels,
-    )
-    return confusion_matrices, label_metadata
-
-
-def compute_intermediate_confusion_matrices(
+def compute_intermediates(
     groundtruths: NDArray[np.bool_],
     predictions: NDArray[np.bool_],
     groundtruth_labels: NDArray[np.int64],
     prediction_labels: NDArray[np.int64],
     n_labels: int,
-) -> NDArray[np.int64]:
+) -> NDArray[np.uint64]:
     """
     Computes an intermediate confusion matrix containing label counts.
 
@@ -99,15 +19,15 @@ def compute_intermediate_confusion_matrices(
     predictions : NDArray[np.bool_]
         A 2-D array containing flattened bitmasks for each label.
     groundtruth_labels : NDArray[np.int64]
-        A 1-D array containing label indices.
-    prediction_labels : NDArray[np.int64]
-        A 1-D array containing label indices.
+        A 1-D array containing ground truth label indices.
+    prediction_labels : NDArray[np.int64]
+        A 1-D array containing prediction label indices.
     n_labels : int
         The number of unique labels.
 
     Returns
     -------
-    NDArray[np.int64]
+    NDArray[np.uint64]
         A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
     """
 
@@ -125,7 +45,7 @@ def compute_intermediate_confusion_matrices(
     intersected_groundtruth_counts = intersection_counts.sum(axis=1)
     intersected_prediction_counts = intersection_counts.sum(axis=0)
 
-    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=np.int64)
+    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=np.uint64)
     confusion_matrix[0, 0] = background_counts
     confusion_matrix[
         np.ix_(groundtruth_labels + 1, prediction_labels + 1)
@@ -136,14 +56,11 @@ def compute_intermediate_confusion_matrices(
     confusion_matrix[groundtruth_labels + 1, 0] = (
         groundtruth_counts - intersected_groundtruth_counts
     )
-
     return confusion_matrix
 
 
 def compute_metrics(
-    confusion_matrices: NDArray[np.int64],
-    label_metadata: NDArray[np.int64],
-    n_pixels: int,
+    confusion_matrix: NDArray[np.uint64],
 ) -> tuple[
     NDArray[np.float64],
     NDArray[np.float64],
@@ -156,16 +73,10 @@ def compute_metrics(
     """
     Computes semantic segmentation metrics.
 
-    Takes data with shape (3, N).
-
     Parameters
     ----------
-    confusion_matrices : NDArray[np.int64]
-        A
-    label_metadata : NDArray[np.int64]
-        A 2-D array containing label metadata with shape (n_labels, 2).
-        Index 0: Ground Truth Label Count
-        Index 1: Prediction Label Count
+    counts : NDArray[np.uint64]
+        A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
 
     Returns
     -------
@@ -184,14 +95,13 @@ def compute_metrics(
     NDArray[np.float64]
         Unmatched ground truth ratios.
     """
-    n_labels =
-
-
-
-    counts = confusion_matrices.sum(axis=0)
+    n_labels = confusion_matrix.shape[0] - 1
+    n_pixels = confusion_matrix.sum()
+    gt_counts = confusion_matrix[1:, :].sum(axis=1)
+    pd_counts = confusion_matrix[:, 1:].sum(axis=0)
 
     # compute iou, unmatched_ground_truth and unmatched predictions
-    intersection_ =
+    intersection_ = confusion_matrix[1:, 1:]
     union_ = (
         gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection_
     )
@@ -206,7 +116,7 @@ def compute_metrics(
 
     unmatched_prediction_ratio = np.zeros((n_labels), dtype=np.float64)
     np.divide(
-
+        confusion_matrix[0, 1:],
         pd_counts,
         where=pd_counts > 1e-9,
         out=unmatched_prediction_ratio,
@@ -214,14 +124,14 @@
 
     unmatched_ground_truth_ratio = np.zeros((n_labels), dtype=np.float64)
     np.divide(
-
+        confusion_matrix[1:, 0],
         gt_counts,
         where=gt_counts > 1e-9,
         out=unmatched_ground_truth_ratio,
     )
 
     # compute precision, recall, f1
-    tp_counts =
+    tp_counts = confusion_matrix.diagonal()[1:]
 
     precision = np.zeros(n_labels, dtype=np.float64)
     np.divide(tp_counts, pd_counts, where=pd_counts > 1e-9, out=precision)
@@ -238,8 +148,8 @@
     )
 
     # compute accuracy
-    tp_count =
-    background_count =
+    tp_count = confusion_matrix[1:, 1:].diagonal().sum()
+    background_count = confusion_matrix[0, 0]
     accuracy = (
         (tp_count + background_count) / n_pixels if n_pixels > 0 else 0.0
     )
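
The new compute_metrics consumes a single aggregated confusion matrix rather than per-datum matrices plus label_metadata. A minimal sketch of that counting scheme, mirroring the slices in the added lines above; the toy matrix and its values are invented for illustration:

import numpy as np

# Row/column 0 is the background; row i+1 is ground truth label i and
# column j+1 is predicted label j. Entries are pixel counts.
cm = np.array(
    [
        [50, 2, 3],
        [4, 30, 1],
        [5, 2, 20],
    ],
    dtype=np.uint64,
)

n_labels = cm.shape[0] - 1           # 2
n_pixels = cm.sum()                  # 117
gt_counts = cm[1:, :].sum(axis=1)    # [35, 27] ground truth pixels per label
pd_counts = cm[:, 1:].sum(axis=0)    # [34, 24] predicted pixels per label

intersection = cm[1:, 1:]
union = gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection
iou = np.diag(intersection / union)              # [30/39, 20/31]
precision = intersection.diagonal() / pd_counts  # [30/34, 20/24]
recall = intersection.diagonal() / gt_counts     # [30/35, 20/27]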
valor_lite/semantic_segmentation/evaluator.py

@@ -0,0 +1,414 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+from numpy.typing import NDArray
+
+from valor_lite.cache import (
+    FileCacheReader,
+    FileCacheWriter,
+    MemoryCacheReader,
+    MemoryCacheWriter,
+)
+from valor_lite.exceptions import EmptyCacheError
+from valor_lite.semantic_segmentation.computation import compute_metrics
+from valor_lite.semantic_segmentation.metric import MetricType
+from valor_lite.semantic_segmentation.shared import (
+    EvaluatorInfo,
+    decode_metadata_fields,
+    encode_metadata_fields,
+    extract_counts,
+    extract_labels,
+    generate_cache_path,
+    generate_metadata_path,
+    generate_schema,
+)
+from valor_lite.semantic_segmentation.utilities import (
+    unpack_precision_recall_iou_into_metric_lists,
+)
+
+
+class Builder:
+    def __init__(
+        self,
+        writer: MemoryCacheWriter | FileCacheWriter,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._writer = writer
+        self._metadata_fields = metadata_fields
+
+    @classmethod
+    def in_memory(
+        cls,
+        batch_size: int = 10_000,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create an in-memory evaluator cache.
+
+        Parameters
+        ----------
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        # create cache
+        writer = MemoryCacheWriter.create(
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+        )
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    @classmethod
+    def persistent(
+        cls,
+        path: str | Path,
+        batch_size: int = 10_000,
+        rows_per_file: int = 100_000,
+        compression: str = "snappy",
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create a persistent file-based evaluator cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Where to store the file-based cache.
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        rows_per_file : int, default=100_000
+            The target number of rows to store per cache file. Defaults to 100_000.
+        compression : str, default="snappy"
+            The compression methods used when writing cache files.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        path = Path(path)
+
+        # create cache
+        writer = FileCacheWriter.create(
+            path=generate_cache_path(path),
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+            rows_per_file=rows_per_file,
+            compression=compression,
+        )
+
+        # write metadata
+        metadata_path = generate_metadata_path(path)
+        with open(metadata_path, "w") as f:
+            encoded_types = encode_metadata_fields(metadata_fields)
+            json.dump(encoded_types, f, indent=2)
+
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    def finalize(
+        self,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Performs data finalization and some preprocessing steps.
+
+        Parameters
+        ----------
+        index_to_label_override : dict[int, str], optional
+            Pre-configures label mapping. Used when operating over filtered subsets.
+
+        Returns
+        -------
+        Evaluator
+            A ready-to-use evaluator object.
+        """
+        self._writer.flush()
+        if self._writer.count_rows() == 0:
+            raise EmptyCacheError()
+
+        reader = self._writer.to_reader()
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        return Evaluator(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=self._metadata_fields,
+        )
+
+
+class Evaluator:
+    def __init__(
+        self,
+        reader: MemoryCacheReader | FileCacheReader,
+        index_to_label: dict[int, str],
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._reader = reader
+        self._index_to_label = index_to_label
+        self._metadata_fields = metadata_fields
+
+    @property
+    def info(self) -> EvaluatorInfo:
+        return self.get_info()
+
+    def get_info(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+    ) -> EvaluatorInfo:
+        info = EvaluatorInfo()
+        info.number_of_rows = self._reader.count_rows()
+        info.number_of_labels = len(self._index_to_label)
+        info.metadata_fields = self._metadata_fields
+        (
+            info.number_of_datums,
+            info.number_of_pixels,
+            info.number_of_groundtruth_pixels,
+            info.number_of_prediction_pixels,
+        ) = extract_counts(
+            reader=self._reader,
+            datums=datums,
+            groundtruths=groundtruths,
+            predictions=predictions,
+        )
+        return info
+
+    @classmethod
+    def load(
+        cls,
+        path: str | Path,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Load from an existing semantic segmentation cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Path to the existing cache.
+        index_to_label_override : dict[int, str], optional
+            Option to preset index to label dictionary. Used when loading from filtered caches.
+        """
+        # validate path
+        path = Path(path)
+        if not path.exists():
+            raise FileNotFoundError(f"Directory does not exist: {path}")
+        elif not path.is_dir():
+            raise NotADirectoryError(
+                f"Path exists but is not a directory: {path}"
+            )
+
+        # load cache
+        reader = FileCacheReader.load(generate_cache_path(path))
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        # read config
+        metadata_path = generate_metadata_path(path)
+        metadata_fields = None
+        with open(metadata_path, "r") as f:
+            metadata_types = json.load(f)
+            metadata_fields = decode_metadata_fields(metadata_types)
+
+        return cls(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=metadata_fields,
+        )
+
+    def filter(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+        path: str | Path | None = None,
+    ) -> Evaluator:
+        """
+        Filter evaluator cache.
+
+        Parameters
+        ----------
+        datums : pc.Expression | None = None
+            A filter expression used to filter datums.
+        groundtruths : pc.Expression | None = None
+            A filter expression used to filter ground truth annotations.
+        predictions : pc.Expression | None = None
+            A filter expression used to filter predictions.
+        path : str | Path, optional
+            Where to store the filtered cache if storing on disk.
+
+        Returns
+        -------
+        Evaluator
+            A new evaluator object containing the filtered cache.
+        """
+        if isinstance(self._reader, FileCacheReader):
+            if not path:
+                raise ValueError(
+                    "expected path to be defined for file-based cache"
+                )
+            builder = Builder.persistent(
+                path=path,
+                batch_size=self._reader.batch_size,
+                rows_per_file=self._reader.rows_per_file,
+                compression=self._reader.compression,
+                metadata_fields=self.info.metadata_fields,
+            )
+        else:
+            builder = Builder.in_memory(
+                batch_size=self._reader.batch_size,
+                metadata_fields=self.info.metadata_fields,
+            )
+
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            pairs = np.column_stack([tbl[col].to_numpy() for col in columns])
+
+            n_pairs = pairs.shape[0]
+            gt_ids = pairs[:, (0, 1)].astype(np.int64)
+            pd_ids = pairs[:, (0, 2)].astype(np.int64)
+
+            if groundtruths is not None:
+                mask_valid_gt = np.zeros(n_pairs, dtype=np.bool_)
+                gt_tbl = tbl.filter(groundtruths)
+                gt_pairs = np.column_stack(
+                    [
+                        gt_tbl[col].to_numpy()
+                        for col in ("datum_id", "gt_label_id")
+                    ]
+                ).astype(np.int64)
+                for gt in np.unique(gt_pairs, axis=0):
+                    mask_valid_gt |= (gt_ids == gt).all(axis=1)
+            else:
+                mask_valid_gt = np.ones(n_pairs, dtype=np.bool_)
+
+            if predictions is not None:
+                mask_valid_pd = np.zeros(n_pairs, dtype=np.bool_)
+                pd_tbl = tbl.filter(predictions)
+                pd_pairs = np.column_stack(
+                    [
+                        pd_tbl[col].to_numpy()
+                        for col in ("datum_id", "pd_label_id")
+                    ]
+                ).astype(np.int64)
+                for pd in np.unique(pd_pairs, axis=0):
+                    mask_valid_pd |= (pd_ids == pd).all(axis=1)
+            else:
+                mask_valid_pd = np.ones(n_pairs, dtype=np.bool_)
+
+            mask_valid = mask_valid_gt | mask_valid_pd
+            mask_valid_gt &= mask_valid
+            mask_valid_pd &= mask_valid
+
+            pairs[~mask_valid_gt, 1] = -1
+            pairs[~mask_valid_pd, 2] = -1
+
+            for idx, col in enumerate(columns):
+                tbl = tbl.set_column(
+                    tbl.schema.names.index(col), col, pa.array(pairs[:, idx])
+                )
+            builder._writer.write_table(tbl)
+
+        return builder.finalize(index_to_label_override=self._index_to_label)
+
+    def _compute_confusion_matrix_intermediate(
+        self, datums: pc.Expression | None = None
+    ) -> NDArray[np.uint64]:
+        """
+        Performs an evaluation and returns metrics.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
+        """
+        n_labels = len(self._index_to_label)
+        confusion_matrix = np.zeros(
+            (n_labels + 1, n_labels + 1), dtype=np.uint64
+        )
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            ids = np.column_stack(
+                [tbl[col].to_numpy() for col in columns]
+            ).astype(np.int64)
+            counts = tbl["count"].to_numpy()
+
+            mask_null_gts = ids[:, 1] == -1
+            mask_null_pds = ids[:, 2] == -1
+            confusion_matrix[0, 0] += counts[
+                mask_null_gts & mask_null_pds
+            ].sum()
+            for idx in range(n_labels):
+                mask_gts = ids[:, 1] == idx
+                for pidx in range(n_labels):
+                    mask_pds = ids[:, 2] == pidx
+                    confusion_matrix[idx + 1, pidx + 1] += counts[
+                        mask_gts & mask_pds
+                    ].sum()
+
+                mask_unmatched_gts = mask_gts & mask_null_pds
+                confusion_matrix[idx + 1, 0] += counts[
+                    mask_unmatched_gts
+                ].sum()
+                mask_unmatched_pds = mask_null_gts & (ids[:, 2] == idx)
+                confusion_matrix[0, idx + 1] += counts[
+                    mask_unmatched_pds
+                ].sum()
+        return confusion_matrix
+
+    def compute_precision_recall_iou(
+        self, datums: pc.Expression | None = None
+    ) -> dict[MetricType, list]:
+        """
+        Performs an evaluation and returns metrics.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
+        """
+        confusion_matrix = self._compute_confusion_matrix_intermediate(
+            datums=datums
+        )
+        results = compute_metrics(confusion_matrix=confusion_matrix)
+        return unpack_precision_recall_iou_into_metric_lists(
+            results=results,
+            index_to_label=self._index_to_label,
+        )