valor-lite 0.34.3__py3-none-any.whl → 0.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- valor_lite/classification/computation.py +147 -38
- valor_lite/classification/manager.py +229 -236
- valor_lite/classification/metric.py +5 -8
- valor_lite/classification/utilities.py +18 -14
- valor_lite/object_detection/__init__.py +2 -15
- valor_lite/object_detection/annotation.py +24 -48
- valor_lite/object_detection/computation.py +324 -384
- valor_lite/object_detection/manager.py +549 -456
- valor_lite/object_detection/metric.py +16 -34
- valor_lite/object_detection/utilities.py +134 -305
- valor_lite/semantic_segmentation/__init__.py +3 -3
- valor_lite/semantic_segmentation/annotation.py +32 -103
- valor_lite/semantic_segmentation/benchmark.py +87 -1
- valor_lite/semantic_segmentation/computation.py +96 -14
- valor_lite/semantic_segmentation/manager.py +199 -222
- valor_lite/semantic_segmentation/utilities.py +3 -3
- {valor_lite-0.34.3.dist-info → valor_lite-0.36.0.dist-info}/METADATA +2 -2
- {valor_lite-0.34.3.dist-info → valor_lite-0.36.0.dist-info}/RECORD +20 -20
- {valor_lite-0.34.3.dist-info → valor_lite-0.36.0.dist-info}/WHEEL +1 -1
- {valor_lite-0.34.3.dist-info → valor_lite-0.36.0.dist-info}/top_level.txt +0 -0
valor_lite/classification/manager.py

@@ -1,5 +1,5 @@
-
-from dataclasses import dataclass
+import warnings
+from dataclasses import asdict, dataclass

 import numpy as np
 from numpy.typing import NDArray
@@ -8,7 +8,9 @@ from tqdm import tqdm
 from valor_lite.classification.annotation import Classification
 from valor_lite.classification.computation import (
     compute_confusion_matrix,
+    compute_label_metadata,
     compute_precision_recall_rocauc,
+    filter_cache,
 )
 from valor_lite.classification.metric import Metric, MetricType
 from valor_lite.classification.utilities import (
@@ -37,11 +39,51 @@ filtered_metrics = evaluator.evaluate(filter_mask=filter_mask)
 """


+@dataclass
+class Metadata:
+    number_of_datums: int = 0
+    number_of_ground_truths: int = 0
+    number_of_predictions: int = 0
+    number_of_labels: int = 0
+
+    @classmethod
+    def create(
+        cls,
+        detailed_pairs: NDArray[np.float64],
+        number_of_datums: int,
+        number_of_labels: int,
+    ):
+        # count number of unique ground truths
+        mask_valid_gts = detailed_pairs[:, 1] >= 0
+        unique_ids = np.unique(
+            detailed_pairs[np.ix_(mask_valid_gts, (0, 1))],  # type: ignore - np.ix_ typing
+            axis=0,
+        )
+        number_of_ground_truths = int(unique_ids.shape[0])
+
+        # count number of unqiue predictions
+        mask_valid_pds = detailed_pairs[:, 2] >= 0
+        unique_ids = np.unique(
+            detailed_pairs[np.ix_(mask_valid_pds, (0, 2))], axis=0  # type: ignore - np.ix_ typing
+        )
+        number_of_predictions = int(unique_ids.shape[0])
+
+        return cls(
+            number_of_datums=number_of_datums,
+            number_of_ground_truths=number_of_ground_truths,
+            number_of_predictions=number_of_predictions,
+            number_of_labels=number_of_labels,
+        )
+
+    def to_dict(self) -> dict[str, int | bool]:
+        return asdict(self)
+
+
 @dataclass
 class Filter:
-
-
-
+    datum_mask: NDArray[np.bool_]
+    valid_label_indices: NDArray[np.int32] | None
+    metadata: Metadata


 class Evaluator:
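For orientation, each row of the evaluator's internal `_detailed_pairs` array packs (datum index, ground truth label index, prediction label index, score, hardmax flag), as assembled in `add_data` further down in this diff. A minimal sketch of what the new `Metadata.create` classmethod reports for such an array, assuming valor-lite 0.36.0 and the module path of the file shown here; the pair values are illustrative only:

    import numpy as np

    from valor_lite.classification.manager import Metadata

    # hypothetical pairs: one datum (index 0) with ground truth label 0 and
    # predictions over labels 0 and 1 scoring 0.8 and 0.2
    detailed_pairs = np.array(
        [
            [0.0, 0.0, 0.0, 0.8, 1.0],
            [0.0, 0.0, 1.0, 0.2, 0.0],
        ]
    )

    meta = Metadata.create(
        detailed_pairs=detailed_pairs,
        number_of_datums=1,
        number_of_labels=2,
    )
    assert meta.number_of_ground_truths == 1  # one unique (datum, gt label) pair
    assert meta.number_of_predictions == 2  # two unique (datum, pd label) pairs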
@@ -50,25 +92,21 @@ class Evaluator:
     """

     def __init__(self):
+        # external references
+        self.datum_id_to_index: dict[str, int] = {}
+        self.label_to_index: dict[str, int] = {}

-
-        self.
-        self.n_groundtruths = 0
-        self.n_predictions = 0
-        self.n_labels = 0
-
-        # datum reference
-        self.uid_to_index: dict[str, int] = dict()
-        self.index_to_uid: dict[int, str] = dict()
+        self.index_to_datum_id: list[str] = []
+        self.index_to_label: list[str] = []

-        #
-        self.label_to_index: dict[str, int] = dict()
-        self.index_to_label: dict[int, str] = dict()
-
-        # computation caches
+        # internal caches
         self._detailed_pairs = np.array([])
         self._label_metadata = np.array([], dtype=np.int32)
-        self.
+        self._metadata = Metadata()
+
+    @property
+    def metadata(self) -> Metadata:
+        return self._metadata

     @property
     def ignored_prediction_labels(self) -> list[str]:
@@ -92,97 +130,120 @@
             self.index_to_label[label_id] for label_id in (glabels - plabels)
         ]

-    @property
-    def metadata(self) -> dict:
-        """
-        Evaluation metadata.
-        """
-        return {
-            "n_datums": self.n_datums,
-            "n_groundtruths": self.n_groundtruths,
-            "n_predictions": self.n_predictions,
-            "n_labels": self.n_labels,
-            "ignored_prediction_labels": self.ignored_prediction_labels,
-            "missing_prediction_labels": self.missing_prediction_labels,
-        }
-
     def create_filter(
         self,
-
-        labels: list[str] |
+        datum_ids: list[str] | None = None,
+        labels: list[str] | None = None,
     ) -> Filter:
         """
-        Creates a
+        Creates a filter object.

         Parameters
         ----------
-        datum_uids : list[str]
-            An optional list of string uids
-        labels : list[str]
-            An optional list of labels
+        datum_uids : list[str], optional
+            An optional list of string uids representing datums.
+        labels : list[str], optional
+            An optional list of labels.

         Returns
         -------
         Filter
-
+            The filter object representing the input parameters.
         """
-
-
-
-
-
-
-
-
-
-
-        if isinstance(datum_uids, list):
-            datum_uids = np.array(
-                [self.uid_to_index[uid] for uid in datum_uids],
-                dtype=np.int32,
+        # create datum mask
+        n_pairs = self._detailed_pairs.shape[0]
+        datum_mask = np.ones(n_pairs, dtype=np.bool_)
+        if datum_ids is not None:
+            if not datum_ids:
+                warnings.warn("no valid filtered pairs")
+                return Filter(
+                    datum_mask=np.zeros_like(datum_mask),
+                    valid_label_indices=None,
+                    metadata=Metadata(),
                 )
-
-
-            np.
-
-
-
-
-            mask[datum_uids] = True
-            mask_datums &= mask
+            valid_datum_indices = np.array(
+                [self.datum_id_to_index[uid] for uid in datum_ids],
+                dtype=np.int32,
+            )
+            datum_mask = np.isin(
+                self._detailed_pairs[:, 0], valid_datum_indices
+            )

+        # collect valid label indices
+        valid_label_indices = None
         if labels is not None:
-            if
-
-
+            if not labels:
+                warnings.warn("no valid filtered pairs")
+                return Filter(
+                    datum_mask=datum_mask,
+                    valid_label_indices=np.array([], dtype=np.int32),
+                    metadata=Metadata(),
                 )
-
-
-                np.isin(self._detailed_pairs[:, 1].astype(int), labels)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[labels] = True
-            mask_labels &= mask
-
-        mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
-        label_metadata_per_datum = self._label_metadata_per_datum.copy()
-        label_metadata_per_datum[:, ~mask] = 0
-
-        label_metadata: NDArray[np.int32] = np.transpose(
-            np.sum(
-                label_metadata_per_datum,
-                axis=1,
+            valid_label_indices = np.array(
+                [self.label_to_index[label] for label in labels] + [-1]
             )
+
+        filtered_detailed_pairs, _ = filter_cache(
+            detailed_pairs=self._detailed_pairs,
+            datum_mask=datum_mask,
+            valid_label_indices=valid_label_indices,
+            n_labels=self.metadata.number_of_labels,
         )

-
+        number_of_datums = (
+            len(datum_ids)
+            if datum_ids is not None
+            else self.metadata.number_of_datums
+        )

         return Filter(
-
-
-
+            datum_mask=datum_mask,
+            valid_label_indices=valid_label_indices,
+            metadata=Metadata.create(
+                detailed_pairs=filtered_detailed_pairs,
+                number_of_datums=number_of_datums,
+                number_of_labels=self.metadata.number_of_labels,
+            ),
+        )
+
+    def filter(
+        self, filter_: Filter
+    ) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
+        """
+        Performs filtering over the internal cache.
+
+        Parameters
+        ----------
+        filter_ : Filter
+            The filter object representation.
+
+        Returns
+        -------
+        NDArray[float64]
+            The filtered detailed pairs.
+        NDArray[int32]
+            The filtered label metadata.
+        """
+        empty_datum_mask = not filter_.datum_mask.any()
+        empty_label_mask = (
+            filter_.valid_label_indices.size == 0
+            if filter_.valid_label_indices is not None
+            else False
+        )
+        if empty_datum_mask or empty_label_mask:
+            if empty_datum_mask:
+                warnings.warn("filter removes all datums")
+            if empty_label_mask:
+                warnings.warn("filter removes all labels")
+            return (
+                np.array([], dtype=np.float64),
+                np.zeros((self.metadata.number_of_labels, 2), dtype=np.int32),
+            )
+        return filter_cache(
+            detailed_pairs=self._detailed_pairs,
+            datum_mask=filter_.datum_mask,
+            valid_label_indices=filter_.valid_label_indices,
+            n_labels=self.metadata.number_of_labels,
         )

     def compute_precision_recall_rocauc(
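A brief usage sketch of the reworked filtering path above; the datum uids and label are hypothetical, and `evaluator` is assumed to be an already-finalized `Evaluator`:

    # build a Filter; it carries a datum mask, the valid label indices,
    # and its own Metadata describing the filtered subset
    f = evaluator.create_filter(datum_ids=["uid0", "uid1"], labels=["dog"])
    print(f.metadata.number_of_datums)

    # apply the filter to the internal cache; returns the filtered detailed
    # pairs and label metadata without mutating the evaluator itself
    detailed_pairs, label_metadata = evaluator.filter(filter_=f)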
@@ -201,31 +262,29 @@
         hardmax : bool
             Toggles whether a hardmax is applied to predictions.
         filter_ : Filter, optional
-
+            Applies a filter to the internal cache.

         Returns
         -------
         dict[MetricType, list]
             A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
-
         # apply filters
-        data = self._detailed_pairs
-        label_metadata = self._label_metadata
-        n_datums = self.n_datums
         if filter_ is not None:
-
-
-
+            detailed_pairs, label_metadata = self.filter(filter_=filter_)
+            n_datums = filter_.metadata.number_of_datums
+        else:
+            detailed_pairs = self._detailed_pairs
+            label_metadata = self._label_metadata
+            n_datums = self.metadata.number_of_datums

         results = compute_precision_recall_rocauc(
-
+            detailed_pairs=detailed_pairs,
             label_metadata=label_metadata,
             score_thresholds=np.array(score_thresholds),
             hardmax=hardmax,
             n_datums=n_datums,
         )
-
         return unpack_precision_recall_rocauc_into_metric_lists(
             results=results,
             score_thresholds=score_thresholds,
@@ -253,37 +312,35 @@
         number_of_examples : int, default=0
             The number of examples to return per count.
         filter_ : Filter, optional
-
+            Applies a filter to the internal cache.

         Returns
         -------
         list[Metric]
             A list of confusion matrices.
         """
-
         # apply filters
-        data = self._detailed_pairs
-        label_metadata = self._label_metadata
         if filter_ is not None:
-
-
+            detailed_pairs, label_metadata = self.filter(filter_=filter_)
+        else:
+            detailed_pairs = self._detailed_pairs
+            label_metadata = self._label_metadata

-        if
+        if detailed_pairs.size == 0:
             return list()

         results = compute_confusion_matrix(
-
+            detailed_pairs=detailed_pairs,
             label_metadata=label_metadata,
             score_thresholds=np.array(score_thresholds),
             hardmax=hardmax,
             n_examples=number_of_examples,
         )
-
         return unpack_confusion_matrix_into_metric_list(
             results=results,
             score_thresholds=score_thresholds,
             number_of_examples=number_of_examples,
-
+            index_to_datum_id=self.index_to_datum_id,
             index_to_label=self.index_to_label,
         )

@@ -306,40 +363,26 @@
         number_of_examples : int, default=0
             The number of examples to return per count.
         filter_ : Filter, optional
-
+            Applies a filter to the internal cache.

         Returns
         -------
         dict[MetricType, list[Metric]]
             Lists of metrics organized by metric type.
         """
-
         metrics = self.compute_precision_recall_rocauc(
             score_thresholds=score_thresholds,
             hardmax=hardmax,
             filter_=filter_,
         )
-
         metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
             score_thresholds=score_thresholds,
             hardmax=hardmax,
             number_of_examples=number_of_examples,
             filter_=filter_,
         )
-
         return metrics

-
-class DataLoader:
-    """
-    Classification DataLoader.
-    """
-
-    def __init__(self):
-        self._evaluator = Evaluator()
-        self.groundtruth_count = defaultdict(lambda: defaultdict(int))
-        self.prediction_count = defaultdict(lambda: defaultdict(int))
-
     def _add_datum(self, uid: str) -> int:
         """
         Helper function for adding a datum to the cache.
@@ -354,11 +397,11 @@
         int
             The datum index.
         """
-        if uid not in self.
-            index = len(self.
-            self.
-            self.
-        return self.
+        if uid not in self.datum_id_to_index:
+            index = len(self.datum_id_to_index)
+            self.datum_id_to_index[uid] = index
+            self.index_to_datum_id.append(uid)
+        return self.datum_id_to_index[uid]

     def _add_label(self, label: str) -> int:
         """
@@ -374,47 +417,12 @@
         int
             Label index.
         """
-        label_id = len(self.
-        if label not in self.
-            self.
-            self.
-
+        label_id = len(self.index_to_label)
+        if label not in self.label_to_index:
+            self.label_to_index[label] = label_id
+            self.index_to_label.append(label)
             label_id += 1
-
-        return self._evaluator.label_to_index[label]
-
-    def _add_data(
-        self,
-        uid_index: int,
-        groundtruth: int,
-        predictions: list[tuple[int, float]],
-    ):
-
-        pairs = list()
-        scores = np.array([score for _, score in predictions])
-        max_score_idx = np.argmax(scores)
-
-        for idx, (plabel, score) in enumerate(predictions):
-            pairs.append(
-                (
-                    float(uid_index),
-                    float(groundtruth),
-                    float(plabel),
-                    float(score),
-                    float(max_score_idx == idx),
-                )
-            )
-
-        if self._evaluator._detailed_pairs.size == 0:
-            self._evaluator._detailed_pairs = np.array(pairs)
-        else:
-            self._evaluator._detailed_pairs = np.concatenate(
-                [
-                    self._evaluator._detailed_pairs,
-                    np.array(pairs),
-                ],
-                axis=0,
-            )
+        return self.label_to_index[label]

     def add_data(
         self,
@@ -439,24 +447,18 @@
                 raise ValueError(
                     "Classifications must contain at least one prediction."
                 )
-            # update metadata
-            self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += 1
-            self._evaluator.n_predictions += len(classification.predictions)

            # update datum uid index
            uid_index = self._add_datum(uid=classification.uid)

            # cache labels and annotations
            groundtruth = self._add_label(classification.groundtruth)
-            self.groundtruth_count[groundtruth][uid_index] += 1

            predictions = list()
            for plabel, pscore in zip(
                classification.predictions, classification.scores
            ):
                label_idx = self._add_label(plabel)
-                self.prediction_count[label_idx][uid_index] += 1
                predictions.append(
                    (
                        label_idx,
@@ -464,13 +466,33 @@
                        pscore,
                    )
                )

-
-
-
-
-            )
+            pairs = list()
+            scores = np.array([score for _, score in predictions])
+            max_score_idx = np.argmax(scores)
+
+            for idx, (plabel, score) in enumerate(predictions):
+                pairs.append(
+                    (
+                        float(uid_index),
+                        float(groundtruth),
+                        float(plabel),
+                        float(score),
+                        float(max_score_idx == idx),
+                    )
+                )

-
+            if self._detailed_pairs.size == 0:
+                self._detailed_pairs = np.array(pairs)
+            else:
+                self._detailed_pairs = np.concatenate(
+                    [
+                        self._detailed_pairs,
+                        np.array(pairs),
+                    ],
+                    axis=0,
+                )
+
+    def finalize(self):
         """
         Performs data finalization and some preprocessing steps.

@@ -479,63 +501,34 @@
         Evaluator
             A ready-to-use evaluator object.
         """
-
-
-
-
-
-
-
-
-
-        self._evaluator._label_metadata_per_datum = np.zeros(
-            (2, n_datums, n_labels), dtype=np.int32
+        if self._detailed_pairs.size == 0:
+            self._label_metadata = np.array([], dtype=np.int32)
+            warnings.warn("evaluator is empty")
+            return self
+
+        self._label_metadata = compute_label_metadata(
+            ids=self._detailed_pairs[:, :3].astype(np.int32),
+            n_labels=len(self.index_to_label),
         )
-        for datum_idx in range(n_datums):
-            for label_idx in range(n_labels):
-                gt_count = (
-                    self.groundtruth_count[label_idx].get(datum_idx, 0)
-                    if label_idx in self.groundtruth_count
-                    else 0
-                )
-                pd_count = (
-                    self.prediction_count[label_idx].get(datum_idx, 0)
-                    if label_idx in self.prediction_count
-                    else 0
-                )
-                self._evaluator._label_metadata_per_datum[
-                    :, datum_idx, label_idx
-                ] = np.array([gt_count, pd_count])
-
-        self._evaluator._label_metadata = np.array(
-            [
-                [
-                    np.sum(
-                        self._evaluator._label_metadata_per_datum[
-                            0, :, label_idx
-                        ]
-                    ),
-                    np.sum(
-                        self._evaluator._label_metadata_per_datum[
-                            1, :, label_idx
-                        ]
-                    ),
-                ]
-                for label_idx in range(n_labels)
-            ],
-            dtype=np.int32,
-        )
-
-        # sort pairs by groundtruth, prediction, score
         indices = np.lexsort(
             (
-                self.
-                self.
-                -self.
+                self._detailed_pairs[:, 1],  # ground truth
+                self._detailed_pairs[:, 2],  # prediction
+                -self._detailed_pairs[:, 3],  # score
             )
         )
-        self.
-
-
+        self._detailed_pairs = self._detailed_pairs[indices]
+        self._metadata = Metadata.create(
+            detailed_pairs=self._detailed_pairs,
+            number_of_datums=len(self.index_to_datum_id),
+            number_of_labels=len(self.index_to_label),
+        )
+        return self
+
+
+class DataLoader(Evaluator):
+    """
+    Used for backwards compatibility as the Evaluator now handles ingestion.
+    """

-
+    pass
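Taken together, ingestion now lives on `Evaluator` itself and `DataLoader` survives only as a compatibility alias. A minimal end-to-end sketch against 0.36.0; the uid, labels, and scores are made up, the `Classification` fields mirror the attributes referenced in `add_data`, and `add_data` is assumed to accept a list of classifications as the loop structure in this diff suggests:

    from valor_lite.classification.annotation import Classification
    from valor_lite.classification.manager import Evaluator

    # one hypothetical datum with ground truth "cat" and two scored predictions
    clf = Classification(
        uid="datum0",
        groundtruth="cat",
        predictions=["cat", "dog"],
        scores=[0.8, 0.2],
    )

    evaluator = Evaluator()
    evaluator.add_data([clf])
    evaluator.finalize()

    print(evaluator.metadata.to_dict())  # datum / ground truth / prediction / label counts

    # optionally restrict evaluation with the new Filter object
    f = evaluator.create_filter(datum_ids=["datum0"], labels=["cat", "dog"])
    metrics = evaluator.evaluate(score_thresholds=[0.5], hardmax=True, filter_=f)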
valor_lite/classification/metric.py

@@ -335,8 +335,8 @@ class Metric(BaseMetric):
         The confusion matrix and related metrics for the classification task.

         This class encapsulates detailed information about the model's performance, including correct
-        predictions, misclassifications
-
+        predictions, misclassifications and unmatched ground truths (subset of false negatives).
+        It provides counts and examples for each category to facilitate in-depth analysis.

         Confusion Matrix Structure:
         {
@@ -345,10 +345,8 @@ class Metric(BaseMetric):
                 'count': int,
                 'examples': [
                     {
-
-
-                        'prediction': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
-                        'score': float,
+                        "datum_id": str,
+                        "score": float
                     },
                     ...
                 ],
@@ -364,8 +362,7 @@ class Metric(BaseMetric):
                 'count': int,
                 'examples': [
                     {
-
-                        'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
+                        "datum_id": str
                     },
                     ...
                 ],