valor-lite 0.33.7__py3-none-any.whl → 0.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,549 @@
from collections import defaultdict
from dataclasses import dataclass

import numpy as np
from numpy.typing import NDArray
from tqdm import tqdm
from valor_lite.semantic_segmentation.annotation import Segmentation
from valor_lite.semantic_segmentation.computation import (
    compute_intermediate_confusion_matrices,
    compute_metrics,
)
from valor_lite.semantic_segmentation.metric import (
    F1,
    Accuracy,
    ConfusionMatrix,
    IoU,
    MetricType,
    Precision,
    Recall,
    mIoU,
)

"""
Usage
-----

loader = DataLoader()
loader.add_data(segmentations)
evaluator = loader.finalize()

metrics = evaluator.evaluate()

f1_metrics = metrics[MetricType.F1]
accuracy_metrics = metrics[MetricType.Accuracy]

filter_ = evaluator.create_filter(datum_uids=["uid1", "uid2"])
filtered_metrics = evaluator.evaluate(filter_=filter_)
"""


@dataclass
class Filter:
    # indices of the datums retained by the filter
    indices: NDArray[np.int32]
    # per-label (groundtruth, prediction) pixel counts after filtering
    label_metadata: NDArray[np.int32]
    # total pixel count across the retained datums
    n_pixels: int


class Evaluator:
    """
    Segmentation Evaluator
    """

    def __init__(self):

        # metadata
        self.n_datums = 0
        self.n_groundtruths = 0
        self.n_predictions = 0
        self.n_pixels = 0
        self.n_groundtruth_pixels = 0
        self.n_prediction_pixels = 0
        self.n_labels = 0

        # datum reference
        self.uid_to_index: dict[str, int] = dict()
        self.index_to_uid: dict[int, str] = dict()

        # label reference
        self.label_to_index: dict[str, int] = dict()
        self.index_to_label: dict[int, str] = dict()

        # computation caches
        self._confusion_matrices = np.array([])
        self._label_metadata = np.array([], dtype=np.int32)
        self._label_metadata_per_datum = np.array([], dtype=np.int32)
        self._n_pixels_per_datum = np.array([], dtype=np.int32)

    @property
    def ignored_prediction_labels(self) -> list[str]:
        """
        Prediction labels that are not present in the ground truth set.
        """
        glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
        plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
        return [
            self.index_to_label[label_id] for label_id in (plabels - glabels)
        ]

    @property
    def missing_prediction_labels(self) -> list[str]:
        """
        Ground truth labels that are not present in the prediction set.
        """
        glabels = set(np.where(self._label_metadata[:, 0] > 0)[0])
        plabels = set(np.where(self._label_metadata[:, 1] > 0)[0])
        return [
            self.index_to_label[label_id] for label_id in (glabels - plabels)
        ]

    @property
    def metadata(self) -> dict:
        """
        Evaluation metadata.
        """
        return {
            "number_of_datums": self.n_datums,
            "number_of_groundtruths": self.n_groundtruths,
            "number_of_predictions": self.n_predictions,
            "number_of_groundtruth_pixels": self.n_groundtruth_pixels,
            "number_of_prediction_pixels": self.n_prediction_pixels,
            "number_of_labels": self.n_labels,
            "ignored_prediction_labels": self.ignored_prediction_labels,
            "missing_prediction_labels": self.missing_prediction_labels,
        }

    def create_filter(
        self,
        datum_uids: list[str] | NDArray[np.int32] | None = None,
        labels: list[str] | NDArray[np.int32] | None = None,
    ) -> Filter:
        """
        Creates a filter that can be passed to an evaluation.

        Parameters
        ----------
        datum_uids : list[str] | NDArray[np.int32], optional
            An optional list of string uids or a numpy array of uid indices.
        labels : list[str] | NDArray[np.int32], optional
            An optional list of string labels or a numpy array of label indices.

        Returns
        -------
        Filter
            A filter object that can be passed to the `evaluate` method.
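
        Examples
        --------
        A minimal, illustrative sketch; it assumes the evaluator was built
        from datums "uid1" and "uid2" with a label set containing "dog":

        >>> filter_ = evaluator.create_filter(datum_uids=["uid1"], labels=["dog"])
        >>> filtered_metrics = evaluator.evaluate(filter_=filter_)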
138
+ """
139
+ n_datums = self._label_metadata_per_datum.shape[1]
140
+ n_labels = self._label_metadata_per_datum.shape[2]
141
+
142
+ mask_datums = np.ones(n_datums, dtype=np.bool_)
143
+ mask_labels = np.ones(n_labels, dtype=np.bool_)
144
+
145
+ if datum_uids is not None:
146
+ if isinstance(datum_uids, list):
147
+ datum_uids = np.array(
148
+ [self.uid_to_index[uid] for uid in datum_uids],
149
+ dtype=np.int32,
150
+ )
151
+ if datum_uids.size == 0:
152
+ mask_datums[mask_datums] = False
153
+ else:
154
+ mask = (
155
+ np.arange(n_datums).reshape(-1, 1)
156
+ == datum_uids.reshape(1, -1)
157
+ ).any(axis=1)
158
+ mask_datums[~mask] = False
159
+
160
+ if labels is not None:
161
+ if isinstance(labels, list):
162
+ labels = np.array(
163
+ [self.label_to_index[label] for label in labels],
164
+ dtype=np.int32,
165
+ )
166
+ if labels.size == 0:
167
+ mask_labels[mask_labels] = False
168
+ else:
169
+ mask = (
170
+ np.arange(n_labels).reshape(-1, 1) == labels.reshape(1, -1)
171
+ ).any(axis=1)
172
+ mask_labels[~mask] = False
173
+
174
+ mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
175
+ label_metadata_per_datum = self._label_metadata_per_datum.copy()
176
+ label_metadata_per_datum[:, ~mask] = 0
177
+
178
+ label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
179
+ label_metadata = np.transpose(
180
+ np.sum(
181
+ label_metadata_per_datum,
182
+ axis=1,
183
+ )
184
+ )
185
+ n_datums = int(np.sum(label_metadata[:, 0]))
186
+
187
+ return Filter(
188
+ indices=np.where(mask_datums)[0],
189
+ label_metadata=label_metadata,
190
+ n_pixels=self._n_pixels_per_datum[mask_datums].sum(),
191
+ )

    def compute_precision_recall_iou(
        self,
        filter_: Filter | None = None,
        as_dict: bool = False,
    ) -> dict[MetricType, list]:
        """
        Performs an evaluation and returns metrics.

        Parameters
        ----------
        filter_ : Filter, optional
            An optional filter object.
        as_dict : bool, default=False
            An option to return metrics as dictionaries.

        Returns
        -------
        dict[MetricType, list]
            A dictionary mapping MetricType enumerations to lists of computed metrics.
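
        Examples
        --------
        An illustrative sketch, assuming `evaluator` came from
        `DataLoader.finalize()`:

        >>> metrics = evaluator.compute_precision_recall_iou()
        >>> for iou in metrics[MetricType.IoU]:
        ...     print(iou.label, iou.value)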
        """

        # apply filters
        data = self._confusion_matrices
        label_metadata = self._label_metadata
        n_pixels = self.n_pixels
        if filter_ is not None:
            data = data[filter_.indices]
            label_metadata = filter_.label_metadata
            n_pixels = filter_.n_pixels

        (
            precision,
            recall,
            f1_score,
            accuracy,
            ious,
            hallucination_ratios,
            missing_prediction_ratios,
        ) = compute_metrics(
            data=data,
            label_metadata=label_metadata,
            n_pixels=n_pixels,
        )

        metrics = defaultdict(list)

        metrics[MetricType.Accuracy] = [
            Accuracy(
                value=float(accuracy),
            )
        ]

        metrics[MetricType.ConfusionMatrix] = [
            ConfusionMatrix(
                confusion_matrix={
                    self.index_to_label[gt_label_idx]: {
                        self.index_to_label[pd_label_idx]: {
                            "iou": float(ious[gt_label_idx, pd_label_idx])
                        }
                        for pd_label_idx in range(self.n_labels)
                        if label_metadata[pd_label_idx, 0] > 0
                    }
                    for gt_label_idx in range(self.n_labels)
                    if label_metadata[gt_label_idx, 0] > 0
                },
                hallucinations={
                    self.index_to_label[pd_label_idx]: {
                        "ratio": float(hallucination_ratios[pd_label_idx])
                    }
                    for pd_label_idx in range(self.n_labels)
                    if label_metadata[pd_label_idx, 0] > 0
                },
                missing_predictions={
                    self.index_to_label[gt_label_idx]: {
                        "ratio": float(missing_prediction_ratios[gt_label_idx])
                    }
                    for gt_label_idx in range(self.n_labels)
                    if label_metadata[gt_label_idx, 0] > 0
                },
            )
        ]

        metrics[MetricType.mIoU] = [
            mIoU(
                value=float(ious.diagonal().mean()),
            )
        ]

        for label_idx, label in self.index_to_label.items():

            kwargs = {
                "label": label,
            }

            # if no groundtruths exist for a label, skip it.
            if label_metadata[label_idx, 0] == 0:
                continue

            metrics[MetricType.Precision].append(
                Precision(
                    value=float(precision[label_idx]),
                    **kwargs,
                )
            )
            metrics[MetricType.Recall].append(
                Recall(
                    value=float(recall[label_idx]),
                    **kwargs,
                )
            )
            metrics[MetricType.F1].append(
                F1(
                    value=float(f1_score[label_idx]),
                    **kwargs,
                )
            )
            metrics[MetricType.IoU].append(
                IoU(
                    value=float(ious[label_idx, label_idx]),
                    **kwargs,
                )
            )

        if as_dict:
            return {
                mtype: [metric.to_dict() for metric in mvalues]
                for mtype, mvalues in metrics.items()
            }

        return metrics

    def evaluate(
        self,
        filter_: Filter | None = None,
        as_dict: bool = False,
    ) -> dict[MetricType, list]:
        """
        Computes all available metrics.

        Parameters
        ----------
        filter_ : Filter, optional
            An optional filter object.
        as_dict : bool, default=False
            An option to return metrics as dictionaries.

        Returns
        -------
        dict[MetricType, list]
            A dictionary mapping metric type to lists of metrics.
        """
        return self.compute_precision_recall_iou(
            filter_=filter_, as_dict=as_dict
        )


class DataLoader:
    """
    Segmentation DataLoader.
    """

    def __init__(self):
        self._evaluator = Evaluator()
        # per-label, per-datum pixel counts for groundtruths and predictions
        self.groundtruth_count = defaultdict(lambda: defaultdict(int))
        self.prediction_count = defaultdict(lambda: defaultdict(int))
        # per-datum intermediate confusion matrices and pixel totals
        self.matrices = list()
        self.pixel_count = list()

    def _add_datum(self, uid: str) -> int:
        """
        Helper function for adding a datum to the cache.

        Parameters
        ----------
        uid : str
            The datum uid.

        Returns
        -------
        int
            The datum index.
        """
        if uid in self._evaluator.uid_to_index:
            raise ValueError(f"Datum with uid `{uid}` has already been added.")
        index = len(self._evaluator.uid_to_index)
        self._evaluator.uid_to_index[uid] = index
        self._evaluator.index_to_uid[index] = uid
        return index

    def _add_label(self, label: str) -> int:
        """
        Helper function for adding a label to the cache.

        Parameters
        ----------
        label : str
            A string label.

        Returns
        -------
        int
            The label's index.
        """
        if label not in self._evaluator.label_to_index:
            label_id = len(self._evaluator.index_to_label)
            self._evaluator.label_to_index[label] = label_id
            self._evaluator.index_to_label[label_id] = label
        return self._evaluator.label_to_index[label]

    def add_data(
        self,
        segmentations: list[Segmentation],
        show_progress: bool = False,
    ):
        """
        Adds segmentations to the cache.

        Parameters
        ----------
        segmentations : list[Segmentation]
            A list of Segmentation objects.
        show_progress : bool, default=False
            Toggle for tqdm progress bar.
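
        Examples
        --------
        A minimal sketch, assuming `segmentations` is a list of Segmentation
        objects pairing groundtruth and prediction masks:

        >>> loader = DataLoader()
        >>> loader.add_data(segmentations, show_progress=True)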
        """

        disable_tqdm = not show_progress
        for segmentation in tqdm(segmentations, disable=disable_tqdm):

            # update metadata
            self._evaluator.n_datums += 1
            self._evaluator.n_groundtruths += len(segmentation.groundtruths)
            self._evaluator.n_predictions += len(segmentation.predictions)
            self._evaluator.n_pixels += segmentation.size
            self._evaluator.n_groundtruth_pixels += segmentation.size * len(
                segmentation.groundtruths
            )
            self._evaluator.n_prediction_pixels += segmentation.size * len(
                segmentation.predictions
            )

            # update datum cache
            uid_index = self._add_datum(segmentation.uid)

            groundtruth_labels = np.full(
                len(segmentation.groundtruths), fill_value=-1
            )
            for idx, groundtruth in enumerate(segmentation.groundtruths):
                label_idx = self._add_label(groundtruth.label)
                groundtruth_labels[idx] = label_idx
                self.groundtruth_count[label_idx][
                    uid_index
                ] += groundtruth.mask.sum()

            prediction_labels = np.full(
                len(segmentation.predictions), fill_value=-1
            )
            for idx, prediction in enumerate(segmentation.predictions):
                label_idx = self._add_label(prediction.label)
                prediction_labels[idx] = label_idx
                self.prediction_count[label_idx][
                    uid_index
                ] += prediction.mask.sum()

            combined_groundtruths = np.stack(
                [
                    groundtruth.mask.flatten()
                    for groundtruth in segmentation.groundtruths
                ],
                axis=0,
            )
            combined_predictions = np.stack(
                [
                    prediction.mask.flatten()
                    for prediction in segmentation.predictions
                ],
                axis=0,
            )

            self.matrices.append(
                compute_intermediate_confusion_matrices(
                    groundtruths=combined_groundtruths,
                    predictions=combined_predictions,
                    groundtruth_labels=groundtruth_labels,
                    prediction_labels=prediction_labels,
                    n_labels=len(self._evaluator.index_to_label),
                )
            )
            self.pixel_count.append(segmentation.size)

    def finalize(self) -> Evaluator:
        """
        Finalizes the cached data and performs preprocessing steps.

        Returns
        -------
        Evaluator
            A ready-to-use evaluator object.
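
        Examples
        --------
        A sketch, assuming data was already added via `add_data`:

        >>> evaluator = loader.finalize()
        >>> evaluator.metadata["number_of_labels"]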
        """

        if len(self.matrices) == 0:
            raise ValueError("No data available to create evaluator.")

        n_datums = self._evaluator.n_datums
        n_labels = len(self._evaluator.index_to_label)

        self._evaluator.n_labels = n_labels

        self._evaluator._label_metadata_per_datum = np.zeros(
            (2, n_datums, n_labels), dtype=np.int32
        )
        for datum_idx in range(n_datums):
            for label_idx in range(n_labels):
                gt_count = (
                    self.groundtruth_count[label_idx].get(datum_idx, 0)
                    if label_idx in self.groundtruth_count
                    else 0
                )
                pd_count = (
                    self.prediction_count[label_idx].get(datum_idx, 0)
                    if label_idx in self.prediction_count
                    else 0
                )
                self._evaluator._label_metadata_per_datum[
                    :, datum_idx, label_idx
                ] = np.array([gt_count, pd_count])

        self._evaluator._label_metadata = np.array(
            [
                [
                    np.sum(
                        self._evaluator._label_metadata_per_datum[
                            0, :, label_idx
                        ]
                    ),
                    np.sum(
                        self._evaluator._label_metadata_per_datum[
                            1, :, label_idx
                        ]
                    ),
                ]
                for label_idx in range(n_labels)
            ],
            dtype=np.int32,
        )

        self._evaluator._n_pixels_per_datum = np.array(
            self.pixel_count, dtype=np.int32
        )

        # intermediate matrices computed before every label had been seen may
        # be smaller than (n_labels + 1) x (n_labels + 1), so copy each one
        # into the top-left corner of a zero-padded matrix.
        self._evaluator._confusion_matrices = np.zeros(
            (n_datums, n_labels + 1, n_labels + 1), dtype=np.int32
        )
        for idx, matrix in enumerate(self.matrices):
            h, w = matrix.shape
            self._evaluator._confusion_matrices[idx, :h, :w] = matrix

        return self._evaluator
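
For orientation, here is an end-to-end sketch of the module added above. It is illustrative only: the import paths and the `Bitmask` constructor are assumptions based on this file's imports and valor-lite's documented annotation types, and should be checked against `valor_lite.semantic_segmentation.annotation`.

    import numpy as np

    # assumed re-exports; valor-lite typically exposes these from the
    # semantic_segmentation package
    from valor_lite.semantic_segmentation import (
        Bitmask,
        DataLoader,
        MetricType,
        Segmentation,
    )

    # one 10x10 datum: the groundtruth marks the left half, the prediction
    # marks the top half, so the two overlap on a 5x5 quadrant
    gt_mask = np.zeros((10, 10), dtype=np.bool_)
    gt_mask[:, :5] = True
    pd_mask = np.zeros((10, 10), dtype=np.bool_)
    pd_mask[:5, :] = True

    segmentation = Segmentation(
        uid="uid1",
        groundtruths=[Bitmask(mask=gt_mask, label="dog")],
        predictions=[Bitmask(mask=pd_mask, label="dog")],
    )

    loader = DataLoader()
    loader.add_data([segmentation])
    evaluator = loader.finalize()

    # IoU = 25 / (50 + 50 - 25) = 1/3
    metrics = evaluator.evaluate()
    print(metrics[MetricType.IoU][0].value)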