valor-lite 0.33.8__py3-none-any.whl → 0.33.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite has been flagged as possibly problematic by the registry diff service.
- valor_lite/LICENSE +21 -0
- valor_lite/classification/annotation.py +24 -0
- valor_lite/classification/manager.py +189 -217
- valor_lite/classification/metric.py +266 -27
- valor_lite/{detection → object_detection}/annotation.py +144 -3
- valor_lite/{detection → object_detection}/computation.py +33 -9
- valor_lite/{detection → object_detection}/manager.py +289 -368
- valor_lite/object_detection/metric.py +795 -0
- valor_lite/semantic_segmentation/annotation.py +96 -0
- valor_lite/{segmentation → semantic_segmentation}/manager.py +33 -16
- valor_lite/semantic_segmentation/metric.py +278 -0
- valor_lite/text_generation/__init__.py +0 -0
- valor_lite-0.33.10.dist-info/METADATA +179 -0
- valor_lite-0.33.10.dist-info/RECORD +24 -0
- valor_lite/detection/metric.py +0 -380
- valor_lite/segmentation/annotation.py +0 -49
- valor_lite/segmentation/metric.py +0 -119
- valor_lite-0.33.8.dist-info/METADATA +0 -41
- valor_lite-0.33.8.dist-info/RECORD +0 -22
- /valor_lite/{detection → object_detection}/__init__.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/__init__.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/computation.py +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.10.dist-info}/top_level.txt +0 -0
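All of the hunks shown below come from valor_lite/{detection → object_detection}/manager.py, the largest change in this release. The directory renames in the listing above (detection → object_detection, segmentation → semantic_segmentation) mean that import paths change in 0.33.10. The sketch below illustrates the migration; the submodule paths mirror the imports visible in the hunks, while any re-exports from the package __init__.py files are not shown in this diff and are an assumption.

```python
# Import-path migration sketch (0.33.8 -> 0.33.10). The exact public re-exports
# are not part of this diff; these paths follow the imports shown in manager.py.

# 0.33.8
# import valor_lite.detection.annotation as annotation
# from valor_lite.detection.manager import DataLoader, Evaluator

# 0.33.10
import valor_lite.object_detection.annotation as annotation
from valor_lite.object_detection.manager import DataLoader, Evaluator
```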
@@ -1,19 +1,18 @@
-import warnings
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import Type
 
 import numpy as np
-import valor_lite.detection.annotation as annotation
+import valor_lite.object_detection.annotation as annotation
 from numpy.typing import NDArray
 from tqdm import tqdm
-from valor_lite.detection.annotation import (
+from valor_lite.object_detection.annotation import (
     Bitmask,
     BoundingBox,
     Detection,
     Polygon,
 )
-from valor_lite.detection.computation import (
+from valor_lite.object_detection.computation import (
     compute_bbox_iou,
     compute_bitmask_iou,
     compute_confusion_matrix,
@@ -21,7 +20,7 @@ from valor_lite.detection.computation import (
     compute_polygon_iou,
     compute_ranked_pairs,
 )
-from valor_lite.detection.metric import (
+from valor_lite.object_detection.metric import (
     AP,
     AR,
     F1,
@@ -213,229 +212,6 @@ class Evaluator:
             label_metadata=label_metadata,
         )
 
-    def evaluate(
-        self,
-        metrics_to_return: list[MetricType] = MetricType.base_metrics(),
-        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
-        score_thresholds: list[float] = [0.5],
-        number_of_examples: int = 0,
-        filter_: Filter | None = None,
-        as_dict: bool = False,
-    ) -> dict[MetricType, list]:
-        """
-        Performs an evaluation and returns metrics.
-
-        Parameters
-        ----------
-        metrics_to_return : list[MetricType]
-            A list of metrics to return in the results.
-        iou_thresholds : list[float]
-            A list of IoU thresholds to compute metrics over.
-        score_thresholds : list[float]
-            A list of score thresholds to compute metrics over.
-        number_of_examples : int, default=0
-            Maximum number of annotation examples to return in ConfusionMatrix.
-        filter_ : Filter, optional
-            An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
-
-        Returns
-        -------
-        dict[MetricType, list]
-            A dictionary mapping MetricType enumerations to lists of computed metrics.
-        """
-
-        ranked_pairs = self._ranked_pairs
-        detailed_pairs = self._detailed_pairs
-        label_metadata = self._label_metadata
-        if filter_ is not None:
-            ranked_pairs = ranked_pairs[filter_.ranked_indices]
-            detailed_pairs = detailed_pairs[filter_.detailed_indices]
-            label_metadata = filter_.label_metadata
-
-        (
-            (
-                average_precision,
-                mean_average_precision,
-                average_precision_average_over_ious,
-                mean_average_precision_average_over_ious,
-            ),
-            (
-                average_recall,
-                mean_average_recall,
-                average_recall_averaged_over_scores,
-                mean_average_recall_averaged_over_scores,
-            ),
-            precision_recall,
-            pr_curves,
-        ) = compute_metrics(
-            data=ranked_pairs,
-            label_metadata=label_metadata,
-            iou_thresholds=np.array(iou_thresholds),
-            score_thresholds=np.array(score_thresholds),
-        )
-
-        metrics = defaultdict(list)
-
-        metrics[MetricType.AP] = [
-            AP(
-                value=average_precision[iou_idx][label_idx],
-                iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for iou_idx in range(average_precision.shape[0])
-            for label_idx in range(average_precision.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAP] = [
-            mAP(
-                value=mean_average_precision[iou_idx],
-                iou_threshold=iou_thresholds[iou_idx],
-            )
-            for iou_idx in range(mean_average_precision.shape[0])
-        ]
-
-        metrics[MetricType.APAveragedOverIOUs] = [
-            APAveragedOverIOUs(
-                value=average_precision_average_over_ious[label_idx],
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAPAveragedOverIOUs] = [
-            mAPAveragedOverIOUs(
-                value=mean_average_precision_average_over_ious,
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.AR] = [
-            AR(
-                value=average_recall[score_idx][label_idx],
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for score_idx in range(average_recall.shape[0])
-            for label_idx in range(average_recall.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAR] = [
-            mAR(
-                value=mean_average_recall[score_idx],
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-            )
-            for score_idx in range(mean_average_recall.shape[0])
-        ]
-
-        metrics[MetricType.ARAveragedOverScores] = [
-            ARAveragedOverScores(
-                value=average_recall_averaged_over_scores[label_idx],
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mARAveragedOverScores] = [
-            mARAveragedOverScores(
-                value=mean_average_recall_averaged_over_scores,
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.PrecisionRecallCurve] = [
-            PrecisionRecallCurve(
-                precision=list(pr_curves[iou_idx][label_idx]),
-                iou_threshold=iou_threshold,
-                label=label,
-            )
-            for iou_idx, iou_threshold in enumerate(iou_thresholds)
-            for label_idx, label in self.index_to_label.items()
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        for label_idx, label in self.index_to_label.items():
-
-            if label_metadata[label_idx, 0] == 0:
-                continue
-
-            for score_idx, score_threshold in enumerate(score_thresholds):
-                for iou_idx, iou_threshold in enumerate(iou_thresholds):
-
-                    row = precision_recall[iou_idx][score_idx][label_idx]
-                    kwargs = {
-                        "label": label,
-                        "iou_threshold": iou_threshold,
-                        "score_threshold": score_threshold,
-                    }
-                    metrics[MetricType.Counts].append(
-                        Counts(
-                            tp=int(row[0]),
-                            fp=int(row[1]),
-                            fn=int(row[2]),
-                            **kwargs,
-                        )
-                    )
-
-                    metrics[MetricType.Precision].append(
-                        Precision(
-                            value=row[3],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.Recall].append(
-                        Recall(
-                            value=row[4],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.F1].append(
-                        F1(
-                            value=row[5],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.Accuracy].append(
-                        Accuracy(
-                            value=row[6],
-                            **kwargs,
-                        )
-                    )
-
-        if MetricType.ConfusionMatrix in metrics_to_return:
-            metrics[
-                MetricType.ConfusionMatrix
-            ] = self._compute_confusion_matrix(
-                data=detailed_pairs,
-                label_metadata=label_metadata,
-                iou_thresholds=iou_thresholds,
-                score_thresholds=score_thresholds,
-                number_of_examples=number_of_examples,
-            )
-
-        for metric in set(metrics.keys()):
-            if metric not in metrics_to_return:
-                del metrics[metric]
-
-        if as_dict:
-            return {
-                mtype: [metric.to_dict() for metric in mvalues]
-                for mtype, mvalues in metrics.items()
-            }
-
-        return metrics
-
     def _convert_example_to_dict(
         self, box: NDArray[np.float16]
     ) -> dict[str, float]:
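This hunk removes the original `evaluate` method, and with it the `metrics_to_return` parameter; later hunks in this file add `compute_precision_recall` and `compute_confusion_matrix` as separate public methods and reintroduce `evaluate` as a thin wrapper. A rough call-site mapping, sketched under the assumption that `evaluator` was produced by `DataLoader.finalize()` and that `MetricType` has been imported (its import location is not shown in this diff):

```python
# 0.33.8 -- metric selection happened inside evaluate():
# metrics = evaluator.evaluate(
#     metrics_to_return=[MetricType.AP, MetricType.ConfusionMatrix],
#     iou_thresholds=[0.5, 0.75],
# )

# 0.33.10 -- pick what you need at the call site instead:
metrics = evaluator.compute_precision_recall(iou_thresholds=[0.5, 0.75])
ap_metrics = metrics[MetricType.AP]
confusion = evaluator.compute_confusion_matrix(iou_thresholds=[0.5, 0.75])
```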
@@ -443,10 +219,10 @@ class Evaluator:
         Converts a cached bounding box example to dictionary format.
         """
         return {
-            "xmin": box[0],
-            "xmax": box[1],
-            "ymin": box[2],
-            "ymax": box[3],
+            "xmin": float(box[0]),
+            "xmax": float(box[1]),
+            "ymin": float(box[2]),
+            "ymax": float(box[3]),
         }
 
     def _unpack_confusion_matrix(
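The change above coerces the cached numpy.float16 box coordinates to built-in floats before they are placed in the example dictionary. One practical effect (an observation, not stated in the diff) is that the returned dictionaries become JSON-serializable:

```python
import json

import numpy as np

box = np.array([0.0, 10.0, 5.0, 25.0], dtype=np.float16)

# json.dumps({"xmin": box[0]})              # TypeError: float16 is not JSON serializable
print(json.dumps({"xmin": float(box[0])}))  # {"xmin": 0.0}
```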
@@ -464,7 +240,7 @@ class Evaluator:
         | list[
             dict[
                 str,
-                str |
+                str | dict[str, float] | float,
             ]
         ],
     ],
@@ -684,37 +460,248 @@ class Evaluator:
             for gt_label_idx in range(number_of_labels)
         }
 
-    def
+    def compute_precision_recall(
         self,
-
-
-
-
-
-    ) -> list[ConfusionMatrix]:
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> dict[MetricType, list]:
         """
-        Computes
+        Computes all metrics except for ConfusionMatrix
 
         Parameters
         ----------
-        data : NDArray[np.float64]
-            An array containing detailed pairs of detections.
-        label_metadata : NDArray[np.int32]
-            An array containing label metadata.
         iou_thresholds : list[float]
-
+            A list of IoU thresholds to compute metrics over.
         score_thresholds : list[float]
-
-
-
+            A list of score thresholds to compute metrics over.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
         Returns
         -------
-
-
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
 
-
+        ranked_pairs = self._ranked_pairs
+        label_metadata = self._label_metadata
+        if filter_ is not None:
+            ranked_pairs = ranked_pairs[filter_.ranked_indices]
+            label_metadata = filter_.label_metadata
+
+        (
+            (
+                average_precision,
+                mean_average_precision,
+                average_precision_average_over_ious,
+                mean_average_precision_average_over_ious,
+            ),
+            (
+                average_recall,
+                mean_average_recall,
+                average_recall_averaged_over_scores,
+                mean_average_recall_averaged_over_scores,
+            ),
+            precision_recall,
+            pr_curves,
+        ) = compute_metrics(
+            data=ranked_pairs,
+            label_metadata=label_metadata,
+            iou_thresholds=np.array(iou_thresholds),
+            score_thresholds=np.array(score_thresholds),
+        )
+
+        metrics = defaultdict(list)
+
+        metrics[MetricType.AP] = [
+            AP(
+                value=float(average_precision[iou_idx][label_idx]),
+                iou_threshold=iou_thresholds[iou_idx],
+                label=self.index_to_label[label_idx],
+            )
+            for iou_idx in range(average_precision.shape[0])
+            for label_idx in range(average_precision.shape[1])
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAP] = [
+            mAP(
+                value=float(mean_average_precision[iou_idx]),
+                iou_threshold=iou_thresholds[iou_idx],
+            )
+            for iou_idx in range(mean_average_precision.shape[0])
+        ]
+
+        metrics[MetricType.APAveragedOverIOUs] = [
+            APAveragedOverIOUs(
+                value=float(average_precision_average_over_ious[label_idx]),
+                iou_thresholds=iou_thresholds,
+                label=self.index_to_label[label_idx],
+            )
+            for label_idx in range(self.n_labels)
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAPAveragedOverIOUs] = [
+            mAPAveragedOverIOUs(
+                value=float(mean_average_precision_average_over_ious),
+                iou_thresholds=iou_thresholds,
+            )
+        ]
+
+        metrics[MetricType.AR] = [
+            AR(
+                value=float(average_recall[score_idx][label_idx]),
+                iou_thresholds=iou_thresholds,
+                score_threshold=score_thresholds[score_idx],
+                label=self.index_to_label[label_idx],
+            )
+            for score_idx in range(average_recall.shape[0])
+            for label_idx in range(average_recall.shape[1])
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAR] = [
+            mAR(
+                value=float(mean_average_recall[score_idx]),
+                iou_thresholds=iou_thresholds,
+                score_threshold=score_thresholds[score_idx],
+            )
+            for score_idx in range(mean_average_recall.shape[0])
+        ]
+
+        metrics[MetricType.ARAveragedOverScores] = [
+            ARAveragedOverScores(
+                value=float(average_recall_averaged_over_scores[label_idx]),
+                score_thresholds=score_thresholds,
+                iou_thresholds=iou_thresholds,
+                label=self.index_to_label[label_idx],
+            )
+            for label_idx in range(self.n_labels)
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mARAveragedOverScores] = [
+            mARAveragedOverScores(
+                value=float(mean_average_recall_averaged_over_scores),
+                score_thresholds=score_thresholds,
+                iou_thresholds=iou_thresholds,
+            )
+        ]
+
+        metrics[MetricType.PrecisionRecallCurve] = [
+            PrecisionRecallCurve(
+                precisions=pr_curves[iou_idx, label_idx, :, 0]
+                .astype(float)
+                .tolist(),
+                scores=pr_curves[iou_idx, label_idx, :, 1]
+                .astype(float)
+                .tolist(),
+                iou_threshold=iou_threshold,
+                label=label,
+            )
+            for iou_idx, iou_threshold in enumerate(iou_thresholds)
+            for label_idx, label in self.index_to_label.items()
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        for label_idx, label in self.index_to_label.items():
+
+            if label_metadata[label_idx, 0] == 0:
+                continue
+
+            for score_idx, score_threshold in enumerate(score_thresholds):
+                for iou_idx, iou_threshold in enumerate(iou_thresholds):
+
+                    row = precision_recall[iou_idx][score_idx][label_idx]
+                    kwargs = {
+                        "label": label,
+                        "iou_threshold": iou_threshold,
+                        "score_threshold": score_threshold,
+                    }
+                    metrics[MetricType.Counts].append(
+                        Counts(
+                            tp=int(row[0]),
+                            fp=int(row[1]),
+                            fn=int(row[2]),
+                            **kwargs,
+                        )
+                    )
+
+                    metrics[MetricType.Precision].append(
+                        Precision(
+                            value=float(row[3]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.Recall].append(
+                        Recall(
+                            value=float(row[4]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.F1].append(
+                        F1(
+                            value=float(row[5]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.Accuracy].append(
+                        Accuracy(
+                            value=float(row[6]),
+                            **kwargs,
+                        )
+                    )
+
+        if as_dict:
+            return {
+                mtype: [metric.to_dict() for metric in mvalues]
+                for mtype, mvalues in metrics.items()
+            }
+
+        return metrics
+
+    def compute_confusion_matrix(
+        self,
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> list:
+        """
+        Computes confusion matrices at various thresholds.
+
+        Parameters
+        ----------
+        iou_thresholds : list[float]
+            A list of IoU thresholds to compute metrics over.
+        score_thresholds : list[float]
+            A list of score thresholds to compute metrics over.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return in ConfusionMatrix.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
+
+        Returns
+        -------
+        list[ConfusionMatrix] | list[dict]
+            List of confusion matrices per threshold pair.
+        """
+
+        detailed_pairs = self._detailed_pairs
+        label_metadata = self._label_metadata
+        if filter_ is not None:
+            detailed_pairs = detailed_pairs[filter_.detailed_indices]
+            label_metadata = filter_.label_metadata
+
+        if detailed_pairs.size == 0:
             return list()
 
         (
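The hunk above turns the former private helper into two public methods on `Evaluator`. A usage sketch based only on the signatures added here; how `loader` is populated (e.g. via `add_bounding_boxes`) is not shown in this diff, and the `MetricType` import is assumed:

```python
evaluator = loader.finalize()

# Precision/recall-style metrics, keyed by MetricType.
pr_metrics = evaluator.compute_precision_recall(
    iou_thresholds=[0.5, 0.75, 0.9],
    score_thresholds=[0.5],
    as_dict=True,  # plain dicts instead of metric objects
)
print(pr_metrics[MetricType.mAP])

# Confusion matrices, one per (iou_threshold, score_threshold) pair.
matrices = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5],
    score_thresholds=[0.5],
    number_of_examples=3,  # cap on annotation examples returned per matrix
)
```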
@@ -722,7 +709,7 @@ class Evaluator:
             hallucinations,
             missing_predictions,
         ) = compute_confusion_matrix(
-            data=
+            data=detailed_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
@@ -730,7 +717,7 @@ class Evaluator:
         )
 
         n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
-
+        matrices = [
             ConfusionMatrix(
                 iou_threshold=iou_thresholds[iou_idx],
                 score_threshold=score_thresholds[score_idx],
@@ -759,6 +746,54 @@ class Evaluator:
             for score_idx in range(n_scores)
         ]
 
+        if as_dict:
+            return [m.to_dict() for m in matrices]
+        return matrices
+
+    def evaluate(
+        self,
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> dict[MetricType, list]:
+        """
+        Computes all avaiable metrics.
+
+        Parameters
+        ----------
+        iou_thresholds : list[float]
+            A list of IoU thresholds to compute metrics over.
+        score_thresholds : list[float]
+            A list of score thresholds to compute metrics over.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return in ConfusionMatrix.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping metric type to a list of metrics.
+        """
+        results = self.compute_precision_recall(
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            number_of_examples=number_of_examples,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        return results
+
 
 class DataLoader:
     """
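`evaluate` returns here as a thin wrapper: it calls `compute_precision_recall`, then attaches the result of `compute_confusion_matrix` under `MetricType.ConfusionMatrix`. The sketch below only restates the equivalence implied by the added code:

```python
# One call ...
combined = evaluator.evaluate(iou_thresholds=[0.5], number_of_examples=2)

# ... is equivalent to:
separate = evaluator.compute_precision_recall(iou_thresholds=[0.5])
separate[MetricType.ConfusionMatrix] = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5],
    number_of_examples=2,
)
```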
@@ -1070,120 +1105,6 @@ class DataLoader:
                 annotation_type=Bitmask,
             )
 
-    def add_bounding_boxes_from_valor_dict(
-        self,
-        detections: list[tuple[dict, dict]],
-        show_progress: bool = False,
-    ):
-        """
-        Adds Valor-format detections to the cache.
-
-        Parameters
-        ----------
-        detections : list[tuple[dict, dict]]
-            A list of groundtruth, prediction pairs in Valor-format dictionaries.
-        annotation_type : type[Bitmask] | type[BoundingBox] | type[Polygon]
-            The annotation type to process.
-        show_progress : bool, default=False
-            Toggle for tqdm progress bar.
-        """
-        warnings.warn(
-            "The `...from_valor_dict` functions are deprecated and will be deleted in the near future. Use `add_bounding_boxes`, `add_bitmasks`, or `add_polygons` instead.",
-            DeprecationWarning,
-        )
-
-        def _get_bbox_extrema(
-            data: list,
-        ) -> tuple[float, float, float, float]:
-            """Get the correct representation of an annotation object from a valor dictionary."""
-            x = [point[0] for shape in data for point in shape]
-            y = [point[1] for shape in data for point in shape]
-            return (min(x), max(x), min(y), max(y))
-
-        disable_tqdm = not show_progress
-        for groundtruth, prediction in tqdm(detections, disable=disable_tqdm):
-
-            if not isinstance(groundtruth, dict) or not isinstance(
-                prediction, dict
-            ):
-                raise ValueError(
-                    f"Received values with type `{type(groundtruth)}` which are not valid Valor dictionaries."
-                )
-
-            # update metadata
-            self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += len(groundtruth["annotations"])
-            self._evaluator.n_predictions += len(prediction["annotations"])
-
-            # update datum uid index
-            uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
-
-            # initialize bounding box examples
-            self._evaluator.groundtruth_examples[uid_index] = np.zeros(
-                (len(groundtruth["annotations"]), 4), dtype=np.float16
-            )
-            self._evaluator.prediction_examples[uid_index] = np.zeros(
-                (len(prediction["annotations"]), 4), dtype=np.float16
-            )
-
-            # cache labels and annotations
-            groundtruths = list()
-            predictions = list()
-
-            for gidx, gann in enumerate(groundtruth["annotations"]):
-                if gann["bounding_box"] is None:
-                    raise ValueError(
-                        f"Detection `{groundtruth['datum']['uid']}` contains a ground truth without a bounding box."
-                    )
-                self._evaluator.groundtruth_examples[uid_index][
-                    gidx
-                ] = np.array(
-                    _get_bbox_extrema(gann["bounding_box"]),
-                )
-                for valor_label in gann["labels"]:
-                    if valor_label["key"] != "name":
-                        continue
-                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
-                    label_idx = self._add_label(glabel)
-                    self.groundtruth_count[label_idx][uid_index] += 1
-                    groundtruths.append(
-                        (
-                            gidx,
-                            label_idx,
-                            _get_bbox_extrema(gann["bounding_box"]),
-                        )
-                    )
-            for pidx, pann in enumerate(prediction["annotations"]):
-                if pann["bounding_box"] is None:
-                    raise ValueError(
-                        f"Detection `{prediction['datum']['uid']}` contains a prediction without a bounding box."
-                    )
-                self._evaluator.prediction_examples[uid_index][
-                    pidx
-                ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
-                for valor_label in pann["labels"]:
-                    if valor_label["key"] != "name":
-                        continue
-                    plabel = valor_label["value"]
-                    pscore = valor_label["score"]
-                    label_idx = self._add_label(plabel)
-                    self.prediction_count[label_idx][uid_index] += 1
-                    predictions.append(
-                        (
-                            pidx,
-                            label_idx,
-                            pscore,
-                            _get_bbox_extrema(pann["bounding_box"]),
-                        )
-                    )
-
-            self._compute_ious_and_cache_pairs(
-                uid_index=uid_index,
-                groundtruths=groundtruths,
-                predictions=predictions,
-                annotation_type=BoundingBox,
-            )
-
     def finalize(self) -> Evaluator:
         """
         Performs data finalization and some preprocessing steps.
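The deleted `add_bounding_boxes_from_valor_dict` converted Valor-format dictionaries into bounding-box extrema before caching them; its deprecation warning points callers to `add_bounding_boxes`, `add_bitmasks`, or `add_polygons` instead. The exact constructors of `BoundingBox` and `Detection` are not part of this diff, so the sketch below only reproduces the extrema logic of the removed `_get_bbox_extrema` helper for callers who still need to flatten Valor-style geometry themselves:

```python
def bbox_extrema(
    shapes: list[list[tuple[float, float]]]
) -> tuple[float, float, float, float]:
    """Collapse a list of rings of (x, y) points into (xmin, xmax, ymin, ymax),
    mirroring the helper removed above."""
    xs = [point[0] for shape in shapes for point in shape]
    ys = [point[1] for shape in shapes for point in shape]
    return (min(xs), max(xs), min(ys), max(ys))


# Example: a Valor-style bounding_box is a list of rings of (x, y) points.
print(bbox_extrema([[(0.0, 0.0), (10.0, 0.0), (10.0, 5.0), (0.0, 5.0)]]))
# (0.0, 10.0, 0.0, 5.0)
```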
|