valor-lite 0.33.8__py3-none-any.whl → 0.33.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/LICENSE +21 -0
- valor_lite/classification/annotation.py +24 -0
- valor_lite/classification/manager.py +189 -217
- valor_lite/classification/metric.py +266 -27
- valor_lite/{detection → object_detection}/annotation.py +144 -3
- valor_lite/{detection → object_detection}/manager.py +284 -368
- valor_lite/object_detection/metric.py +789 -0
- valor_lite/semantic_segmentation/annotation.py +96 -0
- valor_lite/{segmentation → semantic_segmentation}/manager.py +33 -16
- valor_lite/semantic_segmentation/metric.py +278 -0
- valor_lite/text_generation/__init__.py +0 -0
- valor_lite-0.33.9.dist-info/METADATA +179 -0
- valor_lite-0.33.9.dist-info/RECORD +24 -0
- valor_lite/detection/metric.py +0 -380
- valor_lite/segmentation/annotation.py +0 -49
- valor_lite/segmentation/metric.py +0 -119
- valor_lite-0.33.8.dist-info/METADATA +0 -41
- valor_lite-0.33.8.dist-info/RECORD +0 -22
- /valor_lite/{detection → object_detection}/__init__.py +0 -0
- /valor_lite/{detection → object_detection}/computation.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/__init__.py +0 -0
- /valor_lite/{segmentation → semantic_segmentation}/computation.py +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.9.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.9.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.8.dist-info → valor_lite-0.33.9.dist-info}/top_level.txt +0 -0
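The headline change in 0.33.9 is a rename of the task-specific subpackages: `valor_lite.detection` becomes `valor_lite.object_detection` and `valor_lite.segmentation` becomes `valor_lite.semantic_segmentation`, as the moved files above show. Downstream code mainly needs its imports updated; a minimal sketch of the migration (the imported names are taken from the manager.py diff below):

```python
# valor-lite 0.33.8
# from valor_lite.detection.manager import DataLoader, Evaluator

# valor-lite 0.33.9
from valor_lite.object_detection.annotation import BoundingBox, Detection
from valor_lite.object_detection.manager import DataLoader, Evaluator
```

The line-level diff below covers `valor_lite/{detection → object_detection}/manager.py`.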
--- a/valor_lite/detection/manager.py
+++ b/valor_lite/object_detection/manager.py
@@ -1,19 +1,18 @@
-import warnings
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import Type
 
 import numpy as np
-import valor_lite.detection.annotation as annotation
+import valor_lite.object_detection.annotation as annotation
 from numpy.typing import NDArray
 from tqdm import tqdm
-from valor_lite.detection.annotation import (
+from valor_lite.object_detection.annotation import (
     Bitmask,
     BoundingBox,
     Detection,
     Polygon,
 )
-from valor_lite.detection.computation import (
+from valor_lite.object_detection.computation import (
     compute_bbox_iou,
     compute_bitmask_iou,
     compute_confusion_matrix,
@@ -21,7 +20,7 @@ from valor_lite.detection.computation import (
     compute_polygon_iou,
     compute_ranked_pairs,
 )
-from valor_lite.detection.metric import (
+from valor_lite.object_detection.metric import (
     AP,
     AR,
     F1,
@@ -213,229 +212,6 @@ class Evaluator:
             label_metadata=label_metadata,
         )
 
-    def evaluate(
-        self,
-        metrics_to_return: list[MetricType] = MetricType.base_metrics(),
-        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
-        score_thresholds: list[float] = [0.5],
-        number_of_examples: int = 0,
-        filter_: Filter | None = None,
-        as_dict: bool = False,
-    ) -> dict[MetricType, list]:
-        """
-        Performs an evaluation and returns metrics.
-
-        Parameters
-        ----------
-        metrics_to_return : list[MetricType]
-            A list of metrics to return in the results.
-        iou_thresholds : list[float]
-            A list of IoU thresholds to compute metrics over.
-        score_thresholds : list[float]
-            A list of score thresholds to compute metrics over.
-        number_of_examples : int, default=0
-            Maximum number of annotation examples to return in ConfusionMatrix.
-        filter_ : Filter, optional
-            An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
-
-        Returns
-        -------
-        dict[MetricType, list]
-            A dictionary mapping MetricType enumerations to lists of computed metrics.
-        """
-
-        ranked_pairs = self._ranked_pairs
-        detailed_pairs = self._detailed_pairs
-        label_metadata = self._label_metadata
-        if filter_ is not None:
-            ranked_pairs = ranked_pairs[filter_.ranked_indices]
-            detailed_pairs = detailed_pairs[filter_.detailed_indices]
-            label_metadata = filter_.label_metadata
-
-        (
-            (
-                average_precision,
-                mean_average_precision,
-                average_precision_average_over_ious,
-                mean_average_precision_average_over_ious,
-            ),
-            (
-                average_recall,
-                mean_average_recall,
-                average_recall_averaged_over_scores,
-                mean_average_recall_averaged_over_scores,
-            ),
-            precision_recall,
-            pr_curves,
-        ) = compute_metrics(
-            data=ranked_pairs,
-            label_metadata=label_metadata,
-            iou_thresholds=np.array(iou_thresholds),
-            score_thresholds=np.array(score_thresholds),
-        )
-
-        metrics = defaultdict(list)
-
-        metrics[MetricType.AP] = [
-            AP(
-                value=average_precision[iou_idx][label_idx],
-                iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for iou_idx in range(average_precision.shape[0])
-            for label_idx in range(average_precision.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAP] = [
-            mAP(
-                value=mean_average_precision[iou_idx],
-                iou_threshold=iou_thresholds[iou_idx],
-            )
-            for iou_idx in range(mean_average_precision.shape[0])
-        ]
-
-        metrics[MetricType.APAveragedOverIOUs] = [
-            APAveragedOverIOUs(
-                value=average_precision_average_over_ious[label_idx],
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAPAveragedOverIOUs] = [
-            mAPAveragedOverIOUs(
-                value=mean_average_precision_average_over_ious,
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.AR] = [
-            AR(
-                value=average_recall[score_idx][label_idx],
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for score_idx in range(average_recall.shape[0])
-            for label_idx in range(average_recall.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAR] = [
-            mAR(
-                value=mean_average_recall[score_idx],
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-            )
-            for score_idx in range(mean_average_recall.shape[0])
-        ]
-
-        metrics[MetricType.ARAveragedOverScores] = [
-            ARAveragedOverScores(
-                value=average_recall_averaged_over_scores[label_idx],
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mARAveragedOverScores] = [
-            mARAveragedOverScores(
-                value=mean_average_recall_averaged_over_scores,
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.PrecisionRecallCurve] = [
-            PrecisionRecallCurve(
-                precision=list(pr_curves[iou_idx][label_idx]),
-                iou_threshold=iou_threshold,
-                label=label,
-            )
-            for iou_idx, iou_threshold in enumerate(iou_thresholds)
-            for label_idx, label in self.index_to_label.items()
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        for label_idx, label in self.index_to_label.items():
-
-            if label_metadata[label_idx, 0] == 0:
-                continue
-
-            for score_idx, score_threshold in enumerate(score_thresholds):
-                for iou_idx, iou_threshold in enumerate(iou_thresholds):
-
-                    row = precision_recall[iou_idx][score_idx][label_idx]
-                    kwargs = {
-                        "label": label,
-                        "iou_threshold": iou_threshold,
-                        "score_threshold": score_threshold,
-                    }
-                    metrics[MetricType.Counts].append(
-                        Counts(
-                            tp=int(row[0]),
-                            fp=int(row[1]),
-                            fn=int(row[2]),
-                            **kwargs,
-                        )
-                    )
-
-                    metrics[MetricType.Precision].append(
-                        Precision(
-                            value=row[3],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.Recall].append(
-                        Recall(
-                            value=row[4],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.F1].append(
-                        F1(
-                            value=row[5],
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.Accuracy].append(
-                        Accuracy(
-                            value=row[6],
-                            **kwargs,
-                        )
-                    )
-
-        if MetricType.ConfusionMatrix in metrics_to_return:
-            metrics[
-                MetricType.ConfusionMatrix
-            ] = self._compute_confusion_matrix(
-                data=detailed_pairs,
-                label_metadata=label_metadata,
-                iou_thresholds=iou_thresholds,
-                score_thresholds=score_thresholds,
-                number_of_examples=number_of_examples,
-            )
-
-        for metric in set(metrics.keys()):
-            if metric not in metrics_to_return:
-                del metrics[metric]
-
-        if as_dict:
-            return {
-                mtype: [metric.to_dict() for metric in mvalues]
-                for mtype, mvalues in metrics.items()
-            }
-
-        return metrics
-
     def _convert_example_to_dict(
         self, box: NDArray[np.float16]
     ) -> dict[str, float]:
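The removed `evaluate` above is reintroduced near the end of this diff as a thin wrapper over two new public methods, `compute_precision_recall` and `compute_confusion_matrix`. Note that the `metrics_to_return` parameter does not survive the split: the new API always computes the full metric set, and callers select what they need from the returned dictionary. A sketch, assuming an `evaluator` built via `DataLoader.finalize()`:

```python
# 0.33.8 (removed): select metrics up front
# metrics = evaluator.evaluate(metrics_to_return=[MetricType.AP])

# 0.33.9: everything is computed; index into the result instead
metrics = evaluator.evaluate(iou_thresholds=[0.5, 0.75], score_thresholds=[0.5])
ap_only = metrics[MetricType.AP]
```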
@@ -443,10 +219,10 @@ class Evaluator:
         Converts a cached bounding box example to dictionary format.
         """
         return {
-            "xmin": box[0],
-            "xmax": box[1],
-            "ymin": box[2],
-            "ymax": box[3],
+            "xmin": float(box[0]),
+            "xmax": float(box[1]),
+            "ymin": float(box[2]),
+            "ymax": float(box[3]),
         }
 
     def _unpack_confusion_matrix(
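The `float(...)` casts above are a serialization fix: the cached examples are `np.float16` values, and NumPy scalars are rejected by the stdlib JSON encoder, so the uncast dictionary could not be dumped. A standalone demonstration (not part of the package):

```python
import json

import numpy as np

box = np.zeros(4, dtype=np.float16)
try:
    json.dumps({"xmin": box[0]})  # NumPy scalar leaks into the payload
except TypeError as error:
    print(error)  # Object of type float16 is not JSON serializable

print(json.dumps({"xmin": float(box[0])}))  # {"xmin": 0.0}
```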
@@ -464,7 +240,7 @@ class Evaluator:
         | list[
             dict[
                 str,
-                str |
+                str | dict[str, float] | float,
             ]
         ],
     ],
@@ -684,37 +460,243 @@ class Evaluator:
             for gt_label_idx in range(number_of_labels)
         }
 
-    def _compute_confusion_matrix(
+    def compute_precision_recall(
         self,
-
-
-
-
-
-    ) -> list[ConfusionMatrix]:
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> dict[MetricType, list]:
         """
-        Computes
+        Computes all metrics except for ConfusionMatrix
 
         Parameters
         ----------
-        data : NDArray[np.float64]
-            An array containing detailed pairs of detections.
-        label_metadata : NDArray[np.int32]
-            An array containing label metadata.
         iou_thresholds : list[float]
-
+            A list of IoU thresholds to compute metrics over.
         score_thresholds : list[float]
-
-
-
+            A list of score thresholds to compute metrics over.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
         Returns
         -------
-
-
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
 
-
+        ranked_pairs = self._ranked_pairs
+        label_metadata = self._label_metadata
+        if filter_ is not None:
+            ranked_pairs = ranked_pairs[filter_.ranked_indices]
+            label_metadata = filter_.label_metadata
+
+        (
+            (
+                average_precision,
+                mean_average_precision,
+                average_precision_average_over_ious,
+                mean_average_precision_average_over_ious,
+            ),
+            (
+                average_recall,
+                mean_average_recall,
+                average_recall_averaged_over_scores,
+                mean_average_recall_averaged_over_scores,
+            ),
+            precision_recall,
+            pr_curves,
+        ) = compute_metrics(
+            data=ranked_pairs,
+            label_metadata=label_metadata,
+            iou_thresholds=np.array(iou_thresholds),
+            score_thresholds=np.array(score_thresholds),
+        )
+
+        metrics = defaultdict(list)
+
+        metrics[MetricType.AP] = [
+            AP(
+                value=float(average_precision[iou_idx][label_idx]),
+                iou_threshold=iou_thresholds[iou_idx],
+                label=self.index_to_label[label_idx],
+            )
+            for iou_idx in range(average_precision.shape[0])
+            for label_idx in range(average_precision.shape[1])
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAP] = [
+            mAP(
+                value=float(mean_average_precision[iou_idx]),
+                iou_threshold=iou_thresholds[iou_idx],
+            )
+            for iou_idx in range(mean_average_precision.shape[0])
+        ]
+
+        metrics[MetricType.APAveragedOverIOUs] = [
+            APAveragedOverIOUs(
+                value=float(average_precision_average_over_ious[label_idx]),
+                iou_thresholds=iou_thresholds,
+                label=self.index_to_label[label_idx],
+            )
+            for label_idx in range(self.n_labels)
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAPAveragedOverIOUs] = [
+            mAPAveragedOverIOUs(
+                value=float(mean_average_precision_average_over_ious),
+                iou_thresholds=iou_thresholds,
+            )
+        ]
+
+        metrics[MetricType.AR] = [
+            AR(
+                value=float(average_recall[score_idx][label_idx]),
+                iou_thresholds=iou_thresholds,
+                score_threshold=score_thresholds[score_idx],
+                label=self.index_to_label[label_idx],
+            )
+            for score_idx in range(average_recall.shape[0])
+            for label_idx in range(average_recall.shape[1])
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mAR] = [
+            mAR(
+                value=float(mean_average_recall[score_idx]),
+                iou_thresholds=iou_thresholds,
+                score_threshold=score_thresholds[score_idx],
+            )
+            for score_idx in range(mean_average_recall.shape[0])
+        ]
+
+        metrics[MetricType.ARAveragedOverScores] = [
+            ARAveragedOverScores(
+                value=float(average_recall_averaged_over_scores[label_idx]),
+                score_thresholds=score_thresholds,
+                iou_thresholds=iou_thresholds,
+                label=self.index_to_label[label_idx],
+            )
+            for label_idx in range(self.n_labels)
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        metrics[MetricType.mARAveragedOverScores] = [
+            mARAveragedOverScores(
+                value=float(mean_average_recall_averaged_over_scores),
+                score_thresholds=score_thresholds,
+                iou_thresholds=iou_thresholds,
+            )
+        ]
+
+        metrics[MetricType.PrecisionRecallCurve] = [
+            PrecisionRecallCurve(
+                precision=pr_curves[iou_idx][label_idx].astype(float).tolist(),
+                iou_threshold=iou_threshold,
+                label=label,
+            )
+            for iou_idx, iou_threshold in enumerate(iou_thresholds)
+            for label_idx, label in self.index_to_label.items()
+            if int(label_metadata[label_idx, 0]) > 0
+        ]
+
+        for label_idx, label in self.index_to_label.items():
+
+            if label_metadata[label_idx, 0] == 0:
+                continue
+
+            for score_idx, score_threshold in enumerate(score_thresholds):
+                for iou_idx, iou_threshold in enumerate(iou_thresholds):
+
+                    row = precision_recall[iou_idx][score_idx][label_idx]
+                    kwargs = {
+                        "label": label,
+                        "iou_threshold": iou_threshold,
+                        "score_threshold": score_threshold,
+                    }
+                    metrics[MetricType.Counts].append(
+                        Counts(
+                            tp=int(row[0]),
+                            fp=int(row[1]),
+                            fn=int(row[2]),
+                            **kwargs,
+                        )
+                    )
+
+                    metrics[MetricType.Precision].append(
+                        Precision(
+                            value=float(row[3]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.Recall].append(
+                        Recall(
+                            value=float(row[4]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.F1].append(
+                        F1(
+                            value=float(row[5]),
+                            **kwargs,
+                        )
+                    )
+                    metrics[MetricType.Accuracy].append(
+                        Accuracy(
+                            value=float(row[6]),
+                            **kwargs,
+                        )
+                    )
+
+        if as_dict:
+            return {
+                mtype: [metric.to_dict() for metric in mvalues]
+                for mtype, mvalues in metrics.items()
+            }
+
+        return metrics
+
+    def compute_confusion_matrix(
+        self,
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> list:
+        """
+        Computes confusion matrices at various thresholds.
+
+        Parameters
+        ----------
+        iou_thresholds : list[float]
+            A list of IoU thresholds to compute metrics over.
+        score_thresholds : list[float]
+            A list of score thresholds to compute metrics over.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return in ConfusionMatrix.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
+
+        Returns
+        -------
+        list[ConfusionMatrix] | list[dict]
+            List of confusion matrices per threshold pair.
+        """
+
+        detailed_pairs = self._detailed_pairs
+        label_metadata = self._label_metadata
+        if filter_ is not None:
+            detailed_pairs = detailed_pairs[filter_.detailed_indices]
+            label_metadata = filter_.label_metadata
+
+        if detailed_pairs.size == 0:
             return list()
 
         (
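This hunk replaces the private `_compute_confusion_matrix(data, label_metadata, ...)` helper with two public methods. Instead of taking arrays as arguments, `compute_precision_recall` and `compute_confusion_matrix` read the evaluator's cached pairs directly and accept the same `filter_` and `as_dict` options. A usage sketch based on the signatures above (again assuming a finalized `evaluator`):

```python
# Precision/recall family only (AP, mAP, AR, PR curves, counts, ...)
pr_metrics = evaluator.compute_precision_recall(
    iou_thresholds=[0.5, 0.75],
    score_thresholds=[0.5],
    as_dict=True,  # plain dicts instead of metric dataclasses
)

# Confusion matrices only, capped at two annotation examples per cell
matrices = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5, 0.75],
    score_thresholds=[0.5],
    number_of_examples=2,
)
```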
@@ -722,7 +704,7 @@ class Evaluator:
             hallucinations,
             missing_predictions,
         ) = compute_confusion_matrix(
-            data=
+            data=detailed_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
@@ -730,7 +712,7 @@ class Evaluator:
         )
 
         n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
-        return [
+        matrices = [
             ConfusionMatrix(
                 iou_threshold=iou_thresholds[iou_idx],
                 score_threshold=score_thresholds[score_idx],
@@ -759,6 +741,54 @@ class Evaluator:
             for score_idx in range(n_scores)
         ]
 
+        if as_dict:
+            return [m.to_dict() for m in matrices]
+        return matrices
+
+    def evaluate(
+        self,
+        iou_thresholds: list[float] = [0.5, 0.75, 0.9],
+        score_thresholds: list[float] = [0.5],
+        number_of_examples: int = 0,
+        filter_: Filter | None = None,
+        as_dict: bool = False,
+    ) -> dict[MetricType, list]:
+        """
+        Computes all avaiable metrics.
+
+        Parameters
+        ----------
+        iou_thresholds : list[float]
+            A list of IoU thresholds to compute metrics over.
+        score_thresholds : list[float]
+            A list of score thresholds to compute metrics over.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return in ConfusionMatrix.
+        filter_ : Filter, optional
+            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping metric type to a list of metrics.
+        """
+        results = self.compute_precision_recall(
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            number_of_examples=number_of_examples,
+            filter_=filter_,
+            as_dict=as_dict,
+        )
+        return results
+
 
 class DataLoader:
     """
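The reintroduced `evaluate` is now pure composition, so by the body above the following two call patterns should produce the same result (a sketch, assuming a finalized `evaluator`):

```python
# One call...
all_metrics = evaluator.evaluate(iou_thresholds=[0.5], score_thresholds=[0.5])

# ...or the two underlying calls it delegates to:
all_metrics = evaluator.compute_precision_recall(
    iou_thresholds=[0.5], score_thresholds=[0.5]
)
all_metrics[MetricType.ConfusionMatrix] = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5], score_thresholds=[0.5]
)
```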
@@ -1070,120 +1100,6 @@ class DataLoader:
             annotation_type=Bitmask,
         )
 
-    def add_bounding_boxes_from_valor_dict(
-        self,
-        detections: list[tuple[dict, dict]],
-        show_progress: bool = False,
-    ):
-        """
-        Adds Valor-format detections to the cache.
-
-        Parameters
-        ----------
-        detections : list[tuple[dict, dict]]
-            A list of groundtruth, prediction pairs in Valor-format dictionaries.
-        annotation_type : type[Bitmask] | type[BoundingBox] | type[Polygon]
-            The annotation type to process.
-        show_progress : bool, default=False
-            Toggle for tqdm progress bar.
-        """
-        warnings.warn(
-            "The `...from_valor_dict` functions are deprecated and will be deleted in the near future. Use `add_bounding_boxes`, `add_bitmasks`, or `add_polygons` instead.",
-            DeprecationWarning,
-        )
-
-        def _get_bbox_extrema(
-            data: list,
-        ) -> tuple[float, float, float, float]:
-            """Get the correct representation of an annotation object from a valor dictionary."""
-            x = [point[0] for shape in data for point in shape]
-            y = [point[1] for shape in data for point in shape]
-            return (min(x), max(x), min(y), max(y))
-
-        disable_tqdm = not show_progress
-        for groundtruth, prediction in tqdm(detections, disable=disable_tqdm):
-
-            if not isinstance(groundtruth, dict) or not isinstance(
-                prediction, dict
-            ):
-                raise ValueError(
-                    f"Received values with type `{type(groundtruth)}` which are not valid Valor dictionaries."
-                )
-
-            # update metadata
-            self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += len(groundtruth["annotations"])
-            self._evaluator.n_predictions += len(prediction["annotations"])
-
-            # update datum uid index
-            uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
-
-            # initialize bounding box examples
-            self._evaluator.groundtruth_examples[uid_index] = np.zeros(
-                (len(groundtruth["annotations"]), 4), dtype=np.float16
-            )
-            self._evaluator.prediction_examples[uid_index] = np.zeros(
-                (len(prediction["annotations"]), 4), dtype=np.float16
-            )
-
-            # cache labels and annotations
-            groundtruths = list()
-            predictions = list()
-
-            for gidx, gann in enumerate(groundtruth["annotations"]):
-                if gann["bounding_box"] is None:
-                    raise ValueError(
-                        f"Detection `{groundtruth['datum']['uid']}` contains a ground truth without a bounding box."
-                    )
-                self._evaluator.groundtruth_examples[uid_index][
-                    gidx
-                ] = np.array(
-                    _get_bbox_extrema(gann["bounding_box"]),
-                )
-                for valor_label in gann["labels"]:
-                    if valor_label["key"] != "name":
-                        continue
-                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
-                    label_idx = self._add_label(glabel)
-                    self.groundtruth_count[label_idx][uid_index] += 1
-                    groundtruths.append(
-                        (
-                            gidx,
-                            label_idx,
-                            _get_bbox_extrema(gann["bounding_box"]),
-                        )
-                    )
-            for pidx, pann in enumerate(prediction["annotations"]):
-                if pann["bounding_box"] is None:
-                    raise ValueError(
-                        f"Detection `{prediction['datum']['uid']}` contains a prediction without a bounding box."
-                    )
-                self._evaluator.prediction_examples[uid_index][
-                    pidx
-                ] = np.array(_get_bbox_extrema(pann["bounding_box"]))
-                for valor_label in pann["labels"]:
-                    if valor_label["key"] != "name":
-                        continue
-                    plabel = valor_label["value"]
-                    pscore = valor_label["score"]
-                    label_idx = self._add_label(plabel)
-                    self.prediction_count[label_idx][uid_index] += 1
-                    predictions.append(
-                        (
-                            pidx,
-                            label_idx,
-                            pscore,
-                            _get_bbox_extrema(pann["bounding_box"]),
-                        )
-                    )
-
-            self._compute_ious_and_cache_pairs(
-                uid_index=uid_index,
-                groundtruths=groundtruths,
-                predictions=predictions,
-                annotation_type=BoundingBox,
-            )
-
     def finalize(self) -> Evaluator:
         """
         Performs data finalization and some preprocessing steps.