valor-lite 0.33.13-py3-none-any.whl → 0.33.15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/classification/__init__.py +8 -21
- valor_lite/classification/computation.py +2 -2
- valor_lite/classification/manager.py +32 -244
- valor_lite/classification/metric.py +331 -372
- valor_lite/classification/utilities.py +222 -0
- valor_lite/object_detection/__init__.py +4 -35
- valor_lite/object_detection/computation.py +25 -22
- valor_lite/object_detection/manager.py +38 -497
- valor_lite/object_detection/metric.py +633 -706
- valor_lite/object_detection/utilities.py +505 -0
- valor_lite/schemas.py +10 -8
- valor_lite/semantic_segmentation/__init__.py +2 -17
- valor_lite/semantic_segmentation/computation.py +1 -1
- valor_lite/semantic_segmentation/manager.py +13 -116
- valor_lite/semantic_segmentation/metric.py +216 -239
- valor_lite/semantic_segmentation/utilities.py +104 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.15.dist-info}/METADATA +1 -1
- valor_lite-0.33.15.dist-info/RECORD +27 -0
- valor_lite-0.33.13.dist-info/RECORD +0 -24
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.15.dist-info}/LICENSE +0 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.15.dist-info}/WHEEL +0 -0
- {valor_lite-0.33.13.dist-info → valor_lite-0.33.15.dist-info}/top_level.txt +0 -0
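The hunks below are from valor_lite/object_detection/manager.py (+38 −497 in the list above); the classification and semantic_segmentation packages receive the same treatment. The pattern throughout this release: the per-metric dataclasses (mAP, Counts, F1, ...) and the `as_dict` flag are removed, every compute method returns generic `Metric` objects keyed by `MetricType`, and the array-to-metric unpacking moves into new `utilities.py` modules. A minimal sketch of what adapting caller code might look like, assuming an already-populated `Evaluator` named `evaluator`, and assuming the generic `Metric` type keeps the `to_dict()` serializer its predecessors had:

```python
from valor_lite.object_detection.metric import Metric, MetricType

# dict[MetricType, list[Metric]] replaces the old dict of per-type
# dataclasses (signature change shown in the hunks below).
metrics: dict[MetricType, list[Metric]] = evaluator.evaluate(
    iou_thresholds=[0.5, 0.75, 0.9],
    score_thresholds=[0.5],
    number_of_examples=0,
)

# The removed `as_dict=True` option becomes an explicit serialization
# step; `to_dict()` on the new Metric type is an assumption here.
metrics_as_dicts = {
    mtype: [m.to_dict() for m in mvalues]
    for mtype, mvalues in metrics.items()
}
```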
valor_lite/object_detection/manager.py

@@ -16,27 +16,14 @@ from valor_lite.object_detection.computation import (
     compute_bbox_iou,
     compute_bitmask_iou,
     compute_confusion_matrix,
-    compute_metrics,
     compute_polygon_iou,
+    compute_precion_recall,
     compute_ranked_pairs,
 )
-from valor_lite.object_detection.metric import (
-    AP,
-    AR,
-    F1,
-    Accuracy,
-    APAveragedOverIOUs,
-    ARAveragedOverScores,
-    ConfusionMatrix,
-    Counts,
-    MetricType,
-    Precision,
-    PrecisionRecallCurve,
-    Recall,
-    mAP,
-    mAPAveragedOverIOUs,
-    mAR,
-    mARAveragedOverScores,
+from valor_lite.object_detection.metric import Metric, MetricType
+from valor_lite.object_detection.utilities import (
+    unpack_confusion_matrix_into_metric_list,
+    unpack_precision_recall_into_metric_lists,
 )
 
 """
@@ -212,274 +199,23 @@ class Evaluator:
             label_metadata=label_metadata,
         )
 
-    def _convert_example_to_dict(
-        self, box: NDArray[np.float16]
-    ) -> dict[str, float]:
-        """
-        Converts a cached bounding box example to dictionary format.
-        """
-        return {
-            "xmin": float(box[0]),
-            "xmax": float(box[1]),
-            "ymin": float(box[2]),
-            "ymax": float(box[3]),
-        }
-
-    def _unpack_confusion_matrix(
-        self,
-        confusion_matrix: NDArray[np.float64],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[
-        str,
-        dict[
-            str,
-            dict[
-                str,
-                int
-                | list[
-                    dict[
-                        str,
-                        str | dict[str, float] | float,
-                    ]
-                ],
-            ],
-        ],
-    ]:
-        """
-        Unpacks a numpy array of confusion matrix counts and examples.
-        """
-
-        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 4 + 1,
-            ]
-        )
-
-        groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 4 + 2,
-            ]
-        )
-
-        prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 4 + 3,
-            ]
-        )
-
-        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
-            confusion_matrix[
-                gt_label_idx,
-                pd_label_idx,
-                example_idx * 4 + 4,
-            ]
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                self.index_to_label[pd_label_idx]: {
-                    "count": max(
-                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
-                        0,
-                    ),
-                    "examples": [
-                        {
-                            "datum": self.index_to_uid[
-                                datum_idx(
-                                    gt_label_idx, pd_label_idx, example_idx
-                                )
-                            ],
-                            "groundtruth": self._convert_example_to_dict(
-                                self.groundtruth_examples[
-                                    datum_idx(
-                                        gt_label_idx,
-                                        pd_label_idx,
-                                        example_idx,
-                                    )
-                                ][
-                                    groundtruth_idx(
-                                        gt_label_idx,
-                                        pd_label_idx,
-                                        example_idx,
-                                    )
-                                ]
-                            ),
-                            "prediction": self._convert_example_to_dict(
-                                self.prediction_examples[
-                                    datum_idx(
-                                        gt_label_idx,
-                                        pd_label_idx,
-                                        example_idx,
-                                    )
-                                ][
-                                    prediction_idx(
-                                        gt_label_idx,
-                                        pd_label_idx,
-                                        example_idx,
-                                    )
-                                ]
-                            ),
-                            "score": score_idx(
-                                gt_label_idx, pd_label_idx, example_idx
-                            ),
-                        }
-                        for example_idx in range(number_of_examples)
-                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
-                        >= 0
-                    ],
-                }
-                for pd_label_idx in range(number_of_labels)
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
-    def _unpack_hallucinations(
-        self,
-        hallucinations: NDArray[np.float64],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[
-        str,
-        dict[
-            str,
-            int | list[dict[str, str | float | dict[str, float]]],
-        ],
-    ]:
-        """
-        Unpacks a numpy array of hallucination counts and examples.
-        """
-
-        datum_idx = (
-            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                hallucinations[
-                    pd_label_idx,
-                    example_idx * 3 + 1,
-                ]
-            )
-        )
-
-        prediction_idx = (
-            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                hallucinations[
-                    pd_label_idx,
-                    example_idx * 3 + 2,
-                ]
-            )
-        )
-
-        score_idx = (
-            lambda pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
-                hallucinations[
-                    pd_label_idx,
-                    example_idx * 3 + 3,
-                ]
-            )
-        )
-
-        return {
-            self.index_to_label[pd_label_idx]: {
-                "count": max(
-                    int(hallucinations[pd_label_idx, 0]),
-                    0,
-                ),
-                "examples": [
-                    {
-                        "datum": self.index_to_uid[
-                            datum_idx(pd_label_idx, example_idx)
-                        ],
-                        "prediction": self._convert_example_to_dict(
-                            self.prediction_examples[
-                                datum_idx(pd_label_idx, example_idx)
-                            ][prediction_idx(pd_label_idx, example_idx)]
-                        ),
-                        "score": score_idx(pd_label_idx, example_idx),
-                    }
-                    for example_idx in range(number_of_examples)
-                    if datum_idx(pd_label_idx, example_idx) >= 0
-                ],
-            }
-            for pd_label_idx in range(number_of_labels)
-        }
-
-    def _unpack_missing_predictions(
-        self,
-        missing_predictions: NDArray[np.int32],
-        number_of_labels: int,
-        number_of_examples: int,
-    ) -> dict[str, dict[str, int | list[dict[str, str | dict[str, float]]]]]:
-        """
-        Unpacks a numpy array of missing prediction counts and examples.
-        """
-
-        datum_idx = (
-            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                missing_predictions[
-                    gt_label_idx,
-                    example_idx * 2 + 1,
-                ]
-            )
-        )
-
-        groundtruth_idx = (
-            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-                missing_predictions[
-                    gt_label_idx,
-                    example_idx * 2 + 2,
-                ]
-            )
-        )
-
-        return {
-            self.index_to_label[gt_label_idx]: {
-                "count": max(
-                    int(missing_predictions[gt_label_idx, 0]),
-                    0,
-                ),
-                "examples": [
-                    {
-                        "datum": self.index_to_uid[
-                            datum_idx(gt_label_idx, example_idx)
-                        ],
-                        "groundtruth": self._convert_example_to_dict(
-                            self.groundtruth_examples[
-                                datum_idx(gt_label_idx, example_idx)
-                            ][groundtruth_idx(gt_label_idx, example_idx)]
-                        ),
-                    }
-                    for example_idx in range(number_of_examples)
-                    if datum_idx(gt_label_idx, example_idx) >= 0
-                ],
-            }
-            for gt_label_idx in range(number_of_labels)
-        }
-
     def compute_precision_recall(
         self,
         iou_thresholds: list[float] = [0.5, 0.75, 0.9],
         score_thresholds: list[float] = [0.5],
         filter_: Filter | None = None,
-        as_dict: bool = False,
-    ) -> dict[MetricType, list]:
+    ) -> dict[MetricType, list[Metric]]:
         """
         Computes all metrics except for ConfusionMatrix
 
         Parameters
         ----------
         iou_thresholds : list[float]
-            A list of
+            A list of IOU thresholds to compute metrics over.
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
@@ -493,182 +229,20 @@ class Evaluator:
             ranked_pairs = ranked_pairs[filter_.ranked_indices]
             label_metadata = filter_.label_metadata
 
-        (
-            (
-                average_precision,
-                mean_average_precision,
-                average_precision_average_over_ious,
-                mean_average_precision_average_over_ious,
-            ),
-            (
-                average_recall,
-                mean_average_recall,
-                average_recall_averaged_over_scores,
-                mean_average_recall_averaged_over_scores,
-            ),
-            accuracy,
-            precision_recall,
-            pr_curves,
-        ) = compute_metrics(
+        results = compute_precion_recall(
             data=ranked_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
         )
 
-        metrics = defaultdict(list)
-
-        metrics[MetricType.AP] = [
-            AP(
-                value=float(average_precision[iou_idx][label_idx]),
-                iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for iou_idx in range(average_precision.shape[0])
-            for label_idx in range(average_precision.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAP] = [
-            mAP(
-                value=float(mean_average_precision[iou_idx]),
-                iou_threshold=iou_thresholds[iou_idx],
-            )
-            for iou_idx in range(mean_average_precision.shape[0])
-        ]
-
-        metrics[MetricType.APAveragedOverIOUs] = [
-            APAveragedOverIOUs(
-                value=float(average_precision_average_over_ious[label_idx]),
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAPAveragedOverIOUs] = [
-            mAPAveragedOverIOUs(
-                value=float(mean_average_precision_average_over_ious),
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.AR] = [
-            AR(
-                value=float(average_recall[score_idx][label_idx]),
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-                label=self.index_to_label[label_idx],
-            )
-            for score_idx in range(average_recall.shape[0])
-            for label_idx in range(average_recall.shape[1])
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mAR] = [
-            mAR(
-                value=float(mean_average_recall[score_idx]),
-                iou_thresholds=iou_thresholds,
-                score_threshold=score_thresholds[score_idx],
-            )
-            for score_idx in range(mean_average_recall.shape[0])
-        ]
-
-        metrics[MetricType.ARAveragedOverScores] = [
-            ARAveragedOverScores(
-                value=float(average_recall_averaged_over_scores[label_idx]),
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-                label=self.index_to_label[label_idx],
-            )
-            for label_idx in range(self.n_labels)
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        metrics[MetricType.mARAveragedOverScores] = [
-            mARAveragedOverScores(
-                value=float(mean_average_recall_averaged_over_scores),
-                score_thresholds=score_thresholds,
-                iou_thresholds=iou_thresholds,
-            )
-        ]
-
-        metrics[MetricType.Accuracy] = [
-            Accuracy(
-                value=float(accuracy[iou_idx, score_idx]),
-                iou_threshold=iou_thresholds[iou_idx],
-                score_threshold=score_thresholds[score_idx],
-            )
-            for iou_idx in range(accuracy.shape[0])
-            for score_idx in range(accuracy.shape[1])
-        ]
-
-        metrics[MetricType.PrecisionRecallCurve] = [
-            PrecisionRecallCurve(
-                precisions=pr_curves[iou_idx, label_idx, :, 0]
-                .astype(float)
-                .tolist(),
-                scores=pr_curves[iou_idx, label_idx, :, 1]
-                .astype(float)
-                .tolist(),
-                iou_threshold=iou_threshold,
-                label=label,
-            )
-            for iou_idx, iou_threshold in enumerate(iou_thresholds)
-            for label_idx, label in self.index_to_label.items()
-            if int(label_metadata[label_idx, 0]) > 0
-        ]
-
-        for label_idx, label in self.index_to_label.items():
-
-            if label_metadata[label_idx, 0] == 0:
-                continue
-
-            for score_idx, score_threshold in enumerate(score_thresholds):
-                for iou_idx, iou_threshold in enumerate(iou_thresholds):
-
-                    row = precision_recall[iou_idx][score_idx][label_idx]
-                    kwargs = {
-                        "label": label,
-                        "iou_threshold": iou_threshold,
-                        "score_threshold": score_threshold,
-                    }
-                    metrics[MetricType.Counts].append(
-                        Counts(
-                            tp=int(row[0]),
-                            fp=int(row[1]),
-                            fn=int(row[2]),
-                            **kwargs,
-                        )
-                    )
-
-                    metrics[MetricType.Precision].append(
-                        Precision(
-                            value=float(row[3]),
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.Recall].append(
-                        Recall(
-                            value=float(row[4]),
-                            **kwargs,
-                        )
-                    )
-                    metrics[MetricType.F1].append(
-                        F1(
-                            value=float(row[5]),
-                            **kwargs,
-                        )
-                    )
-
-        if as_dict:
-            return {
-                mtype: [metric.to_dict() for metric in mvalues]
-                for mtype, mvalues in metrics.items()
-            }
-
-        return metrics
+        return unpack_precision_recall_into_metric_lists(
+            results=results,
+            label_metadata=label_metadata,
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            index_to_label=self.index_to_label,
+        )
 
     def compute_confusion_matrix(
         self,
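The hunk above splits computation from presentation: `compute_precion_recall` (sic — the misspelling is the library's own function name) returns raw arrays, and `unpack_precision_recall_into_metric_lists` turns them into `Metric` lists. A sketch of the slimmed-down public call, assuming a populated `evaluator`; the keyword arguments come straight from the signature in the hunk:

```python
metrics = evaluator.compute_precision_recall(
    iou_thresholds=[0.5, 0.75],
    score_thresholds=[0.25, 0.5, 0.75],
)
# Every value is now a plain list of Metric objects, so generic
# post-processing no longer needs per-type isinstance checks.
for metric_type, metric_list in metrics.items():
    print(metric_type, len(metric_list))
```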
@@ -676,27 +250,24 @@ class Evaluator:
         score_thresholds: list[float] = [0.5],
         number_of_examples: int = 0,
         filter_: Filter | None = None,
-        as_dict: bool = False,
-    ) -> list:
+    ) -> list[Metric]:
         """
         Computes confusion matrices at various thresholds.
 
         Parameters
         ----------
         iou_thresholds : list[float]
-            A list of
+            A list of IOU thresholds to compute metrics over.
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         number_of_examples : int, default=0
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
-        list[
+        list[Metric]
             List of confusion matrices per threshold pair.
         """
 
@@ -709,11 +280,7 @@ class Evaluator:
         if detailed_pairs.size == 0:
             return list()
 
-        (
-            confusion_matrix,
-            hallucinations,
-            missing_predictions,
-        ) = compute_confusion_matrix(
+        results = compute_confusion_matrix(
             data=detailed_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
@@ -721,39 +288,16 @@ class Evaluator:
             n_examples=number_of_examples,
         )
 
-        n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
-        matrices = [
-            ConfusionMatrix(
-                iou_threshold=iou_thresholds[iou_idx],
-                score_threshold=score_thresholds[score_idx],
-                number_of_examples=number_of_examples,
-                confusion_matrix=self._unpack_confusion_matrix(
-                    confusion_matrix=confusion_matrix[
-                        iou_idx, score_idx, :, :
-                    ],
-                    number_of_labels=n_labels,
-                    number_of_examples=number_of_examples,
-                ),
-                hallucinations=self._unpack_hallucinations(
-                    hallucinations=hallucinations[iou_idx, score_idx, :, :],
-                    number_of_labels=n_labels,
-                    number_of_examples=number_of_examples,
-                ),
-                missing_predictions=self._unpack_missing_predictions(
-                    missing_predictions=missing_predictions[
-                        iou_idx, score_idx, :, :
-                    ],
-                    number_of_labels=n_labels,
-                    number_of_examples=number_of_examples,
-                ),
-            )
-            for iou_idx in range(n_ious)
-            for score_idx in range(n_scores)
-        ]
-
-        if as_dict:
-            return [m.to_dict() for m in matrices]
-        return matrices
+        return unpack_confusion_matrix_into_metric_list(
+            results=results,
+            iou_thresholds=iou_thresholds,
+            score_thresholds=score_thresholds,
+            number_of_examples=number_of_examples,
+            index_to_uid=self.index_to_uid,
+            index_to_label=self.index_to_label,
+            groundtruth_examples=self.groundtruth_examples,
+            prediction_examples=self.prediction_examples,
+        )
 
     def evaluate(
         self,
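Note the design change above: the unpacking helper is now a free function that receives the evaluator's lookup state (`index_to_uid`, `index_to_label`, `groundtruth_examples`, `prediction_examples`) explicitly instead of reading it off `self`, which makes it usable and testable without an `Evaluator` instance. Caller-side, the method keeps its shape; a sketch assuming a populated `evaluator`:

```python
# One Metric per (IOU threshold, score threshold) pair, each carrying
# up to `number_of_examples` annotation examples per matrix cell.
confusion_matrices = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5, 0.9],
    score_thresholds=[0.5],
    number_of_examples=2,
)
```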
@@ -761,43 +305,40 @@ class Evaluator:
         score_thresholds: list[float] = [0.5],
         number_of_examples: int = 0,
         filter_: Filter | None = None,
-        as_dict: bool = False,
-    ) -> dict[MetricType, list]:
+    ) -> dict[MetricType, list[Metric]]:
         """
         Computes all avaiable metrics.
 
         Parameters
         ----------
         iou_thresholds : list[float]
-            A list of
+            A list of IOU thresholds to compute metrics over.
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         number_of_examples : int, default=0
             Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
-        as_dict : bool, default=False
-            An option to return metrics as dictionaries.
 
         Returns
         -------
-        dict[MetricType, list]
-
+        dict[MetricType, list[Metric]]
+            Lists of metrics organized by metric type.
         """
         metrics = self.compute_precision_recall(
             iou_thresholds=iou_thresholds,
             score_thresholds=score_thresholds,
             filter_=filter_,
-            as_dict=as_dict,
         )
+
         metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
             iou_thresholds=iou_thresholds,
             score_thresholds=score_thresholds,
             number_of_examples=number_of_examples,
             filter_=filter_,
-            as_dict=as_dict,
         )
+
+        return metrics
 
 
 class DataLoader:
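After the refactor, `evaluate()` is a thin composition of the two public methods, as the final hunk shows. The sketch below (again assuming a populated `evaluator`) is equivalent to calling `evaluator.evaluate(...)` with the same thresholds:

```python
from valor_lite.object_detection.metric import MetricType

metrics = evaluator.compute_precision_recall(
    iou_thresholds=[0.5, 0.75, 0.9],
    score_thresholds=[0.5],
)
metrics[MetricType.ConfusionMatrix] = evaluator.compute_confusion_matrix(
    iou_thresholds=[0.5, 0.75, 0.9],
    score_thresholds=[0.5],
    number_of_examples=0,
)
```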