valor-lite 0.33.6__py3-none-any.whl → 0.33.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,14 @@ from dataclasses import dataclass
 @dataclass
 class Classification:
     uid: str
-    groundtruths: list[tuple[str, str]]
-    predictions: list[tuple[str, str]]
+    groundtruth: str
+    predictions: list[str]
     scores: list[float]
 
     def __post_init__(self):
+        if not isinstance(self.groundtruth, str):
+            raise ValueError(
+                "A classification must contain a single groundtruth."
+            )
         if len(self.predictions) != len(self.scores):
             raise ValueError("There must be a score per prediction label.")
@@ -3,28 +3,18 @@ from numpy.typing import NDArray
 
 
 def _compute_rocauc(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
     n_datums: int,
     n_labels: int,
-    n_label_keys: int,
     mask_matching_labels: NDArray[np.bool_],
     pd_labels: NDArray[np.int32],
 ):
     """
     Compute ROCAUC and mean ROCAUC.
     """
-    count_labels_per_key = np.bincount(label_metadata[:, 2])
-    count_groundtruths_per_key = np.bincount(
-        label_metadata[:, 2],
-        weights=label_metadata[:, 0],
-        minlength=n_label_keys,
-    )
-
     positive_count = label_metadata[:, 0]
-    negative_count = (
-        count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
-    )
+    negative_count = label_metadata[:, 1] - label_metadata[:, 0]
 
     true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
     false_positives = np.zeros_like(true_positives)
@@ -35,7 +25,6 @@ def _compute_rocauc(
             continue
 
         mask_pds = pd_labels == label_idx
-
         true_positives[label_idx] = mask_matching_labels[mask_pds]
         false_positives[label_idx] = ~mask_matching_labels[mask_pds]
         scores[label_idx] = data[mask_pds, 3]
@@ -70,32 +59,25 @@ def _compute_rocauc(
     rocauc = np.trapz(x=fpr, y=tpr, axis=1)  # type: ignore - numpy will be switching to `trapezoid` in the future.
 
     # compute mean rocauc
-    summed_rocauc = np.bincount(label_metadata[:, 2], weights=rocauc)
-    mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
-    np.divide(
-        summed_rocauc,
-        count_labels_per_key,
-        where=count_labels_per_key > 1e-9,
-        out=mean_rocauc,
-    )
+    mean_rocauc = rocauc.mean()
 
     return rocauc, mean_rocauc
 
 
 def compute_metrics(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.floating],
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_datums: int,
 ) -> tuple[
     NDArray[np.int32],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    float,
 ]:
     """
     Computes classification metrics.
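With label keys gone, the mean ROCAUC reduction in the hunk above collapses from one mean per key to a single scalar, which is why the mROCAUC slot in the return annotation changes from an array to a float. An illustrative contrast with made-up numbers:

```python
# Illustrative only: the two mROCAUC reductions, using invented per-label ROCAUC
# values and a hypothetical two-key grouping.
import numpy as np

rocauc = np.array([0.90, 0.80, 0.70, 0.60])

# 0.33.6: one mean per label key (labels 0-1 under key 0, labels 2-3 under key 1)
key_of_label = np.array([0, 0, 1, 1])
per_key_mean = np.bincount(key_of_label, weights=rocauc) / np.bincount(key_of_label)
# -> array([0.85, 0.65])

# 0.33.8: a single scalar over all labels
mean_rocauc = rocauc.mean()  # -> 0.75
```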
@@ -110,14 +92,14 @@ def compute_metrics(
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A sorted array of classification pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    score_thresholds : NDArray[np.floating]
+    score_thresholds : NDArray[np.float64]
         A 1-D array contains score thresholds to compute metrics over.
     hardmax : bool
-        Option to only allow a single positive prediction per label key.
+        Option to only allow a single positive prediction.
     n_datums : int
         The number of datums being operated over.
 
@@ -125,22 +107,21 @@ def compute_metrics(
     -------
     NDArray[np.int32]
         TP, FP, FN, TN counts.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Precision.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Recall.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Accuracy
-    NDArray[np.floating]
+    NDArray[np.float64]
         F1 Score
-    NDArray[np.floating]
+    NDArray[np.float64]
         ROCAUC.
-    NDArray[np.floating]
+    float
         mROCAUC.
     """
 
     n_labels = label_metadata.shape[0]
-    n_label_keys = np.unique(label_metadata[:, 2]).size
     n_scores = score_thresholds.shape[0]
 
     pd_labels = data[:, 2].astype(int)
@@ -155,7 +136,6 @@ def compute_metrics(
         label_metadata=label_metadata,
         n_datums=n_datums,
         n_labels=n_labels,
-        n_label_keys=n_label_keys,
         mask_matching_labels=mask_matching_labels,
         pd_labels=pd_labels,
     )
@@ -229,16 +209,16 @@ def compute_metrics(
 
 
 def _count_with_examples(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.int32]]:
     """
     Helper function for counting occurences of unique detailed pairs.
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A masked portion of a detailed pairs array.
     unique_idx : int | list[int]
         The index or indices upon which uniqueness is constrained.
@@ -247,7 +227,7 @@ def _count_with_examples(
 
     Returns
     -------
-    NDArray[np.floating]
+    NDArray[np.float64]
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
@@ -267,13 +247,12 @@ def _count_with_examples(
 
 
 def compute_confusion_matrix(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.floating],
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_examples: int,
-) -> tuple[NDArray[np.floating], NDArray[np.int32]]:
-
+) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
     """
     Compute detailed confusion matrix.
 
@@ -287,20 +266,20 @@ def compute_confusion_matrix(
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A sorted array summarizing the IOU calculations of one or more pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    iou_thresholds : NDArray[np.floating]
+    iou_thresholds : NDArray[np.float64]
         A 1-D array containing IoU thresholds.
-    score_thresholds : NDArray[np.floating]
+    score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
     n_examples : int
         The maximum number of examples to return per count.
 
     Returns
     -------
-    NDArray[np.floating]
+    NDArray[np.float64]
         Confusion matrix.
     NDArray[np.int32]
         Ground truths with missing predictions.
@@ -67,13 +67,8 @@ class Evaluator:
         self.index_to_uid: dict[int, str] = dict()
 
         # label reference
-        self.label_to_index: dict[tuple[str, str], int] = dict()
-        self.index_to_label: dict[int, tuple[str, str]] = dict()
-
-        # label key reference
-        self.index_to_label_key: dict[int, str] = dict()
-        self.label_key_to_index: dict[str, int] = dict()
-        self.label_index_to_label_key_index: dict[int, int] = dict()
+        self.label_to_index: dict[str, int] = dict()
+        self.index_to_label: dict[int, str] = dict()
 
         # computation caches
         self._detailed_pairs = np.array([])
@@ -81,7 +76,7 @@ class Evaluator:
         self._label_metadata_per_datum = np.array([], dtype=np.int32)
 
     @property
-    def ignored_prediction_labels(self) -> list[tuple[str, str]]:
+    def ignored_prediction_labels(self) -> list[str]:
         """
         Prediction labels that are not present in the ground truth set.
         """
@@ -92,7 +87,7 @@ class Evaluator:
         ]
 
     @property
-    def missing_prediction_labels(self) -> list[tuple[str, str]]:
+    def missing_prediction_labels(self) -> list[str]:
         """
         Ground truth labels that are not present in the prediction set.
         """
@@ -119,8 +114,7 @@ class Evaluator:
     def create_filter(
         self,
         datum_uids: list[str] | NDArray[np.int32] | None = None,
-        labels: list[tuple[str, str]] | NDArray[np.int32] | None = None,
-        label_keys: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a boolean mask that can be passed to an evaluation.
@@ -129,10 +123,8 @@ class Evaluator:
         ----------
         datum_uids : list[str] | NDArray[np.int32], optional
             An optional list of string uids or a numpy array of uid indices.
-        labels : list[tuple[str, str]] | NDArray[np.int32], optional
+        labels : list[str] | NDArray[np.int32], optional
             An optional list of labels or a numpy array of label indices.
-        label_keys : list[str] | NDArray[np.int32], optional
-            An optional list of label keys or a numpy array of label key indices.
 
         Returns
         -------
@@ -179,36 +171,18 @@ class Evaluator:
            mask[labels] = True
            mask_labels &= mask
 
-        if label_keys is not None:
-            if isinstance(label_keys, list):
-                label_keys = np.array(
-                    [self.label_key_to_index[key] for key in label_keys]
-                )
-            label_indices = np.where(
-                np.isclose(self._label_metadata[:, 2], label_keys)
-            )[0]
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._detailed_pairs[:, 1].astype(int), label_indices)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[label_indices] = True
-            mask_labels &= mask
-
        mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
        label_metadata_per_datum = self._label_metadata_per_datum.copy()
        label_metadata_per_datum[:, ~mask] = 0
 
        label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
-        label_metadata[:, :2] = np.transpose(
+        label_metadata = np.transpose(
            np.sum(
                label_metadata_per_datum,
                axis=1,
            )
        )
-        label_metadata[:, 2] = self._label_metadata[:, 2]
+
        n_datums = int(np.sum(label_metadata[:, 0]))
 
        return Filter(
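Filtering by label key disappears along with the argument; the equivalent 0.33.8 call simply lists the labels of interest. A hedged sketch (the evaluator object, uids, and label strings below are stand-ins, not taken from this diff):

```python
# Hypothetical usage of the narrowed create_filter signature; `evaluator` is a
# finalized Evaluator and the uid/label values are invented.
filter_ = evaluator.create_filter(
    datum_uids=["uid0", "uid1"],
    labels=["dog", "cat"],  # plain string labels; the label_keys argument no longer exists
)
metrics = evaluator.evaluate(filter_=filter_)
```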
@@ -241,6 +215,8 @@ class Evaluator:
            Maximum number of annotation examples to return in ConfusionMatrix.
        filter_ : Filter, optional
            An optional filter object.
+        as_dict : bool, default=False
+            An option to return metrics as dictionaries.
 
        Returns
        -------
@@ -286,10 +262,8 @@ class Evaluator:
 
        metrics[MetricType.mROCAUC] = [
            mROCAUC(
-                value=mean_rocauc[label_key_idx],
-                label_key=self.index_to_label_key[label_key_idx],
+                value=mean_rocauc,
            )
-            for label_key_idx in range(len(self.label_key_to_index))
        ]
 
        for label_idx, label in self.index_to_label.items():
@@ -364,8 +338,7 @@ class Evaluator:
 
    def _unpack_confusion_matrix(
        self,
-        confusion_matrix: NDArray[np.floating],
-        label_key_idx: int,
+        confusion_matrix: NDArray[np.float64],
        number_of_labels: int,
        number_of_examples: int,
    ) -> dict[
@@ -405,8 +378,8 @@ class Evaluator:
        )
 
        return {
-            self.index_to_label[gt_label_idx][1]: {
-                self.index_to_label[pd_label_idx][1]: {
+            self.index_to_label[gt_label_idx]: {
+                self.index_to_label[pd_label_idx]: {
                    "count": max(
                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
                        0,
@@ -428,22 +401,13 @@ class Evaluator:
                ],
            }
            for pd_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[pd_label_idx]
-                == label_key_idx
-            )
        }
        for gt_label_idx in range(number_of_labels)
-        if (
-            self.label_index_to_label_key_index[gt_label_idx]
-            == label_key_idx
-        )
    }
 
    def _unpack_missing_predictions(
        self,
        missing_predictions: NDArray[np.int32],
-        label_key_idx: int,
        number_of_labels: int,
        number_of_examples: int,
    ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
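The same flattening shows up in the unpacked confusion matrix: in 0.33.6 the nested dictionary was keyed by the label value and restricted to one label key at a time, while in 0.33.8 the keys are the full label strings and a single matrix covers every label. Roughly, as a shape-only sketch:

```python
# Illustrative shapes only; counts are invented and the example lists are elided.
# 0.33.6 - one matrix per label key; keys were the label *values* of e.g. ("class", "dog"):
{"dog": {"dog": {"count": 3}, "cat": {"count": 1}}}

# 0.33.8 - one matrix overall; keys are whatever label strings were handed to the loader
# (e.g. "class_dog" when converted from a Valor dict, see the final hunks below):
{"class_dog": {"class_dog": {"count": 3}, "class_cat": {"count": 1}}}
```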
@@ -461,7 +425,7 @@ class Evaluator:
        )
 
        return {
-            self.index_to_label[gt_label_idx][1]: {
+            self.index_to_label[gt_label_idx]: {
                "count": max(
                    int(missing_predictions[gt_label_idx, 0]),
                    0,
@@ -477,15 +441,11 @@ class Evaluator:
                ],
            }
            for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
        }
 
    def _compute_confusion_matrix(
        self,
-        data: NDArray[np.floating],
+        data: NDArray[np.float64],
        label_metadata: NDArray[np.int32],
        score_thresholds: list[float],
        hardmax: bool,
@@ -496,7 +456,7 @@ class Evaluator:
 
        Parameters
        ----------
-        data : NDArray[np.floating]
+        data : NDArray[np.float64]
            A data array containing classification pairs.
        label_metadata : NDArray[np.int32]
            An integer array containing label metadata.
@@ -510,7 +470,7 @@ class Evaluator:
        Returns
        -------
        list[ConfusionMatrix]
-            A list of ConfusionMatrix per label key.
+            A list of ConfusionMatrix objects.
        """
 
        if data.size == 0:
@@ -528,22 +488,18 @@ class Evaluator:
        return [
            ConfusionMatrix(
                score_threshold=score_thresholds[score_idx],
-                label_key=label_key,
                number_of_examples=number_of_examples,
                confusion_matrix=self._unpack_confusion_matrix(
                    confusion_matrix=confusion_matrix[score_idx, :, :, :],
-                    label_key_idx=label_key_idx,
                    number_of_labels=n_labels,
                    number_of_examples=number_of_examples,
                ),
                missing_predictions=self._unpack_missing_predictions(
                    missing_predictions=missing_predictions[score_idx, :, :],
-                    label_key_idx=label_key_idx,
                    number_of_labels=n_labels,
                    number_of_examples=number_of_examples,
                ),
            )
-            for label_key_idx, label_key in self.index_to_label_key.items()
            for score_idx in range(n_scores)
        ]
 
@@ -578,77 +534,50 @@ class DataLoader:
            self._evaluator.index_to_uid[index] = uid
        return self._evaluator.uid_to_index[uid]
 
-    def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
+    def _add_label(self, label: str) -> int:
        """
        Helper function for adding a label to the cache.
 
        Parameters
        ----------
-        label : tuple[str, str]
-            The label as a tuple in format (key, value).
+        label : str
+            A string representing a label.
 
        Returns
        -------
        int
            Label index.
-        int
-            Label key index.
        """
        label_id = len(self._evaluator.index_to_label)
-        label_key_id = len(self._evaluator.index_to_label_key)
        if label not in self._evaluator.label_to_index:
            self._evaluator.label_to_index[label] = label_id
            self._evaluator.index_to_label[label_id] = label
 
-            # update label key index
-            if label[0] not in self._evaluator.label_key_to_index:
-                self._evaluator.label_key_to_index[label[0]] = label_key_id
-                self._evaluator.index_to_label_key[label_key_id] = label[0]
-                label_key_id += 1
-
-            self._evaluator.label_index_to_label_key_index[
-                label_id
-            ] = self._evaluator.label_key_to_index[label[0]]
            label_id += 1
 
-        return (
-            self._evaluator.label_to_index[label],
-            self._evaluator.label_key_to_index[label[0]],
-        )
+        return self._evaluator.label_to_index[label]
 
    def _add_data(
        self,
        uid_index: int,
-        keyed_groundtruths: dict[int, int],
-        keyed_predictions: dict[int, list[tuple[int, float]]],
+        groundtruth: int,
+        predictions: list[tuple[int, float]],
    ):
-        gt_keys = set(keyed_groundtruths.keys())
-        pd_keys = set(keyed_predictions.keys())
-        joint_keys = gt_keys.intersection(pd_keys)
-
-        gt_unique_keys = gt_keys - pd_keys
-        pd_unique_keys = pd_keys - gt_keys
-        if gt_unique_keys or pd_unique_keys:
-            raise ValueError(
-                "Label keys must match between ground truths and predictions."
-            )
 
        pairs = list()
-        for key in joint_keys:
-            scores = np.array([score for _, score in keyed_predictions[key]])
-            max_score_idx = np.argmax(scores)
-
-            glabel = keyed_groundtruths[key]
-            for idx, (plabel, score) in enumerate(keyed_predictions[key]):
-                pairs.append(
-                    (
-                        float(uid_index),
-                        float(glabel),
-                        float(plabel),
-                        float(score),
-                        float(max_score_idx == idx),
-                    )
+        scores = np.array([score for _, score in predictions])
+        max_score_idx = np.argmax(scores)
+
+        for idx, (plabel, score) in enumerate(predictions):
+            pairs.append(
+                (
+                    float(uid_index),
+                    float(groundtruth),
+                    float(plabel),
+                    float(score),
+                    float(max_score_idx == idx),
                )
+            )
 
        if self._evaluator._detailed_pairs.size == 0:
            self._evaluator._detailed_pairs = np.array(pairs)
@@ -680,27 +609,29 @@ class DataLoader:
        disable_tqdm = not show_progress
        for classification in tqdm(classifications, disable=disable_tqdm):
 
+            if len(classification.predictions) == 0:
+                raise ValueError(
+                    "Classifications must contain at least one prediction."
+                )
            # update metadata
            self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += len(classification.groundtruths)
+            self._evaluator.n_groundtruths += 1
            self._evaluator.n_predictions += len(classification.predictions)
 
            # update datum uid index
            uid_index = self._add_datum(uid=classification.uid)
 
            # cache labels and annotations
-            keyed_groundtruths = defaultdict(int)
-            keyed_predictions = defaultdict(list)
-            for glabel in classification.groundtruths:
-                label_idx, label_key_idx = self._add_label(glabel)
-                self.groundtruth_count[label_idx][uid_index] += 1
-                keyed_groundtruths[label_key_idx] = label_idx
-            for idx, (plabel, pscore) in enumerate(
-                zip(classification.predictions, classification.scores)
+            groundtruth = self._add_label(classification.groundtruth)
+            self.groundtruth_count[groundtruth][uid_index] += 1
+
+            predictions = list()
+            for plabel, pscore in zip(
+                classification.predictions, classification.scores
            ):
-                label_idx, label_key_idx = self._add_label(plabel)
+                label_idx = self._add_label(plabel)
                self.prediction_count[label_idx][uid_index] += 1
-                keyed_predictions[label_key_idx].append(
+                predictions.append(
                    (
                        label_idx,
                        pscore,
@@ -709,8 +640,8 @@ class DataLoader:
 
            self._add_data(
                uid_index=uid_index,
-                keyed_groundtruths=keyed_groundtruths,
-                keyed_predictions=keyed_predictions,
+                groundtruth=groundtruth,
+                predictions=predictions,
            )
 
    def add_data_from_valor_dict(
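Taken together with the Classification change, ingestion now looks roughly like this. This is a hedged sketch: the import path, the zero-argument DataLoader() construction, and the add_data method name (inferred from add_data_from_valor_dict) are assumptions; the argument names mirror the hunks above.

```python
# Hedged sketch of the 0.33.8 ingestion path; names marked above as assumptions
# are not shown in this diff.
from valor_lite.classification import Classification, DataLoader

loader = DataLoader()
loader.add_data(
    classifications=[
        Classification(
            uid="uid0",
            groundtruth="dog",
            predictions=["dog", "cat"],
            scores=[0.8, 0.2],
        ),
    ]
)
evaluator = loader.finalize()

# Per the new guard, a classification with an empty prediction list is rejected here:
#   -> ValueError: Classifications must contain at least one prediction.
```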
@@ -743,31 +674,38 @@ class DataLoader:
            uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
 
            # cache labels and annotations
-            keyed_groundtruths = defaultdict(int)
-            keyed_predictions = defaultdict(list)
+            predictions = list()
+            groundtruths = None
            for gann in groundtruth["annotations"]:
                for valor_label in gann["labels"]:
-                    glabel = (valor_label["key"], valor_label["value"])
-                    label_idx, label_key_idx = self._add_label(glabel)
+                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
+                    label_idx = self._add_label(glabel)
                    self.groundtruth_count[label_idx][uid_index] += 1
-                    keyed_groundtruths[label_key_idx] = label_idx
+                    groundtruths = label_idx
            for pann in prediction["annotations"]:
                for valor_label in pann["labels"]:
-                    plabel = (valor_label["key"], valor_label["value"])
+                    plabel = f'{valor_label["key"]}_{valor_label["value"]}'
                    pscore = valor_label["score"]
-                    label_idx, label_key_idx = self._add_label(plabel)
+                    label_idx = self._add_label(plabel)
                    self.prediction_count[label_idx][uid_index] += 1
-                    keyed_predictions[label_key_idx].append(
+                    predictions.append(
                        (
                            label_idx,
                            pscore,
                        )
                    )
 
+            # fix type error where groundtruths can possibly be unbound now that it's a float
+            # in practice, this error should never be hit since groundtruths can't be empty without throwing a ValueError earlier in the flow
+            if groundtruths is None:
+                raise ValueError(
+                    "Expected a value for groundtruths, but got None."
+                )
+
            self._add_data(
                uid_index=uid_index,
-                keyed_groundtruths=keyed_groundtruths,
-                keyed_predictions=keyed_predictions,
+                groundtruth=groundtruths,
+                predictions=predictions,
            )
 
    def finalize(self) -> Evaluator:
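For data arriving as Valor dictionaries, the key/value pair is no longer kept as a tuple; it is concatenated into a single string with an underscore. Illustratively (this directly mirrors the new f-string above; the sample label is made up):

```python
# Made-up Valor-style label dictionary, flattened the same way the loader now does.
valor_label = {"key": "class", "value": "dog", "score": 0.9}
label = f'{valor_label["key"]}_{valor_label["value"]}'
assert label == "class_dog"
```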
@@ -820,7 +758,6 @@ class DataLoader:
                        1, :, label_idx
                    ]
                ),
-                self._evaluator.label_index_to_label_key_index[label_idx],
            ]
            for label_idx in range(n_labels)
        ],