valor-lite 0.33.7__py3-none-any.whl → 0.33.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,14 @@ from dataclasses import dataclass
 @dataclass
 class Classification:
     uid: str
-    groundtruths: list[tuple[str, str]]
-    predictions: list[tuple[str, str]]
+    groundtruth: str
+    predictions: list[str]
     scores: list[float]
 
     def __post_init__(self):
+        if not isinstance(self.groundtruth, str):
+            raise ValueError(
+                "A classification must contain a single groundtruth."
+            )
         if len(self.predictions) != len(self.scores):
             raise ValueError("There must be a score per prediction label.")
@@ -3,28 +3,18 @@ from numpy.typing import NDArray
 
 
 def _compute_rocauc(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
     n_datums: int,
     n_labels: int,
-    n_label_keys: int,
     mask_matching_labels: NDArray[np.bool_],
     pd_labels: NDArray[np.int32],
 ):
     """
     Compute ROCAUC and mean ROCAUC.
     """
-    count_labels_per_key = np.bincount(label_metadata[:, 2])
-    count_groundtruths_per_key = np.bincount(
-        label_metadata[:, 2],
-        weights=label_metadata[:, 0],
-        minlength=n_label_keys,
-    )
-
     positive_count = label_metadata[:, 0]
-    negative_count = (
-        count_groundtruths_per_key[label_metadata[:, 2]] - label_metadata[:, 0]
-    )
+    negative_count = label_metadata[:, 1] - label_metadata[:, 0]
 
     true_positives = np.zeros((n_labels, n_datums), dtype=np.int32)
     false_positives = np.zeros_like(true_positives)
@@ -35,7 +25,6 @@ def _compute_rocauc(
             continue
 
         mask_pds = pd_labels == label_idx
-
        true_positives[label_idx] = mask_matching_labels[mask_pds]
        false_positives[label_idx] = ~mask_matching_labels[mask_pds]
        scores[label_idx] = data[mask_pds, 3]
@@ -70,32 +59,25 @@ def _compute_rocauc(
     rocauc = np.trapz(x=fpr, y=tpr, axis=1) # type: ignore - numpy will be switching to `trapezoid` in the future.
 
     # compute mean rocauc
-    summed_rocauc = np.bincount(label_metadata[:, 2], weights=rocauc)
-    mean_rocauc = np.zeros(n_label_keys, dtype=np.float64)
-    np.divide(
-        summed_rocauc,
-        count_labels_per_key,
-        where=count_labels_per_key > 1e-9,
-        out=mean_rocauc,
-    )
+    mean_rocauc = rocauc.mean()
 
     return rocauc, mean_rocauc
 
 
 def compute_metrics(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.floating],
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_datums: int,
 ) -> tuple[
     NDArray[np.int32],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
-    NDArray[np.floating],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    float,
 ]:
     """
     Computes classification metrics.
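
Because label keys are gone, mROCAUC collapses from one value per key to a single number: the unweighted mean of the per-label ROCAUC values, which is why the return annotation changes from NDArray[np.floating] to a plain float. A rough sketch of the new behavior with made-up values:

    import numpy as np

    rocauc = np.array([0.90, 0.75, 0.60])  # one ROCAUC value per label
    mean_rocauc = float(rocauc.mean())     # unweighted mean over all labels -> 0.75
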
@@ -110,14 +92,14 @@ def compute_metrics(
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A sorted array of classification pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    score_thresholds : NDArray[np.floating]
+    score_thresholds : NDArray[np.float64]
         A 1-D array contains score thresholds to compute metrics over.
     hardmax : bool
-        Option to only allow a single positive prediction per label key.
+        Option to only allow a single positive prediction.
     n_datums : int
         The number of datums being operated over.
 
@@ -125,22 +107,21 @@ def compute_metrics(
     -------
     NDArray[np.int32]
         TP, FP, FN, TN counts.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Precision.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Recall.
-    NDArray[np.floating]
+    NDArray[np.float64]
         Accuracy
-    NDArray[np.floating]
+    NDArray[np.float64]
         F1 Score
-    NDArray[np.floating]
+    NDArray[np.float64]
         ROCAUC.
-    NDArray[np.floating]
+    float
         mROCAUC.
     """
 
     n_labels = label_metadata.shape[0]
-    n_label_keys = np.unique(label_metadata[:, 2]).size
     n_scores = score_thresholds.shape[0]
 
     pd_labels = data[:, 2].astype(int)
@@ -155,7 +136,6 @@ def compute_metrics(
         label_metadata=label_metadata,
         n_datums=n_datums,
         n_labels=n_labels,
-        n_label_keys=n_label_keys,
         mask_matching_labels=mask_matching_labels,
         pd_labels=pd_labels,
     )
@@ -229,16 +209,16 @@ def compute_metrics(
 
 
 def _count_with_examples(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.int32]]:
     """
     Helper function for counting occurences of unique detailed pairs.
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A masked portion of a detailed pairs array.
     unique_idx : int | list[int]
         The index or indices upon which uniqueness is constrained.
@@ -247,7 +227,7 @@ def _count_with_examples(
 
     Returns
     -------
-    NDArray[np.floating]
+    NDArray[np.float64]
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
@@ -267,13 +247,12 @@ def _count_with_examples(
 
 
 def compute_confusion_matrix(
-    data: NDArray[np.floating],
+    data: NDArray[np.float64],
     label_metadata: NDArray[np.int32],
-    score_thresholds: NDArray[np.floating],
+    score_thresholds: NDArray[np.float64],
     hardmax: bool,
     n_examples: int,
-) -> tuple[NDArray[np.floating], NDArray[np.int32]]:
-
+) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
     """
     Compute detailed confusion matrix.
 
@@ -287,20 +266,20 @@ def compute_confusion_matrix(
 
     Parameters
     ----------
-    data : NDArray[np.floating]
+    data : NDArray[np.float64]
         A sorted array summarizing the IOU calculations of one or more pairs.
     label_metadata : NDArray[np.int32]
         An array containing metadata related to labels.
-    iou_thresholds : NDArray[np.floating]
+    iou_thresholds : NDArray[np.float64]
         A 1-D array containing IoU thresholds.
-    score_thresholds : NDArray[np.floating]
+    score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
     n_examples : int
         The maximum number of examples to return per count.
 
     Returns
     -------
-    NDArray[np.floating]
+    NDArray[np.float64]
         Confusion matrix.
     NDArray[np.int32]
         Ground truths with missing predictions.
@@ -67,13 +67,8 @@ class Evaluator:
         self.index_to_uid: dict[int, str] = dict()
 
         # label reference
-        self.label_to_index: dict[tuple[str, str], int] = dict()
-        self.index_to_label: dict[int, tuple[str, str]] = dict()
-
-        # label key reference
-        self.index_to_label_key: dict[int, str] = dict()
-        self.label_key_to_index: dict[str, int] = dict()
-        self.label_index_to_label_key_index: dict[int, int] = dict()
+        self.label_to_index: dict[str, int] = dict()
+        self.index_to_label: dict[int, str] = dict()
 
         # computation caches
         self._detailed_pairs = np.array([])
@@ -81,7 +76,7 @@ class Evaluator:
         self._label_metadata_per_datum = np.array([], dtype=np.int32)
 
     @property
-    def ignored_prediction_labels(self) -> list[tuple[str, str]]:
+    def ignored_prediction_labels(self) -> list[str]:
         """
         Prediction labels that are not present in the ground truth set.
         """
@@ -92,7 +87,7 @@ class Evaluator:
         ]
 
     @property
-    def missing_prediction_labels(self) -> list[tuple[str, str]]:
+    def missing_prediction_labels(self) -> list[str]:
         """
         Ground truth labels that are not present in the prediction set.
         """
@@ -119,8 +114,7 @@ class Evaluator:
     def create_filter(
         self,
         datum_uids: list[str] | NDArray[np.int32] | None = None,
-        labels: list[tuple[str, str]] | NDArray[np.int32] | None = None,
-        label_keys: list[str] | NDArray[np.int32] | None = None,
+        labels: list[str] | NDArray[np.int32] | None = None,
     ) -> Filter:
         """
         Creates a boolean mask that can be passed to an evaluation.
@@ -129,10 +123,8 @@ class Evaluator:
         ----------
         datum_uids : list[str] | NDArray[np.int32], optional
             An optional list of string uids or a numpy array of uid indices.
-        labels : list[tuple[str, str]] | NDArray[np.int32], optional
+        labels : list[str] | NDArray[np.int32], optional
             An optional list of labels or a numpy array of label indices.
-        label_keys : list[str] | NDArray[np.int32], optional
-            An optional list of label keys or a numpy array of label key indices.
 
         Returns
         -------
@@ -179,36 +171,18 @@ class Evaluator:
             mask[labels] = True
             mask_labels &= mask
 
-        if label_keys is not None:
-            if isinstance(label_keys, list):
-                label_keys = np.array(
-                    [self.label_key_to_index[key] for key in label_keys]
-                )
-            label_indices = np.where(
-                np.isclose(self._label_metadata[:, 2], label_keys)
-            )[0]
-            mask = np.zeros_like(mask_pairs, dtype=np.bool_)
-            mask[
-                np.isin(self._detailed_pairs[:, 1].astype(int), label_indices)
-            ] = True
-            mask_pairs &= mask
-
-            mask = np.zeros_like(mask_labels, dtype=np.bool_)
-            mask[label_indices] = True
-            mask_labels &= mask
-
         mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
         label_metadata_per_datum[:, ~mask] = 0
 
         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
-        label_metadata[:, :2] = np.transpose(
+        label_metadata = np.transpose(
             np.sum(
                 label_metadata_per_datum,
                 axis=1,
             )
         )
-        label_metadata[:, 2] = self._label_metadata[:, 2]
+
         n_datums = int(np.sum(label_metadata[:, 0]))
 
         return Filter(
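
With label_keys removed from create_filter, filtering is expressed entirely through datum uids and the flattened label strings. A hedged usage sketch (the uid and label values are illustrative):

    # labels are plain strings now, not (key, value) tuples
    filter_ = evaluator.create_filter(
        datum_uids=["uid0", "uid1"],
        labels=["dog", "cat"],
    )

Per the method's docstring, the returned Filter is a boolean mask that can then be passed to an evaluation.
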
@@ -288,10 +262,8 @@ class Evaluator:
 
         metrics[MetricType.mROCAUC] = [
             mROCAUC(
-                value=mean_rocauc[label_key_idx],
-                label_key=self.index_to_label_key[label_key_idx],
+                value=mean_rocauc,
             )
-            for label_key_idx in range(len(self.label_key_to_index))
         ]
 
         for label_idx, label in self.index_to_label.items():
@@ -366,8 +338,7 @@ class Evaluator:
 
     def _unpack_confusion_matrix(
         self,
-        confusion_matrix: NDArray[np.floating],
-        label_key_idx: int,
+        confusion_matrix: NDArray[np.float64],
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[
@@ -407,8 +378,8 @@ class Evaluator:
         )
 
         return {
-            self.index_to_label[gt_label_idx][1]: {
-                self.index_to_label[pd_label_idx][1]: {
+            self.index_to_label[gt_label_idx]: {
+                self.index_to_label[pd_label_idx]: {
                     "count": max(
                         int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
                         0,
@@ -430,22 +401,13 @@ class Evaluator:
                     ],
                 }
                 for pd_label_idx in range(number_of_labels)
-                if (
-                    self.label_index_to_label_key_index[pd_label_idx]
-                    == label_key_idx
-                )
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }
 
     def _unpack_missing_predictions(
         self,
         missing_predictions: NDArray[np.int32],
-        label_key_idx: int,
         number_of_labels: int,
         number_of_examples: int,
     ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
@@ -463,7 +425,7 @@ class Evaluator:
         )
 
         return {
-            self.index_to_label[gt_label_idx][1]: {
+            self.index_to_label[gt_label_idx]: {
                 "count": max(
                     int(missing_predictions[gt_label_idx, 0]),
                     0,
@@ -479,15 +441,11 @@ class Evaluator:
                 ],
             }
             for gt_label_idx in range(number_of_labels)
-            if (
-                self.label_index_to_label_key_index[gt_label_idx]
-                == label_key_idx
-            )
         }
 
     def _compute_confusion_matrix(
         self,
-        data: NDArray[np.floating],
+        data: NDArray[np.float64],
         label_metadata: NDArray[np.int32],
         score_thresholds: list[float],
         hardmax: bool,
@@ -498,7 +456,7 @@ class Evaluator:
 
         Parameters
         ----------
-        data : NDArray[np.floating]
+        data : NDArray[np.float64]
            A data array containing classification pairs.
        label_metadata : NDArray[np.int32]
            An integer array containing label metadata.
@@ -512,7 +470,7 @@ class Evaluator:
         Returns
         -------
         list[ConfusionMatrix]
-            A list of ConfusionMatrix per label key.
+            A list of ConfusionMatrix objects.
         """
 
         if data.size == 0:
@@ -530,22 +488,18 @@ class Evaluator:
         return [
             ConfusionMatrix(
                 score_threshold=score_thresholds[score_idx],
-                label_key=label_key,
                 number_of_examples=number_of_examples,
                 confusion_matrix=self._unpack_confusion_matrix(
                     confusion_matrix=confusion_matrix[score_idx, :, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
                 missing_predictions=self._unpack_missing_predictions(
                     missing_predictions=missing_predictions[score_idx, :, :],
-                    label_key_idx=label_key_idx,
                     number_of_labels=n_labels,
                     number_of_examples=number_of_examples,
                 ),
             )
-            for label_key_idx, label_key in self.index_to_label_key.items()
             for score_idx in range(n_scores)
         ]
 
@@ -580,77 +534,50 @@ class DataLoader:
             self._evaluator.index_to_uid[index] = uid
         return self._evaluator.uid_to_index[uid]
 
-    def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
+    def _add_label(self, label: str) -> int:
         """
         Helper function for adding a label to the cache.
 
         Parameters
         ----------
-        label : tuple[str, str]
-            The label as a tuple in format (key, value).
+        label : str
+            A string representing a label.
 
         Returns
         -------
         int
             Label index.
-        int
-            Label key index.
         """
         label_id = len(self._evaluator.index_to_label)
-        label_key_id = len(self._evaluator.index_to_label_key)
         if label not in self._evaluator.label_to_index:
             self._evaluator.label_to_index[label] = label_id
             self._evaluator.index_to_label[label_id] = label
 
-            # update label key index
-            if label[0] not in self._evaluator.label_key_to_index:
-                self._evaluator.label_key_to_index[label[0]] = label_key_id
-                self._evaluator.index_to_label_key[label_key_id] = label[0]
-                label_key_id += 1
-
-            self._evaluator.label_index_to_label_key_index[
-                label_id
-            ] = self._evaluator.label_key_to_index[label[0]]
             label_id += 1
 
-        return (
-            self._evaluator.label_to_index[label],
-            self._evaluator.label_key_to_index[label[0]],
-        )
+        return self._evaluator.label_to_index[label]
 
     def _add_data(
         self,
         uid_index: int,
-        keyed_groundtruths: dict[int, int],
-        keyed_predictions: dict[int, list[tuple[int, float]]],
+        groundtruth: int,
+        predictions: list[tuple[int, float]],
     ):
-        gt_keys = set(keyed_groundtruths.keys())
-        pd_keys = set(keyed_predictions.keys())
-        joint_keys = gt_keys.intersection(pd_keys)
-
-        gt_unique_keys = gt_keys - pd_keys
-        pd_unique_keys = pd_keys - gt_keys
-        if gt_unique_keys or pd_unique_keys:
-            raise ValueError(
-                "Label keys must match between ground truths and predictions."
-            )
 
         pairs = list()
-        for key in joint_keys:
-            scores = np.array([score for _, score in keyed_predictions[key]])
-            max_score_idx = np.argmax(scores)
-
-            glabel = keyed_groundtruths[key]
-            for idx, (plabel, score) in enumerate(keyed_predictions[key]):
-                pairs.append(
-                    (
-                        float(uid_index),
-                        float(glabel),
-                        float(plabel),
-                        float(score),
-                        float(max_score_idx == idx),
-                    )
+        scores = np.array([score for _, score in predictions])
+        max_score_idx = np.argmax(scores)
+
+        for idx, (plabel, score) in enumerate(predictions):
+            pairs.append(
+                (
+                    float(uid_index),
+                    float(groundtruth),
+                    float(plabel),
+                    float(score),
+                    float(max_score_idx == idx),
                 )
+            )
 
         if self._evaluator._detailed_pairs.size == 0:
             self._evaluator._detailed_pairs = np.array(pairs)
@@ -682,27 +609,29 @@ class DataLoader:
         disable_tqdm = not show_progress
         for classification in tqdm(classifications, disable=disable_tqdm):
 
+            if len(classification.predictions) == 0:
+                raise ValueError(
+                    "Classifications must contain at least one prediction."
+                )
             # update metadata
             self._evaluator.n_datums += 1
-            self._evaluator.n_groundtruths += len(classification.groundtruths)
+            self._evaluator.n_groundtruths += 1
             self._evaluator.n_predictions += len(classification.predictions)
 
             # update datum uid index
             uid_index = self._add_datum(uid=classification.uid)
 
             # cache labels and annotations
-            keyed_groundtruths = defaultdict(int)
-            keyed_predictions = defaultdict(list)
-            for glabel in classification.groundtruths:
-                label_idx, label_key_idx = self._add_label(glabel)
-                self.groundtruth_count[label_idx][uid_index] += 1
-                keyed_groundtruths[label_key_idx] = label_idx
-            for idx, (plabel, pscore) in enumerate(
-                zip(classification.predictions, classification.scores)
+            groundtruth = self._add_label(classification.groundtruth)
+            self.groundtruth_count[groundtruth][uid_index] += 1
+
+            predictions = list()
+            for plabel, pscore in zip(
+                classification.predictions, classification.scores
             ):
-                label_idx, label_key_idx = self._add_label(plabel)
+                label_idx = self._add_label(plabel)
                 self.prediction_count[label_idx][uid_index] += 1
-                keyed_predictions[label_key_idx].append(
+                predictions.append(
                     (
                         label_idx,
                         pscore,
@@ -711,8 +640,8 @@ class DataLoader:
 
             self._add_data(
                 uid_index=uid_index,
-                keyed_groundtruths=keyed_groundtruths,
-                keyed_predictions=keyed_predictions,
+                groundtruth=groundtruth,
+                predictions=predictions,
             )
 
     def add_data_from_valor_dict(
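
Taken together with the hunk above, the loading flow now caches one ground-truth index per datum and a flat prediction list. A hedged end-to-end sketch (the public method wrapping the tqdm loop is assumed here to be add_data; finalize() -> Evaluator appears later in the diff):

    loader = DataLoader()
    loader.add_data(
        [
            Classification(
                uid="uid0",
                groundtruth="dog",
                predictions=["dog", "cat"],
                scores=[0.8, 0.2],
            )
        ],
        show_progress=False,  # toggles the tqdm progress bar shown above
    )
    evaluator = loader.finalize()

Note that an empty predictions list now raises a ValueError at the top of the loop, before any caches are updated.
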
@@ -745,31 +674,38 @@ class DataLoader:
             uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
 
             # cache labels and annotations
-            keyed_groundtruths = defaultdict(int)
-            keyed_predictions = defaultdict(list)
+            predictions = list()
+            groundtruths = None
             for gann in groundtruth["annotations"]:
                 for valor_label in gann["labels"]:
-                    glabel = (valor_label["key"], valor_label["value"])
-                    label_idx, label_key_idx = self._add_label(glabel)
+                    glabel = f'{valor_label["key"]}_{valor_label["value"]}'
+                    label_idx = self._add_label(glabel)
                     self.groundtruth_count[label_idx][uid_index] += 1
-                    keyed_groundtruths[label_key_idx] = label_idx
+                    groundtruths = label_idx
             for pann in prediction["annotations"]:
                 for valor_label in pann["labels"]:
-                    plabel = (valor_label["key"], valor_label["value"])
+                    plabel = f'{valor_label["key"]}_{valor_label["value"]}'
                     pscore = valor_label["score"]
-                    label_idx, label_key_idx = self._add_label(plabel)
+                    label_idx = self._add_label(plabel)
                     self.prediction_count[label_idx][uid_index] += 1
-                    keyed_predictions[label_key_idx].append(
+                    predictions.append(
                         (
                             label_idx,
                             pscore,
                         )
                     )
 
+            # fix type error where groundtruths can possibly be unbound now that it's a float
+            # in practice, this error should never be hit since groundtruths can't be empty without throwing a ValueError earlier in the flow
+            if groundtruths is None:
+                raise ValueError(
+                    "Expected a value for groundtruths, but got None."
+                )
+
             self._add_data(
                 uid_index=uid_index,
-                keyed_groundtruths=keyed_groundtruths,
-                keyed_predictions=keyed_predictions,
+                groundtruth=groundtruths,
+                predictions=predictions,
             )
 
     def finalize(self) -> Evaluator:
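
One consequence worth flagging: when loading from Valor dictionaries, the old (key, value) label pairs are flattened into single strings joined by an underscore, so downstream metrics are keyed by the combined string. A small illustration (the key and value are made up):

    valor_label = {"key": "class", "value": "dog"}
    label = f'{valor_label["key"]}_{valor_label["value"]}'  # -> "class_dog"
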
@@ -822,7 +758,6 @@ class DataLoader:
                             1, :, label_idx
                         ]
                     ),
-                    self._evaluator.label_index_to_label_key_index[label_idx],
                 ]
                 for label_idx in range(n_labels)
             ],