valor-lite 0.33.7-py3-none-any.whl → 0.33.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -67,13 +67,8 @@ class Evaluator:
  self.index_to_uid: dict[int, str] = dict()

  # label reference
- self.label_to_index: dict[tuple[str, str], int] = dict()
- self.index_to_label: dict[int, tuple[str, str]] = dict()
-
- # label key reference
- self.index_to_label_key: dict[int, str] = dict()
- self.label_key_to_index: dict[str, int] = dict()
- self.label_index_to_label_key_index: dict[int, int] = dict()
+ self.label_to_index: dict[str, int] = dict()
+ self.index_to_label: dict[int, str] = dict()

  # computation caches
  self._detailed_pairs = np.array([])
@@ -81,7 +76,7 @@ class Evaluator:
  self._label_metadata_per_datum = np.array([], dtype=np.int32)

  @property
- def ignored_prediction_labels(self) -> list[tuple[str, str]]:
+ def ignored_prediction_labels(self) -> list[str]:
  """
  Prediction labels that are not present in the ground truth set.
  """
@@ -92,7 +87,7 @@ class Evaluator:
  ]

  @property
- def missing_prediction_labels(self) -> list[tuple[str, str]]:
+ def missing_prediction_labels(self) -> list[str]:
  """
  Ground truth labels that are not present in the prediction set.
  """
@@ -119,8 +114,7 @@ class Evaluator:
  def create_filter(
  self,
  datum_uids: list[str] | NDArray[np.int32] | None = None,
- labels: list[tuple[str, str]] | NDArray[np.int32] | None = None,
- label_keys: list[str] | NDArray[np.int32] | None = None,
+ labels: list[str] | NDArray[np.int32] | None = None,
  ) -> Filter:
  """
  Creates a boolean mask that can be passed to an evaluation.
@@ -129,10 +123,8 @@ class Evaluator:
  ----------
  datum_uids : list[str] | NDArray[np.int32], optional
  An optional list of string uids or a numpy array of uid indices.
- labels : list[tuple[str, str]] | NDArray[np.int32], optional
+ labels : list[str] | NDArray[np.int32], optional
  An optional list of labels or a numpy array of label indices.
- label_keys : list[str] | NDArray[np.int32], optional
- An optional list of label keys or a numpy array of label key indices.

  Returns
  -------
@@ -179,36 +171,18 @@ class Evaluator:
  mask[labels] = True
  mask_labels &= mask

- if label_keys is not None:
- if isinstance(label_keys, list):
- label_keys = np.array(
- [self.label_key_to_index[key] for key in label_keys]
- )
- label_indices = np.where(
- np.isclose(self._label_metadata[:, 2], label_keys)
- )[0]
- mask = np.zeros_like(mask_pairs, dtype=np.bool_)
- mask[
- np.isin(self._detailed_pairs[:, 1].astype(int), label_indices)
- ] = True
- mask_pairs &= mask
-
- mask = np.zeros_like(mask_labels, dtype=np.bool_)
- mask[label_indices] = True
- mask_labels &= mask
-
  mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
  label_metadata_per_datum = self._label_metadata_per_datum.copy()
  label_metadata_per_datum[:, ~mask] = 0

  label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
- label_metadata[:, :2] = np.transpose(
+ label_metadata = np.transpose(
  np.sum(
  label_metadata_per_datum,
  axis=1,
  )
  )
- label_metadata[:, 2] = self._label_metadata[:, 2]
+
  n_datums = int(np.sum(label_metadata[:, 0]))

  return Filter(
@@ -217,12 +191,117 @@ class Evaluator:
  n_datums=n_datums,
  )

- def evaluate(
+ def _unpack_confusion_matrix(
+ self,
+ confusion_matrix: NDArray[np.float64],
+ number_of_labels: int,
+ number_of_examples: int,
+ ) -> dict[
+ str,
+ dict[
+ str,
+ dict[
+ str,
+ int
+ | list[
+ dict[
+ str,
+ str | float,
+ ]
+ ],
+ ],
+ ],
+ ]:
+ """
+ Unpacks a numpy array of confusion matrix counts and examples.
+ """
+
+ datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
+ confusion_matrix[
+ gt_label_idx,
+ pd_label_idx,
+ example_idx * 2 + 1,
+ ]
+ )
+
+ score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
+ confusion_matrix[
+ gt_label_idx,
+ pd_label_idx,
+ example_idx * 2 + 2,
+ ]
+ )
+
+ return {
+ self.index_to_label[gt_label_idx]: {
+ self.index_to_label[pd_label_idx]: {
+ "count": max(
+ int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
+ 0,
+ ),
+ "examples": [
+ {
+ "datum": self.index_to_uid[
+ datum_idx(
+ gt_label_idx, pd_label_idx, example_idx
+ )
+ ],
+ "score": score_idx(
+ gt_label_idx, pd_label_idx, example_idx
+ ),
+ }
+ for example_idx in range(number_of_examples)
+ if datum_idx(gt_label_idx, pd_label_idx, example_idx)
+ >= 0
+ ],
+ }
+ for pd_label_idx in range(number_of_labels)
+ }
+ for gt_label_idx in range(number_of_labels)
+ }
+
+ def _unpack_missing_predictions(
+ self,
+ missing_predictions: NDArray[np.int32],
+ number_of_labels: int,
+ number_of_examples: int,
+ ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
+ """
+ Unpacks a numpy array of missing prediction counts and examples.
+ """
+
+ datum_idx = (
+ lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
+ missing_predictions[
+ gt_label_idx,
+ example_idx + 1,
+ ]
+ )
+ )
+
+ return {
+ self.index_to_label[gt_label_idx]: {
+ "count": max(
+ int(missing_predictions[gt_label_idx, 0]),
+ 0,
+ ),
+ "examples": [
+ {
+ "datum": self.index_to_uid[
+ datum_idx(gt_label_idx, example_idx)
+ ]
+ }
+ for example_idx in range(number_of_examples)
+ if datum_idx(gt_label_idx, example_idx) >= 0
+ ],
+ }
+ for gt_label_idx in range(number_of_labels)
+ }
+
+ def compute_precision_recall(
  self,
- metrics_to_return: list[MetricType] = MetricType.base(),
  score_thresholds: list[float] = [0.0],
  hardmax: bool = True,
- number_of_examples: int = 0,
  filter_: Filter | None = None,
  as_dict: bool = False,
  ) -> dict[MetricType, list]:
@@ -231,14 +310,10 @@ class Evaluator:

  Parameters
  ----------
- metrics_to_return : list[MetricType]
- A list of metrics to return in the results.
  score_thresholds : list[float]
  A list of score thresholds to compute metrics over.
  hardmax : bool
  Toggles whether a hardmax is applied to predictions.
- number_of_examples : int, default=0
- Maximum number of annotation examples to return in ConfusionMatrix.
  filter_ : Filter, optional
  An optional filter object.
  as_dict : bool, default=False
@@ -279,7 +354,7 @@ class Evaluator:

  metrics[MetricType.ROCAUC] = [
  ROCAUC(
- value=rocauc[label_idx],
+ value=float(rocauc[label_idx]),
  label=self.index_to_label[label_idx],
  )
  for label_idx in range(label_metadata.shape[0])
@@ -288,10 +363,8 @@ class Evaluator:

  metrics[MetricType.mROCAUC] = [
  mROCAUC(
- value=mean_rocauc[label_key_idx],
- label_key=self.index_to_label_key[label_key_idx],
+ value=float(mean_rocauc),
  )
- for label_key_idx in range(len(self.label_key_to_index))
  ]

  for label_idx, label in self.index_to_label.items():
@@ -304,10 +377,10 @@ class Evaluator:
  row = counts[:, label_idx]
  metrics[MetricType.Counts].append(
  Counts(
- tp=row[:, 0].tolist(),
- fp=row[:, 1].tolist(),
- fn=row[:, 2].tolist(),
- tn=row[:, 3].tolist(),
+ tp=row[:, 0].astype(int).tolist(),
+ fp=row[:, 1].astype(int).tolist(),
+ fn=row[:, 2].astype(int).tolist(),
+ tn=row[:, 3].astype(int).tolist(),
  **kwargs,
  )
  )
@@ -318,44 +391,29 @@ class Evaluator:

  metrics[MetricType.Precision].append(
  Precision(
- value=precision[:, label_idx].tolist(),
+ value=precision[:, label_idx].astype(float).tolist(),
  **kwargs,
  )
  )
  metrics[MetricType.Recall].append(
  Recall(
- value=recall[:, label_idx].tolist(),
+ value=recall[:, label_idx].astype(float).tolist(),
  **kwargs,
  )
  )
  metrics[MetricType.Accuracy].append(
  Accuracy(
- value=accuracy[:, label_idx].tolist(),
+ value=accuracy[:, label_idx].astype(float).tolist(),
  **kwargs,
  )
  )
  metrics[MetricType.F1].append(
  F1(
- value=f1_score[:, label_idx].tolist(),
+ value=f1_score[:, label_idx].astype(float).tolist(),
  **kwargs,
  )
  )

- if MetricType.ConfusionMatrix in metrics_to_return:
- metrics[
- MetricType.ConfusionMatrix
- ] = self._compute_confusion_matrix(
- data=data,
- label_metadata=label_metadata,
- score_thresholds=score_thresholds,
- hardmax=hardmax,
- number_of_examples=number_of_examples,
- )
-
- for metric in set(metrics.keys()):
- if metric not in metrics_to_return:
- del metrics[metric]
-
  if as_dict:
  return {
  mtype: [metric.to_dict() for metric in mvalues]
@@ -364,157 +422,43 @@ class Evaluator:

  return metrics

- def _unpack_confusion_matrix(
- self,
- confusion_matrix: NDArray[np.floating],
- label_key_idx: int,
- number_of_labels: int,
- number_of_examples: int,
- ) -> dict[
- str,
- dict[
- str,
- dict[
- str,
- int
- | list[
- dict[
- str,
- str | float,
- ]
- ],
- ],
- ],
- ]:
- """
- Unpacks a numpy array of confusion matrix counts and examples.
- """
-
- datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 2 + 1,
- ]
- )
-
- score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 2 + 2,
- ]
- )
-
- return {
- self.index_to_label[gt_label_idx][1]: {
- self.index_to_label[pd_label_idx][1]: {
- "count": max(
- int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum": self.index_to_uid[
- datum_idx(
- gt_label_idx, pd_label_idx, example_idx
- )
- ],
- "score": score_idx(
- gt_label_idx, pd_label_idx, example_idx
- ),
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, pd_label_idx, example_idx)
- >= 0
- ],
- }
- for pd_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[pd_label_idx]
- == label_key_idx
- )
- }
- for gt_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[gt_label_idx]
- == label_key_idx
- )
- }
-
- def _unpack_missing_predictions(
+ def compute_confusion_matrix(
  self,
- missing_predictions: NDArray[np.int32],
- label_key_idx: int,
- number_of_labels: int,
- number_of_examples: int,
- ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
- """
- Unpacks a numpy array of missing prediction counts and examples.
- """
-
- datum_idx = (
- lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
- missing_predictions[
- gt_label_idx,
- example_idx + 1,
- ]
- )
- )
-
- return {
- self.index_to_label[gt_label_idx][1]: {
- "count": max(
- int(missing_predictions[gt_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum": self.index_to_uid[
- datum_idx(gt_label_idx, example_idx)
- ]
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, example_idx) >= 0
- ],
- }
- for gt_label_idx in range(number_of_labels)
- if (
- self.label_index_to_label_key_index[gt_label_idx]
- == label_key_idx
- )
- }
-
- def _compute_confusion_matrix(
- self,
- data: NDArray[np.floating],
- label_metadata: NDArray[np.int32],
- score_thresholds: list[float],
- hardmax: bool,
- number_of_examples: int,
- ) -> list[ConfusionMatrix]:
+ score_thresholds: list[float] = [0.0],
+ hardmax: bool = True,
+ number_of_examples: int = 0,
+ filter_: Filter | None = None,
+ as_dict: bool = False,
+ ) -> list:
  """
  Computes a detailed confusion matrix..

  Parameters
  ----------
- data : NDArray[np.floating]
- A data array containing classification pairs.
- label_metadata : NDArray[np.int32]
- An integer array containing label metadata.
  score_thresholds : list[float]
  A list of score thresholds to compute metrics over.
  hardmax : bool
  Toggles whether a hardmax is applied to predictions.
  number_of_examples : int, default=0
  The number of examples to return per count.
+ filter_ : Filter, optional
+ An optional filter object.
+ as_dict : bool, default=False
+ An option to return metrics as dictionaries.

  Returns
  -------
- list[ConfusionMatrix]
- A list of ConfusionMatrix per label key.
+ list[ConfusionMatrix] | list[dict]
+ A list of confusion matrices.
  """

+ # apply filters
+ data = self._detailed_pairs
+ label_metadata = self._label_metadata
+ if filter_ is not None:
+ data = data[filter_.indices]
+ label_metadata = filter_.label_metadata
+
  if data.size == 0:
  return list()

@@ -527,28 +471,74 @@ class Evaluator:
  )

  n_scores, n_labels, _, _ = confusion_matrix.shape
- return [
+ results = [
  ConfusionMatrix(
  score_threshold=score_thresholds[score_idx],
- label_key=label_key,
  number_of_examples=number_of_examples,
  confusion_matrix=self._unpack_confusion_matrix(
  confusion_matrix=confusion_matrix[score_idx, :, :, :],
- label_key_idx=label_key_idx,
  number_of_labels=n_labels,
  number_of_examples=number_of_examples,
  ),
  missing_predictions=self._unpack_missing_predictions(
  missing_predictions=missing_predictions[score_idx, :, :],
- label_key_idx=label_key_idx,
  number_of_labels=n_labels,
  number_of_examples=number_of_examples,
  ),
  )
- for label_key_idx, label_key in self.index_to_label_key.items()
  for score_idx in range(n_scores)
  ]

+ if as_dict:
+ return [m.to_dict() for m in results]
+
+ return results
+
+ def evaluate(
+ self,
+ score_thresholds: list[float] = [0.0],
+ hardmax: bool = True,
+ number_of_examples: int = 0,
+ filter_: Filter | None = None,
+ as_dict: bool = False,
+ ) -> dict[MetricType, list]:
+ """
+ Computes a detailed confusion matrix..
+
+ Parameters
+ ----------
+ score_thresholds : list[float]
+ A list of score thresholds to compute metrics over.
+ hardmax : bool
+ Toggles whether a hardmax is applied to predictions.
+ number_of_examples : int, default=0
+ The number of examples to return per count.
+ filter_ : Filter, optional
+ An optional filter object.
+ as_dict : bool, default=False
+ An option to return metrics as dictionaries.
+
+ Returns
+ -------
+ list[ConfusionMatrix] | list[dict]
+ A list of confusion matrices.
+ """
+
+ results = self.compute_precision_recall(
+ score_thresholds=score_thresholds,
+ hardmax=hardmax,
+ filter_=filter_,
+ as_dict=as_dict,
+ )
+ results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+ score_thresholds=score_thresholds,
+ hardmax=hardmax,
+ number_of_examples=number_of_examples,
+ filter_=filter_,
+ as_dict=as_dict,
+ )
+ return results
+

  class DataLoader:
  """
@@ -580,77 +570,50 @@ class DataLoader:
  self._evaluator.index_to_uid[index] = uid
  return self._evaluator.uid_to_index[uid]

- def _add_label(self, label: tuple[str, str]) -> tuple[int, int]:
+ def _add_label(self, label: str) -> int:
  """
  Helper function for adding a label to the cache.

  Parameters
  ----------
- label : tuple[str, str]
- The label as a tuple in format (key, value).
+ label : str
+ A string representing a label.

  Returns
  -------
  int
  Label index.
- int
- Label key index.
  """
  label_id = len(self._evaluator.index_to_label)
- label_key_id = len(self._evaluator.index_to_label_key)
  if label not in self._evaluator.label_to_index:
  self._evaluator.label_to_index[label] = label_id
  self._evaluator.index_to_label[label_id] = label

- # update label key index
- if label[0] not in self._evaluator.label_key_to_index:
- self._evaluator.label_key_to_index[label[0]] = label_key_id
- self._evaluator.index_to_label_key[label_key_id] = label[0]
- label_key_id += 1
-
- self._evaluator.label_index_to_label_key_index[
- label_id
- ] = self._evaluator.label_key_to_index[label[0]]
  label_id += 1

- return (
- self._evaluator.label_to_index[label],
- self._evaluator.label_key_to_index[label[0]],
- )
+ return self._evaluator.label_to_index[label]

  def _add_data(
  self,
  uid_index: int,
- keyed_groundtruths: dict[int, int],
- keyed_predictions: dict[int, list[tuple[int, float]]],
+ groundtruth: int,
+ predictions: list[tuple[int, float]],
  ):
- gt_keys = set(keyed_groundtruths.keys())
- pd_keys = set(keyed_predictions.keys())
- joint_keys = gt_keys.intersection(pd_keys)
-
- gt_unique_keys = gt_keys - pd_keys
- pd_unique_keys = pd_keys - gt_keys
- if gt_unique_keys or pd_unique_keys:
- raise ValueError(
- "Label keys must match between ground truths and predictions."
- )

  pairs = list()
- for key in joint_keys:
- scores = np.array([score for _, score in keyed_predictions[key]])
- max_score_idx = np.argmax(scores)
-
- glabel = keyed_groundtruths[key]
- for idx, (plabel, score) in enumerate(keyed_predictions[key]):
- pairs.append(
- (
- float(uid_index),
- float(glabel),
- float(plabel),
- float(score),
- float(max_score_idx == idx),
- )
+ scores = np.array([score for _, score in predictions])
+ max_score_idx = np.argmax(scores)
+
+ for idx, (plabel, score) in enumerate(predictions):
+ pairs.append(
+ (
+ float(uid_index),
+ float(groundtruth),
+ float(plabel),
+ float(score),
+ float(max_score_idx == idx),
  )
+ )

  if self._evaluator._detailed_pairs.size == 0:
  self._evaluator._detailed_pairs = np.array(pairs)
@@ -682,27 +645,29 @@ class DataLoader:
  disable_tqdm = not show_progress
  for classification in tqdm(classifications, disable=disable_tqdm):

+ if len(classification.predictions) == 0:
+ raise ValueError(
+ "Classifications must contain at least one prediction."
+ )
  # update metadata
  self._evaluator.n_datums += 1
- self._evaluator.n_groundtruths += len(classification.groundtruths)
+ self._evaluator.n_groundtruths += 1
  self._evaluator.n_predictions += len(classification.predictions)

  # update datum uid index
  uid_index = self._add_datum(uid=classification.uid)

  # cache labels and annotations
- keyed_groundtruths = defaultdict(int)
- keyed_predictions = defaultdict(list)
- for glabel in classification.groundtruths:
- label_idx, label_key_idx = self._add_label(glabel)
- self.groundtruth_count[label_idx][uid_index] += 1
- keyed_groundtruths[label_key_idx] = label_idx
- for idx, (plabel, pscore) in enumerate(
- zip(classification.predictions, classification.scores)
+ groundtruth = self._add_label(classification.groundtruth)
+ self.groundtruth_count[groundtruth][uid_index] += 1
+
+ predictions = list()
+ for plabel, pscore in zip(
+ classification.predictions, classification.scores
  ):
- label_idx, label_key_idx = self._add_label(plabel)
+ label_idx = self._add_label(plabel)
  self.prediction_count[label_idx][uid_index] += 1
- keyed_predictions[label_key_idx].append(
+ predictions.append(
  (
  label_idx,
  pscore,
@@ -711,65 +676,8 @@ class DataLoader:

  self._add_data(
  uid_index=uid_index,
- keyed_groundtruths=keyed_groundtruths,
- keyed_predictions=keyed_predictions,
- )
-
- def add_data_from_valor_dict(
- self,
- classifications: list[tuple[dict, dict]],
- show_progress: bool = False,
- ):
- """
- Adds Valor-format classifications to the cache.
-
- Parameters
- ----------
- classifications : list[tuple[dict, dict]]
- A list of groundtruth, prediction pairs in Valor-format dictionaries.
- show_progress : bool, default=False
- Toggle for tqdm progress bar.
- """
-
- disable_tqdm = not show_progress
- for groundtruth, prediction in tqdm(
- classifications, disable=disable_tqdm
- ):
-
- # update metadata
- self._evaluator.n_datums += 1
- self._evaluator.n_groundtruths += len(groundtruth["annotations"])
- self._evaluator.n_predictions += len(prediction["annotations"])
-
- # update datum uid index
- uid_index = self._add_datum(uid=groundtruth["datum"]["uid"])
-
- # cache labels and annotations
- keyed_groundtruths = defaultdict(int)
- keyed_predictions = defaultdict(list)
- for gann in groundtruth["annotations"]:
- for valor_label in gann["labels"]:
- glabel = (valor_label["key"], valor_label["value"])
- label_idx, label_key_idx = self._add_label(glabel)
- self.groundtruth_count[label_idx][uid_index] += 1
- keyed_groundtruths[label_key_idx] = label_idx
- for pann in prediction["annotations"]:
- for valor_label in pann["labels"]:
- plabel = (valor_label["key"], valor_label["value"])
- pscore = valor_label["score"]
- label_idx, label_key_idx = self._add_label(plabel)
- self.prediction_count[label_idx][uid_index] += 1
- keyed_predictions[label_key_idx].append(
- (
- label_idx,
- pscore,
- )
- )
-
- self._add_data(
- uid_index=uid_index,
- keyed_groundtruths=keyed_groundtruths,
- keyed_predictions=keyed_predictions,
+ groundtruth=groundtruth,
+ predictions=predictions,
  )

  def finalize(self) -> Evaluator:
@@ -822,7 +730,6 @@ class DataLoader:
  1, :, label_idx
  ]
  ),
- self._evaluator.label_index_to_label_key_index[label_idx],
  ]
  for label_idx in range(n_labels)
  ],
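
Taken together, the hunks above drop the (key, value) label tuples and the label-key machinery in favor of plain string labels, and split the old all-in-one evaluate() into compute_precision_recall() and compute_confusion_matrix(), with evaluate() delegating to both. A minimal usage sketch against 0.33.9 follows; the import path, the Classification container, and the add_data method name are inferred from this diff rather than confirmed against the package, so treat it as illustrative only.

# Hypothetical sketch of the 0.33.9 classification workflow (names inferred from the diff above).
from valor_lite.classification import Classification, DataLoader  # assumed import path

loader = DataLoader()
loader.add_data(  # assumed public method wrapping the _add_label/_add_data calls shown in the diff
    [
        Classification(
            uid="uid0",
            groundtruth="dog",            # labels are now plain strings, not (key, value) tuples
            predictions=["dog", "cat"],
            scores=[0.8, 0.2],
        ),
    ]
)
evaluator = loader.finalize()

# Precision/recall-style metrics and the confusion matrix are now separate entry points;
# evaluate() composes both and returns a dict keyed by MetricType.
pr_metrics = evaluator.compute_precision_recall(score_thresholds=[0.5], as_dict=True)
cm = evaluator.compute_confusion_matrix(score_thresholds=[0.5], number_of_examples=1, as_dict=True)
all_metrics = evaluator.evaluate(score_thresholds=[0.5], number_of_examples=1, as_dict=True)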