valor-lite 0.35.0__py3-none-any.whl → 0.36.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic. Click here for more details.

@@ -1,5 +1,5 @@
1
- from collections import defaultdict
2
- from dataclasses import dataclass
1
+ import warnings
2
+ from dataclasses import asdict, dataclass
3
3
 
4
4
  import numpy as np
5
5
  from numpy.typing import NDArray
@@ -8,7 +8,9 @@ from tqdm import tqdm
8
8
  from valor_lite.classification.annotation import Classification
9
9
  from valor_lite.classification.computation import (
10
10
  compute_confusion_matrix,
11
+ compute_label_metadata,
11
12
  compute_precision_recall_rocauc,
13
+ filter_cache,
12
14
  )
13
15
  from valor_lite.classification.metric import Metric, MetricType
14
16
  from valor_lite.classification.utilities import (
@@ -37,11 +39,51 @@ filtered_metrics = evaluator.evaluate(filter_mask=filter_mask)
37
39
  """
38
40
 
39
41
 
42
+ @dataclass
43
+ class Metadata:
44
+ number_of_datums: int = 0
45
+ number_of_ground_truths: int = 0
46
+ number_of_predictions: int = 0
47
+ number_of_labels: int = 0
48
+
49
+ @classmethod
50
+ def create(
51
+ cls,
52
+ detailed_pairs: NDArray[np.float64],
53
+ number_of_datums: int,
54
+ number_of_labels: int,
55
+ ):
56
+ # count number of unique ground truths
57
+ mask_valid_gts = detailed_pairs[:, 1] >= 0
58
+ unique_ids = np.unique(
59
+ detailed_pairs[np.ix_(mask_valid_gts, (0, 1))], # type: ignore - np.ix_ typing
60
+ axis=0,
61
+ )
62
+ number_of_ground_truths = int(unique_ids.shape[0])
63
+
64
+ # count number of unique predictions
65
+ mask_valid_pds = detailed_pairs[:, 2] >= 0
66
+ unique_ids = np.unique(
67
+ detailed_pairs[np.ix_(mask_valid_pds, (0, 2))], axis=0 # type: ignore - np.ix_ typing
68
+ )
69
+ number_of_predictions = int(unique_ids.shape[0])
70
+
71
+ return cls(
72
+ number_of_datums=number_of_datums,
73
+ number_of_ground_truths=number_of_ground_truths,
74
+ number_of_predictions=number_of_predictions,
75
+ number_of_labels=number_of_labels,
76
+ )
77
+
78
+ def to_dict(self) -> dict[str, int | bool]:
79
+ return asdict(self)
80
+
81
+
40
82
  @dataclass
41
83
  class Filter:
42
- indices: NDArray[np.intp]
43
- label_metadata: NDArray[np.int32]
44
- n_datums: int
84
+ datum_mask: NDArray[np.bool_]
85
+ valid_label_indices: NDArray[np.int32] | None
86
+ metadata: Metadata
45
87
 
46
88
 
47
89
  class Evaluator:
@@ -50,25 +92,21 @@ class Evaluator:
50
92
  """
51
93
 
52
94
  def __init__(self):
95
+ # external references
96
+ self.datum_id_to_index: dict[str, int] = {}
97
+ self.label_to_index: dict[str, int] = {}
53
98
 
54
- # metadata
55
- self.n_datums = 0
56
- self.n_groundtruths = 0
57
- self.n_predictions = 0
58
- self.n_labels = 0
59
-
60
- # datum reference
61
- self.uid_to_index: dict[str, int] = dict()
62
- self.index_to_uid: dict[int, str] = dict()
99
+ self.index_to_datum_id: list[str] = []
100
+ self.index_to_label: list[str] = []
63
101
 
64
- # label reference
65
- self.label_to_index: dict[str, int] = dict()
66
- self.index_to_label: dict[int, str] = dict()
67
-
68
- # computation caches
102
+ # internal caches
69
103
  self._detailed_pairs = np.array([])
70
104
  self._label_metadata = np.array([], dtype=np.int32)
71
- self._label_metadata_per_datum = np.array([], dtype=np.int32)
105
+ self._metadata = Metadata()
106
+
107
+ @property
108
+ def metadata(self) -> Metadata:
109
+ return self._metadata
72
110
 
73
111
  @property
74
112
  def ignored_prediction_labels(self) -> list[str]:
@@ -92,97 +130,120 @@ class Evaluator:
92
130
  self.index_to_label[label_id] for label_id in (glabels - plabels)
93
131
  ]
94
132
 
95
- @property
96
- def metadata(self) -> dict:
97
- """
98
- Evaluation metadata.
99
- """
100
- return {
101
- "n_datums": self.n_datums,
102
- "n_groundtruths": self.n_groundtruths,
103
- "n_predictions": self.n_predictions,
104
- "n_labels": self.n_labels,
105
- "ignored_prediction_labels": self.ignored_prediction_labels,
106
- "missing_prediction_labels": self.missing_prediction_labels,
107
- }
108
-
109
133
  def create_filter(
110
134
  self,
111
- datum_uids: list[str] | NDArray[np.int32] | None = None,
112
- labels: list[str] | NDArray[np.int32] | None = None,
135
+ datum_ids: list[str] | None = None,
136
+ labels: list[str] | None = None,
113
137
  ) -> Filter:
114
138
  """
115
- Creates a boolean mask that can be passed to an evaluation.
139
+ Creates a filter object.
116
140
 
117
141
  Parameters
118
142
  ----------
119
- datum_uids : list[str] | NDArray[np.int32], optional
120
- An optional list of string uids or a numpy array of uid indices.
121
- labels : list[str] | NDArray[np.int32], optional
122
- An optional list of labels or a numpy array of label indices.
143
+ datum_ids : list[str], optional
144
+ An optional list of string uids representing datums.
145
+ labels : list[str], optional
146
+ An optional list of labels.
123
147
 
124
148
  Returns
125
149
  -------
126
150
  Filter
127
- A filter object that can be passed to the `evaluate` method.
151
+ The filter object representing the input parameters.
128
152
  """
129
- n_rows = self._detailed_pairs.shape[0]
130
-
131
- n_datums = self._label_metadata_per_datum.shape[1]
132
- n_labels = self._label_metadata_per_datum.shape[2]
133
-
134
- mask_pairs = np.ones((n_rows, 1), dtype=np.bool_)
135
- mask_datums = np.ones(n_datums, dtype=np.bool_)
136
- mask_labels = np.ones(n_labels, dtype=np.bool_)
137
-
138
- if datum_uids is not None:
139
- if isinstance(datum_uids, list):
140
- datum_uids = np.array(
141
- [self.uid_to_index[uid] for uid in datum_uids],
142
- dtype=np.int32,
153
+ # create datum mask
154
+ n_pairs = self._detailed_pairs.shape[0]
155
+ datum_mask = np.ones(n_pairs, dtype=np.bool_)
156
+ if datum_ids is not None:
157
+ if not datum_ids:
158
+ warnings.warn("no valid filtered pairs")
159
+ return Filter(
160
+ datum_mask=np.zeros_like(datum_mask),
161
+ valid_label_indices=None,
162
+ metadata=Metadata(),
143
163
  )
144
- mask = np.zeros_like(mask_pairs, dtype=np.bool_)
145
- mask[
146
- np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
147
- ] = True
148
- mask_pairs &= mask
149
-
150
- mask = np.zeros_like(mask_datums, dtype=np.bool_)
151
- mask[datum_uids] = True
152
- mask_datums &= mask
164
+ valid_datum_indices = np.array(
165
+ [self.datum_id_to_index[uid] for uid in datum_ids],
166
+ dtype=np.int32,
167
+ )
168
+ datum_mask = np.isin(
169
+ self._detailed_pairs[:, 0], valid_datum_indices
170
+ )
153
171
 
172
+ # collect valid label indices
173
+ valid_label_indices = None
154
174
  if labels is not None:
155
- if isinstance(labels, list):
156
- labels = np.array(
157
- [self.label_to_index[label] for label in labels]
175
+ if not labels:
176
+ warnings.warn("no valid filtered pairs")
177
+ return Filter(
178
+ datum_mask=datum_mask,
179
+ valid_label_indices=np.array([], dtype=np.int32),
180
+ metadata=Metadata(),
158
181
  )
159
- mask = np.zeros_like(mask_pairs, dtype=np.bool_)
160
- mask[
161
- np.isin(self._detailed_pairs[:, 1].astype(int), labels)
162
- ] = True
163
- mask_pairs &= mask
164
-
165
- mask = np.zeros_like(mask_labels, dtype=np.bool_)
166
- mask[labels] = True
167
- mask_labels &= mask
168
-
169
- mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
170
- label_metadata_per_datum = self._label_metadata_per_datum.copy()
171
- label_metadata_per_datum[:, ~mask] = 0
172
-
173
- label_metadata: NDArray[np.int32] = np.transpose(
174
- np.sum(
175
- label_metadata_per_datum,
176
- axis=1,
182
+ valid_label_indices = np.array(
183
+ [self.label_to_index[label] for label in labels] + [-1]
177
184
  )
185
+
186
+ filtered_detailed_pairs, _ = filter_cache(
187
+ detailed_pairs=self._detailed_pairs,
188
+ datum_mask=datum_mask,
189
+ valid_label_indices=valid_label_indices,
190
+ n_labels=self.metadata.number_of_labels,
178
191
  )
179
192
 
180
- n_datums = int(np.sum(label_metadata[:, 0]))
193
+ number_of_datums = (
194
+ len(datum_ids)
195
+ if datum_ids is not None
196
+ else self.metadata.number_of_datums
197
+ )
181
198
 
182
199
  return Filter(
183
- indices=np.where(mask_pairs)[0],
184
- label_metadata=label_metadata,
185
- n_datums=n_datums,
200
+ datum_mask=datum_mask,
201
+ valid_label_indices=valid_label_indices,
202
+ metadata=Metadata.create(
203
+ detailed_pairs=filtered_detailed_pairs,
204
+ number_of_datums=number_of_datums,
205
+ number_of_labels=self.metadata.number_of_labels,
206
+ ),
207
+ )
208
+
209
+ def filter(
210
+ self, filter_: Filter
211
+ ) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
212
+ """
213
+ Performs filtering over the internal cache.
214
+
215
+ Parameters
216
+ ----------
217
+ filter_ : Filter
218
+ The filter object representation.
219
+
220
+ Returns
221
+ -------
222
+ NDArray[float64]
223
+ The filtered detailed pairs.
224
+ NDArray[int32]
225
+ The filtered label metadata.
226
+ """
227
+ empty_datum_mask = not filter_.datum_mask.any()
228
+ empty_label_mask = (
229
+ filter_.valid_label_indices.size == 0
230
+ if filter_.valid_label_indices is not None
231
+ else False
232
+ )
233
+ if empty_datum_mask or empty_label_mask:
234
+ if empty_datum_mask:
235
+ warnings.warn("filter removes all datums")
236
+ if empty_label_mask:
237
+ warnings.warn("filter removes all labels")
238
+ return (
239
+ np.array([], dtype=np.float64),
240
+ np.zeros((self.metadata.number_of_labels, 2), dtype=np.int32),
241
+ )
242
+ return filter_cache(
243
+ detailed_pairs=self._detailed_pairs,
244
+ datum_mask=filter_.datum_mask,
245
+ valid_label_indices=filter_.valid_label_indices,
246
+ n_labels=self.metadata.number_of_labels,
186
247
  )
187
248
 
188
249
  def compute_precision_recall_rocauc(
@@ -201,31 +262,29 @@ class Evaluator:
201
262
  hardmax : bool
202
263
  Toggles whether a hardmax is applied to predictions.
203
264
  filter_ : Filter, optional
204
- An optional filter object.
265
+ Applies a filter to the internal cache.
205
266
 
206
267
  Returns
207
268
  -------
208
269
  dict[MetricType, list]
209
270
  A dictionary mapping MetricType enumerations to lists of computed metrics.
210
271
  """
211
-
212
272
  # apply filters
213
- data = self._detailed_pairs
214
- label_metadata = self._label_metadata
215
- n_datums = self.n_datums
216
273
  if filter_ is not None:
217
- data = data[filter_.indices]
218
- label_metadata = filter_.label_metadata
219
- n_datums = filter_.n_datums
274
+ detailed_pairs, label_metadata = self.filter(filter_=filter_)
275
+ n_datums = filter_.metadata.number_of_datums
276
+ else:
277
+ detailed_pairs = self._detailed_pairs
278
+ label_metadata = self._label_metadata
279
+ n_datums = self.metadata.number_of_datums
220
280
 
221
281
  results = compute_precision_recall_rocauc(
222
- data=data,
282
+ detailed_pairs=detailed_pairs,
223
283
  label_metadata=label_metadata,
224
284
  score_thresholds=np.array(score_thresholds),
225
285
  hardmax=hardmax,
226
286
  n_datums=n_datums,
227
287
  )
228
-
229
288
  return unpack_precision_recall_rocauc_into_metric_lists(
230
289
  results=results,
231
290
  score_thresholds=score_thresholds,
@@ -253,37 +312,35 @@ class Evaluator:
253
312
  number_of_examples : int, default=0
254
313
  The number of examples to return per count.
255
314
  filter_ : Filter, optional
256
- An optional filter object.
315
+ Applies a filter to the internal cache.
257
316
 
258
317
  Returns
259
318
  -------
260
319
  list[Metric]
261
320
  A list of confusion matrices.
262
321
  """
263
-
264
322
  # apply filters
265
- data = self._detailed_pairs
266
- label_metadata = self._label_metadata
267
323
  if filter_ is not None:
268
- data = data[filter_.indices]
269
- label_metadata = filter_.label_metadata
324
+ detailed_pairs, label_metadata = self.filter(filter_=filter_)
325
+ else:
326
+ detailed_pairs = self._detailed_pairs
327
+ label_metadata = self._label_metadata
270
328
 
271
- if data.size == 0:
329
+ if detailed_pairs.size == 0:
272
330
  return list()
273
331
 
274
332
  results = compute_confusion_matrix(
275
- data=data,
333
+ detailed_pairs=detailed_pairs,
276
334
  label_metadata=label_metadata,
277
335
  score_thresholds=np.array(score_thresholds),
278
336
  hardmax=hardmax,
279
337
  n_examples=number_of_examples,
280
338
  )
281
-
282
339
  return unpack_confusion_matrix_into_metric_list(
283
340
  results=results,
284
341
  score_thresholds=score_thresholds,
285
342
  number_of_examples=number_of_examples,
286
- index_to_uid=self.index_to_uid,
343
+ index_to_datum_id=self.index_to_datum_id,
287
344
  index_to_label=self.index_to_label,
288
345
  )
289
346
 
@@ -306,40 +363,26 @@ class Evaluator:
306
363
  number_of_examples : int, default=0
307
364
  The number of examples to return per count.
308
365
  filter_ : Filter, optional
309
- An optional filter object.
366
+ Applies a filter to the internal cache.
310
367
 
311
368
  Returns
312
369
  -------
313
370
  dict[MetricType, list[Metric]]
314
371
  Lists of metrics organized by metric type.
315
372
  """
316
-
317
373
  metrics = self.compute_precision_recall_rocauc(
318
374
  score_thresholds=score_thresholds,
319
375
  hardmax=hardmax,
320
376
  filter_=filter_,
321
377
  )
322
-
323
378
  metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
324
379
  score_thresholds=score_thresholds,
325
380
  hardmax=hardmax,
326
381
  number_of_examples=number_of_examples,
327
382
  filter_=filter_,
328
383
  )
329
-
330
384
  return metrics
331
385
 
332
-
333
- class DataLoader:
334
- """
335
- Classification DataLoader.
336
- """
337
-
338
- def __init__(self):
339
- self._evaluator = Evaluator()
340
- self.groundtruth_count = defaultdict(lambda: defaultdict(int))
341
- self.prediction_count = defaultdict(lambda: defaultdict(int))
342
-
343
386
  def _add_datum(self, uid: str) -> int:
344
387
  """
345
388
  Helper function for adding a datum to the cache.
@@ -354,11 +397,11 @@ class DataLoader:
354
397
  int
355
398
  The datum index.
356
399
  """
357
- if uid not in self._evaluator.uid_to_index:
358
- index = len(self._evaluator.uid_to_index)
359
- self._evaluator.uid_to_index[uid] = index
360
- self._evaluator.index_to_uid[index] = uid
361
- return self._evaluator.uid_to_index[uid]
400
+ if uid not in self.datum_id_to_index:
401
+ index = len(self.datum_id_to_index)
402
+ self.datum_id_to_index[uid] = index
403
+ self.index_to_datum_id.append(uid)
404
+ return self.datum_id_to_index[uid]
362
405
 
363
406
  def _add_label(self, label: str) -> int:
364
407
  """
@@ -374,47 +417,12 @@ class DataLoader:
374
417
  int
375
418
  Label index.
376
419
  """
377
- label_id = len(self._evaluator.index_to_label)
378
- if label not in self._evaluator.label_to_index:
379
- self._evaluator.label_to_index[label] = label_id
380
- self._evaluator.index_to_label[label_id] = label
381
-
420
+ label_id = len(self.index_to_label)
421
+ if label not in self.label_to_index:
422
+ self.label_to_index[label] = label_id
423
+ self.index_to_label.append(label)
382
424
  label_id += 1
383
-
384
- return self._evaluator.label_to_index[label]
385
-
386
- def _add_data(
387
- self,
388
- uid_index: int,
389
- groundtruth: int,
390
- predictions: list[tuple[int, float]],
391
- ):
392
-
393
- pairs = list()
394
- scores = np.array([score for _, score in predictions])
395
- max_score_idx = np.argmax(scores)
396
-
397
- for idx, (plabel, score) in enumerate(predictions):
398
- pairs.append(
399
- (
400
- float(uid_index),
401
- float(groundtruth),
402
- float(plabel),
403
- float(score),
404
- float(max_score_idx == idx),
405
- )
406
- )
407
-
408
- if self._evaluator._detailed_pairs.size == 0:
409
- self._evaluator._detailed_pairs = np.array(pairs)
410
- else:
411
- self._evaluator._detailed_pairs = np.concatenate(
412
- [
413
- self._evaluator._detailed_pairs,
414
- np.array(pairs),
415
- ],
416
- axis=0,
417
- )
425
+ return self.label_to_index[label]
418
426
 
419
427
  def add_data(
420
428
  self,
@@ -439,24 +447,18 @@ class DataLoader:
439
447
  raise ValueError(
440
448
  "Classifications must contain at least one prediction."
441
449
  )
442
- # update metadata
443
- self._evaluator.n_datums += 1
444
- self._evaluator.n_groundtruths += 1
445
- self._evaluator.n_predictions += len(classification.predictions)
446
450
 
447
451
  # update datum uid index
448
452
  uid_index = self._add_datum(uid=classification.uid)
449
453
 
450
454
  # cache labels and annotations
451
455
  groundtruth = self._add_label(classification.groundtruth)
452
- self.groundtruth_count[groundtruth][uid_index] += 1
453
456
 
454
457
  predictions = list()
455
458
  for plabel, pscore in zip(
456
459
  classification.predictions, classification.scores
457
460
  ):
458
461
  label_idx = self._add_label(plabel)
459
- self.prediction_count[label_idx][uid_index] += 1
460
462
  predictions.append(
461
463
  (
462
464
  label_idx,
@@ -464,13 +466,33 @@ class DataLoader:
464
466
  )
465
467
  )
466
468
 
467
- self._add_data(
468
- uid_index=uid_index,
469
- groundtruth=groundtruth,
470
- predictions=predictions,
471
- )
469
+ pairs = list()
470
+ scores = np.array([score for _, score in predictions])
471
+ max_score_idx = np.argmax(scores)
472
+
473
+ for idx, (plabel, score) in enumerate(predictions):
474
+ pairs.append(
475
+ (
476
+ float(uid_index),
477
+ float(groundtruth),
478
+ float(plabel),
479
+ float(score),
480
+ float(max_score_idx == idx),
481
+ )
482
+ )
472
483
 
473
- def finalize(self) -> Evaluator:
484
+ if self._detailed_pairs.size == 0:
485
+ self._detailed_pairs = np.array(pairs)
486
+ else:
487
+ self._detailed_pairs = np.concatenate(
488
+ [
489
+ self._detailed_pairs,
490
+ np.array(pairs),
491
+ ],
492
+ axis=0,
493
+ )
494
+
495
+ def finalize(self):
474
496
  """
475
497
  Performs data finalization and some preprocessing steps.
476
498
 
@@ -479,63 +501,34 @@ class DataLoader:
479
501
  Evaluator
480
502
  A ready-to-use evaluator object.
481
503
  """
482
-
483
- if self._evaluator._detailed_pairs.size == 0:
484
- raise ValueError("No data available to create evaluator.")
485
-
486
- n_datums = self._evaluator.n_datums
487
- n_labels = len(self._evaluator.index_to_label)
488
-
489
- self._evaluator.n_labels = n_labels
490
-
491
- self._evaluator._label_metadata_per_datum = np.zeros(
492
- (2, n_datums, n_labels), dtype=np.int32
504
+ if self._detailed_pairs.size == 0:
505
+ self._label_metadata = np.array([], dtype=np.int32)
506
+ warnings.warn("evaluator is empty")
507
+ return self
508
+
509
+ self._label_metadata = compute_label_metadata(
510
+ ids=self._detailed_pairs[:, :3].astype(np.int32),
511
+ n_labels=len(self.index_to_label),
493
512
  )
494
- for datum_idx in range(n_datums):
495
- for label_idx in range(n_labels):
496
- gt_count = (
497
- self.groundtruth_count[label_idx].get(datum_idx, 0)
498
- if label_idx in self.groundtruth_count
499
- else 0
500
- )
501
- pd_count = (
502
- self.prediction_count[label_idx].get(datum_idx, 0)
503
- if label_idx in self.prediction_count
504
- else 0
505
- )
506
- self._evaluator._label_metadata_per_datum[
507
- :, datum_idx, label_idx
508
- ] = np.array([gt_count, pd_count])
509
-
510
- self._evaluator._label_metadata = np.array(
511
- [
512
- [
513
- np.sum(
514
- self._evaluator._label_metadata_per_datum[
515
- 0, :, label_idx
516
- ]
517
- ),
518
- np.sum(
519
- self._evaluator._label_metadata_per_datum[
520
- 1, :, label_idx
521
- ]
522
- ),
523
- ]
524
- for label_idx in range(n_labels)
525
- ],
526
- dtype=np.int32,
527
- )
528
-
529
- # sort pairs by groundtruth, prediction, score
530
513
  indices = np.lexsort(
531
514
  (
532
- self._evaluator._detailed_pairs[:, 1],
533
- self._evaluator._detailed_pairs[:, 2],
534
- -self._evaluator._detailed_pairs[:, 3],
515
+ self._detailed_pairs[:, 1], # ground truth
516
+ self._detailed_pairs[:, 2], # prediction
517
+ -self._detailed_pairs[:, 3], # score
535
518
  )
536
519
  )
537
- self._evaluator._detailed_pairs = self._evaluator._detailed_pairs[
538
- indices
539
- ]
520
+ self._detailed_pairs = self._detailed_pairs[indices]
521
+ self._metadata = Metadata.create(
522
+ detailed_pairs=self._detailed_pairs,
523
+ number_of_datums=len(self.index_to_datum_id),
524
+ number_of_labels=len(self.index_to_label),
525
+ )
526
+ return self
527
+
528
+
529
+ class DataLoader(Evaluator):
530
+ """
531
+ Used for backwards compatibility as the Evaluator now handles ingestion.
532
+ """
540
533
 
541
- return self._evaluator
534
+ pass
@@ -335,8 +335,8 @@ class Metric(BaseMetric):
335
335
  The confusion matrix and related metrics for the classification task.
336
336
 
337
337
  This class encapsulates detailed information about the model's performance, including correct
338
- predictions, misclassifications, unmatched predictions (subset of false positives), and unmatched ground truths
339
- (subset of false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
338
+ predictions, misclassifications and unmatched ground truths (subset of false negatives).
339
+ It provides counts and examples for each category to facilitate in-depth analysis.
340
340
 
341
341
  Confusion Matrix Structure:
342
342
  {
@@ -345,10 +345,8 @@ class Metric(BaseMetric):
345
345
  'count': int,
346
346
  'examples': [
347
347
  {
348
- 'datum': str,
349
- 'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
350
- 'prediction': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
351
- 'score': float,
348
+ "datum_id": str,
349
+ "score": float
352
350
  },
353
351
  ...
354
352
  ],
@@ -364,8 +362,7 @@ class Metric(BaseMetric):
364
362
  'count': int,
365
363
  'examples': [
366
364
  {
367
- 'datum': str,
368
- 'groundtruth': dict, # {'xmin': float, 'xmax': float, 'ymin': float, 'ymax': float}
365
+ "datum_id": str
369
366
  },
370
367
  ...
371
368
  ],