valor-lite 0.33.12__py3-none-any.whl → 0.33.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,30 +1,17 @@
  from .annotation import Classification
- from .computation import compute_metrics
- from .manager import DataLoader, Evaluator
- from .metric import (
-     F1,
-     ROCAUC,
-     Accuracy,
-     ConfusionMatrix,
-     Counts,
-     MetricType,
-     Precision,
-     Recall,
-     mROCAUC,
+ from .computation import (
+     compute_confusion_matrix,
+     compute_precision_recall_rocauc,
  )
+ from .manager import DataLoader, Evaluator
+ from .metric import Metric, MetricType

  __all__ = [
      "Classification",
-     "compute_metrics",
+     "compute_precision_recall_rocauc",
+     "compute_confusion_matrix",
      "MetricType",
-     "Counts",
-     "Precision",
-     "Recall",
-     "Accuracy",
-     "F1",
-     "ROCAUC",
-     "mROCAUC",
-     "ConfusionMatrix",
      "DataLoader",
      "Evaluator",
+     "Metric",
  ]
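For downstream code that imports from valor_lite.classification, the net effect of this hunk is a smaller public surface: the per-metric classes (Counts, Precision, Recall, Accuracy, F1, ROCAUC, mROCAUC, ConfusionMatrix) are no longer re-exported, a generic Metric type takes their place, and compute_metrics is split into two explicit entry points. A hedged migration sketch for an import site (the commented-out old form is reconstructed from the removed lines above):

# valor-lite 0.33.12 (old)
# from valor_lite.classification import MetricType, Precision, Recall, compute_metrics

# valor-lite 0.33.14 (new)
from valor_lite.classification import (
    Metric,
    MetricType,
    compute_confusion_matrix,
    compute_precision_recall_rocauc,
)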
@@ -64,7 +64,7 @@ def _compute_rocauc(
      return rocauc, mean_rocauc


- def compute_metrics(
+ def compute_precision_recall_rocauc(
      data: NDArray[np.float64],
      label_metadata: NDArray[np.int32],
      score_thresholds: NDArray[np.float64],
@@ -271,7 +271,7 @@ def compute_confusion_matrix(
      label_metadata : NDArray[np.int32]
          An array containing metadata related to labels.
      iou_thresholds : NDArray[np.float64]
-         A 1-D array containing IoU thresholds.
+         A 1-D array containing IOU thresholds.
      score_thresholds : NDArray[np.float64]
          A 1-D array containing score thresholds.
      n_examples : int
@@ -7,18 +7,12 @@ from tqdm import tqdm
  from valor_lite.classification.annotation import Classification
  from valor_lite.classification.computation import (
      compute_confusion_matrix,
-     compute_metrics,
+     compute_precision_recall_rocauc,
  )
- from valor_lite.classification.metric import (
-     F1,
-     ROCAUC,
-     Accuracy,
-     ConfusionMatrix,
-     Counts,
-     MetricType,
-     Precision,
-     Recall,
-     mROCAUC,
+ from valor_lite.classification.metric import Metric, MetricType
+ from valor_lite.classification.utilities import (
+     unpack_confusion_matrix_into_metric_list,
+     unpack_precision_recall_rocauc_into_metric_lists,
  )

  """
@@ -191,119 +185,11 @@ class Evaluator:
              n_datums=n_datums,
          )

-     def _unpack_confusion_matrix(
-         self,
-         confusion_matrix: NDArray[np.float64],
-         number_of_labels: int,
-         number_of_examples: int,
-     ) -> dict[
-         str,
-         dict[
-             str,
-             dict[
-                 str,
-                 int
-                 | list[
-                     dict[
-                         str,
-                         str | float,
-                     ]
-                 ],
-             ],
-         ],
-     ]:
-         """
-         Unpacks a numpy array of confusion matrix counts and examples.
-         """
-
-         datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
-             confusion_matrix[
-                 gt_label_idx,
-                 pd_label_idx,
-                 example_idx * 2 + 1,
-             ]
-         )
-
-         score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
-             confusion_matrix[
-                 gt_label_idx,
-                 pd_label_idx,
-                 example_idx * 2 + 2,
-             ]
-         )
-
-         return {
-             self.index_to_label[gt_label_idx]: {
-                 self.index_to_label[pd_label_idx]: {
-                     "count": max(
-                         int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
-                         0,
-                     ),
-                     "examples": [
-                         {
-                             "datum": self.index_to_uid[
-                                 datum_idx(
-                                     gt_label_idx, pd_label_idx, example_idx
-                                 )
-                             ],
-                             "score": score_idx(
-                                 gt_label_idx, pd_label_idx, example_idx
-                             ),
-                         }
-                         for example_idx in range(number_of_examples)
-                         if datum_idx(gt_label_idx, pd_label_idx, example_idx)
-                         >= 0
-                     ],
-                 }
-                 for pd_label_idx in range(number_of_labels)
-             }
-             for gt_label_idx in range(number_of_labels)
-         }
-
-     def _unpack_missing_predictions(
-         self,
-         missing_predictions: NDArray[np.int32],
-         number_of_labels: int,
-         number_of_examples: int,
-     ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
-         """
-         Unpacks a numpy array of missing prediction counts and examples.
-         """
-
-         datum_idx = (
-             lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
-                 missing_predictions[
-                     gt_label_idx,
-                     example_idx + 1,
-                 ]
-             )
-         )
-
-         return {
-             self.index_to_label[gt_label_idx]: {
-                 "count": max(
-                     int(missing_predictions[gt_label_idx, 0]),
-                     0,
-                 ),
-                 "examples": [
-                     {
-                         "datum": self.index_to_uid[
-                             datum_idx(gt_label_idx, example_idx)
-                         ]
-                     }
-                     for example_idx in range(number_of_examples)
-                     if datum_idx(gt_label_idx, example_idx) >= 0
-                 ],
-             }
-             for gt_label_idx in range(number_of_labels)
-         }
-
-     def compute_precision_recall(
+     def compute_precision_recall_rocauc(
          self,
          score_thresholds: list[float] = [0.0],
          hardmax: bool = True,
          filter_: Filter | None = None,
-         as_dict: bool = False,
      ) -> dict[MetricType, list]:
          """
          Performs an evaluation and returns metrics.
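The two deleted helpers are not simply dropped: their unpacking logic moves into the valor_lite.classification.utilities functions imported earlier in this diff. For reference, the removed _unpack_confusion_matrix built a nested mapping shaped roughly like the sketch below (reconstructed from the deleted code; the labels, UID, and score are illustrative placeholders):

# ground-truth label -> predicted label -> count plus (datum uid, score) examples
{
    "dog": {
        "cat": {
            "count": 2,
            "examples": [{"datum": "uid123", "score": 0.73}],
        },
    },
}
# _unpack_missing_predictions produced the analogous
# ground-truth label -> {"count": ..., "examples": [{"datum": uid}]} mapping.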
@@ -316,8 +202,6 @@ class Evaluator:
              Toggles whether a hardmax is applied to predictions.
          filter_ : Filter, optional
              An optional filter object.
-         as_dict : bool, default=False
-             An option to return metrics as dictionaries.

          Returns
          -------
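With the as_dict flag gone from this signature, callers that relied on dictionary output have to convert the returned metric objects themselves. A minimal migration sketch, assuming an existing Evaluator instance named evaluator and assuming the new Metric type keeps the to_dict() method that the removed conversion branch called on the old metric classes:

# valor-lite 0.33.12 (old, hypothetical call site)
# metrics = evaluator.compute_precision_recall(score_thresholds=[0.5], as_dict=True)

# valor-lite 0.33.14 (new): receive objects, convert explicitly
metrics = evaluator.compute_precision_recall_rocauc(score_thresholds=[0.5])
metrics_as_dicts = {
    metric_type: [m.to_dict() for m in metric_list]
    for metric_type, metric_list in metrics.items()
}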
@@ -334,15 +218,7 @@ class Evaluator:
              label_metadata = filter_.label_metadata
              n_datums = filter_.n_datums

-         (
-             counts,
-             precision,
-             recall,
-             accuracy,
-             f1_score,
-             rocauc,
-             mean_rocauc,
-         ) = compute_metrics(
+         results = compute_precision_recall_rocauc(
              data=data,
              label_metadata=label_metadata,
              score_thresholds=np.array(score_thresholds),
@@ -350,79 +226,13 @@ class Evaluator:
              n_datums=n_datums,
          )

-         metrics = defaultdict(list)
-
-         metrics[MetricType.ROCAUC] = [
-             ROCAUC(
-                 value=float(rocauc[label_idx]),
-                 label=self.index_to_label[label_idx],
-             )
-             for label_idx in range(label_metadata.shape[0])
-             if label_metadata[label_idx, 0] > 0
-         ]
-
-         metrics[MetricType.mROCAUC] = [
-             mROCAUC(
-                 value=float(mean_rocauc),
-             )
-         ]
-
-         metrics[MetricType.Accuracy] = [
-             Accuracy(
-                 value=accuracy.astype(float).tolist(),
-                 score_thresholds=score_thresholds,
-                 hardmax=hardmax,
-             )
-         ]
-
-         for label_idx, label in self.index_to_label.items():
-
-             kwargs = {
-                 "label": label,
-                 "score_thresholds": score_thresholds,
-                 "hardmax": hardmax,
-             }
-             row = counts[:, label_idx]
-             metrics[MetricType.Counts].append(
-                 Counts(
-                     tp=row[:, 0].astype(int).tolist(),
-                     fp=row[:, 1].astype(int).tolist(),
-                     fn=row[:, 2].astype(int).tolist(),
-                     tn=row[:, 3].astype(int).tolist(),
-                     **kwargs,
-                 )
-             )
-
-             # if no groundtruths exists for a label, skip it.
-             if label_metadata[label_idx, 0] == 0:
-                 continue
-
-             metrics[MetricType.Precision].append(
-                 Precision(
-                     value=precision[:, label_idx].astype(float).tolist(),
-                     **kwargs,
-                 )
-             )
-             metrics[MetricType.Recall].append(
-                 Recall(
-                     value=recall[:, label_idx].astype(float).tolist(),
-                     **kwargs,
-                 )
-             )
-             metrics[MetricType.F1].append(
-                 F1(
-                     value=f1_score[:, label_idx].astype(float).tolist(),
-                     **kwargs,
-                 )
-             )
-
-         if as_dict:
-             return {
-                 mtype: [metric.to_dict() for metric in mvalues]
-                 for mtype, mvalues in metrics.items()
-             }
-
-         return metrics
+         return unpack_precision_recall_rocauc_into_metric_lists(
+             results=results,
+             score_thresholds=score_thresholds,
+             hardmax=hardmax,
+             label_metadata=label_metadata,
+             index_to_label=self.index_to_label,
+         )

      def compute_confusion_matrix(
          self,
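Functionally, the seven hand-built metric lists above now come back from unpack_precision_recall_rocauc_into_metric_lists, so the method still returns a dictionary keyed by MetricType. A hedged consumer sketch (that the new unpacker populates the same MetricType members as the removed code is an assumption):

metrics = evaluator.compute_precision_recall_rocauc(score_thresholds=[0.25, 0.75])
# the removed code built one ROCAUC entry per label with ground truths and a
# single mROCAUC entry; the new unpacker is assumed to mirror that shape
rocauc_per_label = metrics[MetricType.ROCAUC]
mean_rocauc = metrics[MetricType.mROCAUC]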
@@ -430,8 +240,7 @@ class Evaluator:
          hardmax: bool = True,
          number_of_examples: int = 0,
          filter_: Filter | None = None,
-         as_dict: bool = False,
-     ) -> list:
+     ) -> list[Metric]:
          """
          Computes a detailed confusion matrix..

@@ -445,12 +254,10 @@ class Evaluator:
              The number of examples to return per count.
          filter_ : Filter, optional
              An optional filter object.
-         as_dict : bool, default=False
-             An option to return metrics as dictionaries.

          Returns
          -------
-         list[ConfusionMatrix] | list[dict]
+         list[Metric]
              A list of confusion matrices.
          """

@@ -464,7 +271,7 @@ class Evaluator:
          if data.size == 0:
              return list()

-         confusion_matrix, missing_predictions = compute_confusion_matrix(
+         results = compute_confusion_matrix(
              data=data,
              label_metadata=label_metadata,
              score_thresholds=np.array(score_thresholds),
@@ -472,29 +279,13 @@ class Evaluator:
              n_examples=number_of_examples,
          )

-         n_scores, n_labels, _, _ = confusion_matrix.shape
-         results = [
-             ConfusionMatrix(
-                 score_threshold=score_thresholds[score_idx],
-                 number_of_examples=number_of_examples,
-                 confusion_matrix=self._unpack_confusion_matrix(
-                     confusion_matrix=confusion_matrix[score_idx, :, :, :],
-                     number_of_labels=n_labels,
-                     number_of_examples=number_of_examples,
-                 ),
-                 missing_predictions=self._unpack_missing_predictions(
-                     missing_predictions=missing_predictions[score_idx, :, :],
-                     number_of_labels=n_labels,
-                     number_of_examples=number_of_examples,
-                 ),
-             )
-             for score_idx in range(n_scores)
-         ]
-
-         if as_dict:
-             return [m.to_dict() for m in results]
-
-         return results
+         return unpack_confusion_matrix_into_metric_list(
+             results=results,
+             score_thresholds=score_thresholds,
+             number_of_examples=number_of_examples,
+             index_to_uid=self.index_to_uid,
+             index_to_label=self.index_to_label,
+         )

      def evaluate(
          self,
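The confusion-matrix path follows the same pattern: the raw arrays go straight to unpack_confusion_matrix_into_metric_list, and the method now returns list[Metric] rather than list[ConfusionMatrix]. A hedged call-site sketch (parameter values are illustrative):

confusion_matrices = evaluator.compute_confusion_matrix(
    score_thresholds=[0.5],
    hardmax=True,
    number_of_examples=2,  # up to two example (datum uid, score) pairs per cell
)
# the removed code built one ConfusionMatrix per score threshold; the returned
# list of Metric objects is assumed to keep that one-entry-per-threshold shape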
@@ -502,8 +293,7 @@ class Evaluator:
          hardmax: bool = True,
          number_of_examples: int = 0,
          filter_: Filter | None = None,
-         as_dict: bool = False,
-     ) -> dict[MetricType, list]:
+     ) -> dict[MetricType, list[Metric]]:
          """
          Computes a detailed confusion matrix..

@@ -517,29 +307,27 @@ class Evaluator:
              The number of examples to return per count.
          filter_ : Filter, optional
              An optional filter object.
-         as_dict : bool, default=False
-             An option to return metrics as dictionaries.

          Returns
          -------
-         list[ConfusionMatrix] | list[dict]
-             A list of confusion matrices.
+         dict[MetricType, list[Metric]]
+             Lists of metrics organized by metric type.
          """

-         results = self.compute_precision_recall(
+         metrics = self.compute_precision_recall_rocauc(
              score_thresholds=score_thresholds,
              hardmax=hardmax,
              filter_=filter_,
-             as_dict=as_dict,
          )
-         results[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
+
+         metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
              score_thresholds=score_thresholds,
              hardmax=hardmax,
              number_of_examples=number_of_examples,
              filter_=filter_,
-             as_dict=as_dict,
          )
-         return results
+
+         return metrics


  class DataLoader:
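Taken together, evaluate() keeps its role as the single entry point but drops as_dict and now advertises dict[MetricType, list[Metric]], with the confusion matrices stored under MetricType.ConfusionMatrix. A minimal end-to-end sketch, assuming an already-populated Evaluator named evaluator (how it is built from the DataLoader is outside this diff):

metrics = evaluator.evaluate(
    score_thresholds=[0.5],
    hardmax=True,
    number_of_examples=1,
)
confusion_matrices = metrics[MetricType.ConfusionMatrix]  # list[Metric], added by evaluate()
# every other key in the dict comes from compute_precision_recall_rocauc()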