valor-lite 0.36.2__tar.gz → 0.36.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {valor_lite-0.36.2 → valor_lite-0.36.3}/PKG-INFO +1 -1
  2. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/computation.py +25 -132
  3. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/manager.py +18 -22
  4. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/metric.py +0 -4
  5. valor_lite-0.36.3/valor_lite/classification/utilities.py +211 -0
  6. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/exceptions.py +3 -3
  7. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/manager.py +13 -15
  8. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/manager.py +4 -4
  9. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite.egg-info/PKG-INFO +1 -1
  10. valor_lite-0.36.2/valor_lite/classification/utilities.py +0 -229
  11. {valor_lite-0.36.2 → valor_lite-0.36.3}/README.md +0 -0
  12. {valor_lite-0.36.2 → valor_lite-0.36.3}/pyproject.toml +0 -0
  13. {valor_lite-0.36.2 → valor_lite-0.36.3}/setup.cfg +0 -0
  14. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/LICENSE +0 -0
  15. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/__init__.py +0 -0
  16. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/__init__.py +0 -0
  17. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/annotation.py +0 -0
  18. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/classification/numpy_compatibility.py +0 -0
  19. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/__init__.py +0 -0
  20. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/annotation.py +0 -0
  21. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/computation.py +0 -0
  22. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/metric.py +0 -0
  23. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/object_detection/utilities.py +0 -0
  24. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/profiling.py +0 -0
  25. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/schemas.py +0 -0
  26. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/__init__.py +0 -0
  27. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/annotation.py +0 -0
  28. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/benchmark.py +0 -0
  29. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/computation.py +0 -0
  30. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/metric.py +0 -0
  31. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/semantic_segmentation/utilities.py +0 -0
  32. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/__init__.py +0 -0
  33. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/annotation.py +0 -0
  34. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/computation.py +0 -0
  35. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/__init__.py +0 -0
  36. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/exceptions.py +0 -0
  37. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/generation.py +0 -0
  38. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/instructions.py +0 -0
  39. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/integrations.py +0 -0
  40. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/utilities.py +0 -0
  41. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/llm/validators.py +0 -0
  42. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/manager.py +0 -0
  43. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite/text_generation/metric.py +0 -0
  44. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite.egg-info/SOURCES.txt +0 -0
  45. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite.egg-info/dependency_links.txt +0 -0
  46. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite.egg-info/requires.txt +0 -0
  47. {valor_lite-0.36.2 → valor_lite-0.36.3}/valor_lite.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: valor-lite
- Version: 0.36.2
+ Version: 0.36.3
  Summary: Evaluate machine learning models.
  Project-URL: homepage, https://www.striveworks.com
  Requires-Python: >=3.10
@@ -1,3 +1,5 @@
+ from enum import IntFlag, auto
+
  import numpy as np
  from numpy.typing import NDArray
 
@@ -318,56 +320,20 @@ def compute_precision_recall_rocauc(
  )
 
 
- def _count_with_examples(
- data: NDArray[np.float64],
- unique_idx: int | list[int],
- label_idx: int | list[int],
- ) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.intp]]:
- """
- Helper function for counting occurences of unique detailed pairs.
-
- Parameters
- ----------
- data : NDArray[np.float64]
- A masked portion of a detailed pairs array.
- unique_idx : int | list[int]
- The index or indices upon which uniqueness is constrained.
- label_idx : int | list[int]
- The index or indices within the unique index or indices that encode labels.
-
- Returns
- -------
- NDArray[np.float64]
- Examples drawn from the data input.
- NDArray[np.int32]
- Unique label indices.
- NDArray[np.intp]
- Counts for each unique label index.
- """
- unique_rows, indices = np.unique(
- data.astype(int)[:, unique_idx],
- return_index=True,
- axis=0,
- )
- examples = data[indices]
- labels, counts = np.unique(
- unique_rows[:, label_idx], return_counts=True, axis=0
- )
- return examples, labels, counts
+ class PairClassification(IntFlag):
+ TP = auto()
+ FP_FN_MISCLF = auto()
+ FN_UNMATCHED = auto()
 
 
  def compute_confusion_matrix(
  detailed_pairs: NDArray[np.float64],
- label_metadata: NDArray[np.int32],
  score_thresholds: NDArray[np.float64],
  hardmax: bool,
- n_examples: int,
- ) -> tuple[NDArray[np.float64], NDArray[np.int32]]:
+ ) -> NDArray[np.uint8]:
  """
  Compute detailed confusion matrix.
 
- Takes data with shape (N, 5):
-
  Parameters
  ----------
  detailed_pairs : NDArray[np.float64]
@@ -377,37 +343,22 @@ def compute_confusion_matrix(
  Index 2 - Prediction Label Index
  Index 3 - Score
  Index 4 - Hard Max Score
- label_metadata : NDArray[np.int32]
- A 2-D array containing metadata related to labels with shape (n_labels, 2).
- Index 0 - GroundTruth Label Count
- Index 1 - Prediction Label Count
  iou_thresholds : NDArray[np.float64]
  A 1-D array containing IOU thresholds.
  score_thresholds : NDArray[np.float64]
  A 1-D array containing score thresholds.
- n_examples : int
- The maximum number of examples to return per count.
 
  Returns
  -------
- NDArray[np.float64]
- Confusion matrix.
- NDArray[np.int32]
- Unmatched Ground Truths.
+ NDArray[uint8]
+ Row-wise classification of pairs.
  """
-
- n_labels = label_metadata.shape[0]
+ n_pairs = detailed_pairs.shape[0]
  n_scores = score_thresholds.shape[0]
 
- confusion_matrix = np.full(
- (n_scores, n_labels, n_labels, 2 * n_examples + 1),
- fill_value=-1.0,
- dtype=np.float32,
- )
- unmatched_ground_truths = np.full(
- (n_scores, n_labels, n_examples + 1),
- fill_value=-1,
- dtype=np.int32,
+ pair_classifications = np.zeros(
+ (n_scores, n_pairs),
+ dtype=np.uint8,
  )
 
  mask_label_match = np.isclose(detailed_pairs[:, 1], detailed_pairs[:, 2])
@@ -420,9 +371,9 @@ def compute_confusion_matrix(
  if hardmax:
  mask_score &= detailed_pairs[:, 4] > 0.5
 
- mask_tp = mask_label_match & mask_score
- mask_misclf = ~mask_label_match & mask_score
- mask_misprd = ~(
+ mask_true_positives = mask_label_match & mask_score
+ mask_misclassifications = ~mask_label_match & mask_score
+ mask_unmatched_groundtruths = ~(
  (
  groundtruths.reshape(-1, 1, 2)
  == groundtruths[mask_score].reshape(1, -1, 2)
@@ -431,73 +382,15 @@ def compute_confusion_matrix(
  .any(axis=1)
  )
 
- tp_examples, tp_labels, tp_counts = _count_with_examples(
- data=detailed_pairs[mask_tp],
- unique_idx=[0, 2],
- label_idx=1,
- )
- misclf_examples, misclf_labels, misclf_counts = _count_with_examples(
- data=detailed_pairs[mask_misclf],
- unique_idx=[0, 1, 2],
- label_idx=[1, 2],
+ # classify pairings
+ pair_classifications[score_idx, mask_true_positives] |= np.uint8(
+ PairClassification.TP
  )
- misprd_examples, misprd_labels, misprd_counts = _count_with_examples(
- data=detailed_pairs[mask_misprd],
- unique_idx=[0, 1],
- label_idx=1,
+ pair_classifications[score_idx, mask_misclassifications] |= np.uint8(
+ PairClassification.FP_FN_MISCLF
  )
+ pair_classifications[
+ score_idx, mask_unmatched_groundtruths
+ ] |= np.uint8(PairClassification.FN_UNMATCHED)
 
- confusion_matrix[score_idx, tp_labels, tp_labels, 0] = tp_counts
- confusion_matrix[
- score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0
- ] = misclf_counts
-
- unmatched_ground_truths[score_idx, misprd_labels, 0] = misprd_counts
-
- if n_examples > 0:
- for label_idx in range(n_labels):
- # true-positive examples
- mask_tp_label = tp_examples[:, 2] == label_idx
- if mask_tp_label.sum() > 0:
- tp_label_examples = tp_examples[mask_tp_label][:n_examples]
- confusion_matrix[
- score_idx,
- label_idx,
- label_idx,
- 1 : 2 * tp_label_examples.shape[0] + 1,
- ] = tp_label_examples[:, [0, 3]].flatten()
-
- # misclassification examples
- mask_misclf_gt_label = misclf_examples[:, 1] == label_idx
- if mask_misclf_gt_label.sum() > 0:
- for pd_label_idx in range(n_labels):
- mask_misclf_pd_label = (
- misclf_examples[:, 2] == pd_label_idx
- )
- mask_misclf_label_combo = (
- mask_misclf_gt_label & mask_misclf_pd_label
- )
- if mask_misclf_label_combo.sum() > 0:
- misclf_label_examples = misclf_examples[
- mask_misclf_label_combo
- ][:n_examples]
- confusion_matrix[
- score_idx,
- label_idx,
- pd_label_idx,
- 1 : 2 * misclf_label_examples.shape[0] + 1,
- ] = misclf_label_examples[:, [0, 3]].flatten()
-
- # unmatched ground truth examples
- mask_misprd_label = misprd_examples[:, 1] == label_idx
- if misprd_examples.size > 0:
- misprd_label_examples = misprd_examples[mask_misprd_label][
- :n_examples
- ]
- unmatched_ground_truths[
- score_idx,
- label_idx,
- 1 : misprd_label_examples.shape[0] + 1,
- ] = misprd_label_examples[:, 0].flatten()
-
- return confusion_matrix, unmatched_ground_truths # type: ignore[reportReturnType]
+ return pair_classifications
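For orientation, the new compute_confusion_matrix no longer builds example-padded count arrays; it returns a (n_scores, n_pairs) uint8 array whose cells are PairClassification bitmasks. A minimal, self-contained sketch of decoding such an array follows; the flag class mirrors the one added above, while the array values and shapes are invented for illustration:

```python
import numpy as np
from enum import IntFlag, auto


# Local mirror of the PairClassification flags introduced in computation.py.
class PairClassification(IntFlag):
    TP = auto()
    FP_FN_MISCLF = auto()
    FN_UNMATCHED = auto()


# Hypothetical output: 2 score thresholds x 4 pairs, each cell a bitmask.
pair_classifications = np.array(
    [
        [PairClassification.TP, PairClassification.FP_FN_MISCLF, 0, PairClassification.FN_UNMATCHED],
        [0, 0, PairClassification.FN_UNMATCHED, PairClassification.FN_UNMATCHED],
    ],
    dtype=np.uint8,
)

# Decode with bitwise tests, the same way the new
# unpack_confusion_matrix_into_metric_list separates matched pairs from
# unmatched ground truths.
mask_matched = (
    np.bitwise_and(
        pair_classifications,
        PairClassification.TP | PairClassification.FP_FN_MISCLF,
    )
    > 0
)
mask_fn_unmatched = (
    np.bitwise_and(pair_classifications, PairClassification.FN_UNMATCHED) > 0
)

print(mask_matched)       # which (score, pair) cells matched a prediction
print(mask_fn_unmatched)  # which cells correspond to unmatched ground truths
```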
@@ -16,7 +16,7 @@ from valor_lite.classification.utilities import (
  unpack_confusion_matrix_into_metric_list,
  unpack_precision_recall_rocauc_into_metric_lists,
  )
- from valor_lite.exceptions import EmptyEvaluatorException, EmptyFilterException
+ from valor_lite.exceptions import EmptyEvaluatorError, EmptyFilterError
 
  """
  Usage
@@ -88,14 +88,14 @@ class Filter:
  def __post_init__(self):
  # validate datum mask
  if not self.datum_mask.any():
- raise EmptyFilterException("filter removes all datums")
+ raise EmptyFilterError("filter removes all datums")
 
  # validate label indices
  if (
  self.valid_label_indices is not None
  and self.valid_label_indices.size == 0
  ):
- raise EmptyFilterException("filter removes all labels")
+ raise EmptyFilterError("filter removes all labels")
 
 
  class Evaluator:
@@ -292,7 +292,6 @@ class Evaluator:
  self,
  score_thresholds: list[float] = [0.0],
  hardmax: bool = True,
- number_of_examples: int = 0,
  filter_: Filter | None = None,
  ) -> list[Metric]:
  """
@@ -304,8 +303,6 @@ class Evaluator:
  A list of score thresholds to compute metrics over.
  hardmax : bool
  Toggles whether a hardmax is applied to predictions.
- number_of_examples : int, default=0
- The number of examples to return per count.
  filter_ : Filter, optional
  Applies a filter to the internal cache.
 
@@ -316,25 +313,22 @@ class Evaluator:
  """
  # apply filters
  if filter_ is not None:
- detailed_pairs, label_metadata = self.filter(filter_=filter_)
+ detailed_pairs, _ = self.filter(filter_=filter_)
  else:
  detailed_pairs = self._detailed_pairs
- label_metadata = self._label_metadata
 
  if detailed_pairs.size == 0:
  return list()
 
- results = compute_confusion_matrix(
+ result = compute_confusion_matrix(
  detailed_pairs=detailed_pairs,
- label_metadata=label_metadata,
  score_thresholds=np.array(score_thresholds),
  hardmax=hardmax,
- n_examples=number_of_examples,
  )
  return unpack_confusion_matrix_into_metric_list(
- results=results,
+ detailed_pairs=detailed_pairs,
+ result=result,
  score_thresholds=score_thresholds,
- number_of_examples=number_of_examples,
  index_to_datum_id=self.index_to_datum_id,
  index_to_label=self.index_to_label,
  )
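With the example-count knob removed, a confusion-matrix run reduces to the call below. A minimal sketch, assuming `evaluator` is an already-finalized valor_lite.classification Evaluator (its construction is not shown in this diff):

```python
# Callers that previously passed number_of_examples=... simply drop that argument.
metrics = evaluator.compute_confusion_matrix(
    score_thresholds=[0.25, 0.5, 0.75],
    hardmax=True,
)

# The new unpacking emits one ConfusionMatrix metric per score threshold.
assert len(metrics) == 3
```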
@@ -343,7 +337,6 @@ class Evaluator:
  self,
  score_thresholds: list[float] = [0.0],
  hardmax: bool = True,
- number_of_examples: int = 0,
  filter_: Filter | None = None,
  ) -> dict[MetricType, list[Metric]]:
  """
@@ -355,8 +348,6 @@ class Evaluator:
  A list of score thresholds to compute metrics over.
  hardmax : bool
  Toggles whether a hardmax is applied to predictions.
- number_of_examples : int, default=0
- The number of examples to return per count.
  filter_ : Filter, optional
  Applies a filter to the internal cache.
 
@@ -373,7 +364,6 @@ class Evaluator:
  metrics[MetricType.ConfusionMatrix] = self.compute_confusion_matrix(
  score_thresholds=score_thresholds,
  hardmax=hardmax,
- number_of_examples=number_of_examples,
  filter_=filter_,
  )
  return metrics
@@ -391,11 +381,17 @@ class Evaluator:
  -------
  int
  The datum index.
+
+ Raises
+ ------
+ ValueError
+ If datum id already exists.
  """
- if uid not in self.datum_id_to_index:
- index = len(self.datum_id_to_index)
- self.datum_id_to_index[uid] = index
- self.index_to_datum_id.append(uid)
+ if uid in self.datum_id_to_index:
+ raise ValueError("datum with id '{uid}' already exists")
+ index = len(self.datum_id_to_index)
+ self.datum_id_to_index[uid] = index
+ self.index_to_datum_id.append(uid)
  return self.datum_id_to_index[uid]
 
  def _add_label(self, label: str) -> int:
@@ -497,7 +493,7 @@ class Evaluator:
  A ready-to-use evaluator object.
  """
  if self._detailed_pairs.size == 0:
- raise EmptyEvaluatorException()
+ raise EmptyEvaluatorError()
 
  self._label_metadata = compute_label_metadata(
  ids=self._detailed_pairs[:, :3].astype(np.int32),
@@ -329,7 +329,6 @@ class Metric(BaseMetric):
  ],
  ],
  score_threshold: float,
- maximum_number_of_examples: int,
  ):
  """
  The confusion matrix and related metrics for the classification task.
@@ -382,8 +381,6 @@ class Metric(BaseMetric):
  Each example includes the datum UID.
  score_threshold : float
  The confidence score threshold used to filter predictions.
- maximum_number_of_examples : int
- The maximum number of examples per element.
 
  Returns
  -------
@@ -397,6 +394,5 @@ class Metric(BaseMetric):
  },
  parameters={
  "score_threshold": score_threshold,
- "maximum_number_of_examples": maximum_number_of_examples,
  },
  )
@@ -0,0 +1,211 @@
+ from collections import defaultdict
+
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from valor_lite.classification.computation import PairClassification
+ from valor_lite.classification.metric import Metric, MetricType
+
+
+ def unpack_precision_recall_rocauc_into_metric_lists(
+ results: tuple[
+ NDArray[np.int32],
+ NDArray[np.float64],
+ NDArray[np.float64],
+ NDArray[np.float64],
+ NDArray[np.float64],
+ NDArray[np.float64],
+ float,
+ ],
+ score_thresholds: list[float],
+ hardmax: bool,
+ label_metadata: NDArray[np.int32],
+ index_to_label: list[str],
+ ) -> dict[MetricType, list[Metric]]:
+ (
+ counts,
+ precision,
+ recall,
+ accuracy,
+ f1_score,
+ rocauc,
+ mean_rocauc,
+ ) = results
+
+ metrics = defaultdict(list)
+
+ metrics[MetricType.ROCAUC] = [
+ Metric.roc_auc(
+ value=float(rocauc[label_idx]),
+ label=label,
+ )
+ for label_idx, label in enumerate(index_to_label)
+ if label_metadata[label_idx, 0] > 0
+ ]
+
+ metrics[MetricType.mROCAUC] = [
+ Metric.mean_roc_auc(
+ value=float(mean_rocauc),
+ )
+ ]
+
+ metrics[MetricType.Accuracy] = [
+ Metric.accuracy(
+ value=float(accuracy[score_idx]),
+ score_threshold=score_threshold,
+ hardmax=hardmax,
+ )
+ for score_idx, score_threshold in enumerate(score_thresholds)
+ ]
+
+ for label_idx, label in enumerate(index_to_label):
+ for score_idx, score_threshold in enumerate(score_thresholds):
+
+ kwargs = {
+ "label": label,
+ "hardmax": hardmax,
+ "score_threshold": score_threshold,
+ }
+ row = counts[:, label_idx]
+ metrics[MetricType.Counts].append(
+ Metric.counts(
+ tp=int(row[score_idx, 0]),
+ fp=int(row[score_idx, 1]),
+ fn=int(row[score_idx, 2]),
+ tn=int(row[score_idx, 3]),
+ **kwargs,
+ )
+ )
+
+ # if no groundtruths exists for a label, skip it.
+ if label_metadata[label_idx, 0] == 0:
+ continue
+
+ metrics[MetricType.Precision].append(
+ Metric.precision(
+ value=float(precision[score_idx, label_idx]),
+ **kwargs,
+ )
+ )
+ metrics[MetricType.Recall].append(
+ Metric.recall(
+ value=float(recall[score_idx, label_idx]),
+ **kwargs,
+ )
+ )
+ metrics[MetricType.F1].append(
+ Metric.f1_score(
+ value=float(f1_score[score_idx, label_idx]),
+ **kwargs,
+ )
+ )
+ return metrics
+
+
+ def _create_empty_confusion_matrix(index_to_labels: list[str]):
+ unmatched_ground_truths = dict()
+ confusion_matrix = dict()
+ for label in index_to_labels:
+ unmatched_ground_truths[label] = {"count": 0, "examples": []}
+ confusion_matrix[label] = {}
+ for plabel in index_to_labels:
+ confusion_matrix[label][plabel] = {"count": 0, "examples": []}
+ return (
+ confusion_matrix,
+ unmatched_ground_truths,
+ )
+
+
+ def _unpack_confusion_matrix(
+ ids: NDArray[np.int32],
+ scores: NDArray[np.float64],
+ mask_matched: NDArray[np.bool_],
+ mask_fn_unmatched: NDArray[np.bool_],
+ index_to_datum_id: list[str],
+ index_to_label: list[str],
+ score_threshold: float,
+ ):
+ (
+ confusion_matrix,
+ unmatched_ground_truths,
+ ) = _create_empty_confusion_matrix(index_to_label)
+
+ unique_matches, unique_match_indices = np.unique(
+ ids[np.ix_(mask_matched, (0, 1, 2))], # type: ignore - numpy ix_ typing
+ axis=0,
+ return_index=True,
+ )
+ (
+ unique_unmatched_groundtruths,
+ unique_unmatched_groundtruth_indices,
+ ) = np.unique(
+ ids[np.ix_(mask_fn_unmatched, (0, 1))], # type: ignore - numpy ix_ typing
+ axis=0,
+ return_index=True,
+ )
+
+ n_matched = unique_matches.shape[0]
+ n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
+ n_max = max(n_matched, n_unmatched_groundtruths)
+
+ for idx in range(n_max):
+ if idx < n_matched:
+ glabel = index_to_label[unique_matches[idx, 1]]
+ plabel = index_to_label[unique_matches[idx, 2]]
+ confusion_matrix[glabel][plabel]["count"] += 1
+ confusion_matrix[glabel][plabel]["examples"].append(
+ {
+ "datum_id": index_to_datum_id[unique_matches[idx, 0]],
+ "score": float(scores[unique_match_indices[idx]]),
+ }
+ )
+ if idx < n_unmatched_groundtruths:
+ label = index_to_label[unique_unmatched_groundtruths[idx, 1]]
+ unmatched_ground_truths[label]["count"] += 1
+ unmatched_ground_truths[label]["examples"].append(
+ {
+ "datum_id": index_to_datum_id[
+ unique_unmatched_groundtruths[idx, 0]
+ ],
+ }
+ )
+
+ return Metric.confusion_matrix(
+ confusion_matrix=confusion_matrix,
+ unmatched_ground_truths=unmatched_ground_truths,
+ score_threshold=score_threshold,
+ )
+
+
+ def unpack_confusion_matrix_into_metric_list(
+ result: NDArray[np.uint8],
+ detailed_pairs: NDArray[np.float64],
+ score_thresholds: list[float],
+ index_to_datum_id: list[str],
+ index_to_label: list[str],
+ ) -> list[Metric]:
+
+ ids = detailed_pairs[:, :3].astype(np.int32)
+
+ mask_matched = (
+ np.bitwise_and(
+ result, PairClassification.TP | PairClassification.FP_FN_MISCLF
+ )
+ > 0
+ )
+ mask_fn_unmatched = (
+ np.bitwise_and(result, PairClassification.FN_UNMATCHED) > 0
+ )
+
+ return [
+ _unpack_confusion_matrix(
+ ids=ids,
+ scores=detailed_pairs[:, 3],
+ mask_matched=mask_matched[score_idx, :],
+ mask_fn_unmatched=mask_fn_unmatched[score_idx, :],
+ index_to_datum_id=index_to_datum_id,
+ index_to_label=index_to_label,
+ score_threshold=score_threshold,
+ )
+ for score_idx, score_threshold in enumerate(score_thresholds)
+ ]
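For reference, the nested dictionaries assembled by these new helpers have the shape sketched below; the labels, datum id, and score are invented for illustration:

```python
# Two hypothetical labels, before any counts are accumulated.
index_to_label = ["cat", "dog"]

confusion_matrix = {
    glabel: {plabel: {"count": 0, "examples": []} for plabel in index_to_label}
    for glabel in index_to_label
}
unmatched_ground_truths = {
    glabel: {"count": 0, "examples": []} for glabel in index_to_label
}

# A matched pair records the datum id and prediction score;
# an unmatched ground truth records only the datum id.
confusion_matrix["cat"]["dog"]["count"] += 1
confusion_matrix["cat"]["dog"]["examples"].append(
    {"datum_id": "img_001", "score": 0.42}
)
unmatched_ground_truths["dog"]["count"] += 1
unmatched_ground_truths["dog"]["examples"].append({"datum_id": "img_002"})
```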
@@ -1,15 +1,15 @@
- class EmptyEvaluatorException(Exception):
+ class EmptyEvaluatorError(Exception):
  def __init__(self):
  super().__init__(
  "evaluator cannot be finalized as it contains no data"
  )
 
 
- class EmptyFilterException(Exception):
+ class EmptyFilterError(Exception):
  def __init__(self, message: str):
  super().__init__(message)
 
 
- class InternalCacheException(Exception):
+ class InternalCacheError(Exception):
  def __init__(self, message: str):
  super().__init__(message)
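These renames touch the public exception names, so downstream code that caught the old *Exception classes has to switch to the *Error names. A hedged sketch; the `loader.finalize()` call is an assumed entry point, not taken from this diff:

```python
from valor_lite.exceptions import EmptyEvaluatorError

# Hypothetical caller: `loader` is assumed to be a DataLoader with data added.
try:
    evaluator = loader.finalize()
except EmptyEvaluatorError:
    # Previously this was EmptyEvaluatorException.
    print("no data was added before finalization")
```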
@@ -6,9 +6,9 @@ from numpy.typing import NDArray
  from tqdm import tqdm
 
  from valor_lite.exceptions import (
- EmptyEvaluatorException,
- EmptyFilterException,
- InternalCacheException,
+ EmptyEvaluatorError,
+ EmptyFilterError,
+ InternalCacheError,
  )
  from valor_lite.object_detection.annotation import (
  Bitmask,
@@ -102,13 +102,13 @@ class Filter:
  def __post_init__(self):
  # validate datums mask
  if not self.mask_datums.any():
- raise EmptyFilterException("filter removes all datums")
+ raise EmptyFilterError("filter removes all datums")
 
  # validate annotation masks
  no_gts = self.mask_groundtruths.all()
  no_pds = self.mask_predictions.all()
  if no_gts and no_pds:
- raise EmptyFilterException("filter removes all annotations")
+ raise EmptyFilterError("filter removes all annotations")
  elif no_gts:
  warnings.warn("filter removes all ground truths")
  elif no_pds:
@@ -197,7 +197,7 @@ class Evaluator:
  # filter datums
  if datum_ids is not None:
  if not datum_ids:
- raise EmptyFilterException("filter removes all datums")
+ raise EmptyFilterError("filter removes all datums")
  valid_datum_indices = np.array(
  [self.datum_id_to_index[uid] for uid in datum_ids],
  dtype=np.int32,
@@ -240,7 +240,7 @@ class Evaluator:
  # filter by labels
  if labels is not None:
  if not labels:
- raise EmptyFilterException("filter removes all labels")
+ raise EmptyFilterError("filter removes all labels")
  valid_label_indices = np.array(
  [self.label_to_index[label] for label in labels] + [-1]
  )
@@ -467,7 +467,7 @@ class DataLoader:
  if len(self._evaluator.datum_id_to_index) != len(
  self._evaluator.index_to_datum_id
  ):
- raise InternalCacheException("datum cache size mismatch")
+ raise InternalCacheError("datum cache size mismatch")
  idx = len(self._evaluator.datum_id_to_index)
  self._evaluator.datum_id_to_index[datum_id] = idx
  self._evaluator.index_to_datum_id.append(datum_id)
@@ -491,9 +491,7 @@ class DataLoader:
  if len(self._evaluator.groundtruth_id_to_index) != len(
  self._evaluator.index_to_groundtruth_id
  ):
- raise InternalCacheException(
- "ground truth cache size mismatch"
- )
+ raise InternalCacheError("ground truth cache size mismatch")
  idx = len(self._evaluator.groundtruth_id_to_index)
  self._evaluator.groundtruth_id_to_index[annotation_id] = idx
  self._evaluator.index_to_groundtruth_id.append(annotation_id)
@@ -517,7 +515,7 @@ class DataLoader:
  if len(self._evaluator.prediction_id_to_index) != len(
  self._evaluator.index_to_prediction_id
  ):
- raise InternalCacheException("prediction cache size mismatch")
+ raise InternalCacheError("prediction cache size mismatch")
  idx = len(self._evaluator.prediction_id_to_index)
  self._evaluator.prediction_id_to_index[annotation_id] = idx
  self._evaluator.index_to_prediction_id.append(annotation_id)
@@ -542,7 +540,7 @@ class DataLoader:
  if len(self._evaluator.label_to_index) != len(
  self._evaluator.index_to_label
  ):
- raise InternalCacheException("label cache size mismatch")
+ raise InternalCacheError("label cache size mismatch")
  self._evaluator.label_to_index[label] = label_id
  self._evaluator.index_to_label.append(label)
  label_id += 1
@@ -768,14 +766,14 @@ class DataLoader:
  A ready-to-use evaluator object.
  """
  if not self.pairs:
- raise EmptyEvaluatorException()
+ raise EmptyEvaluatorError()
 
  n_labels = len(self._evaluator.index_to_label)
  n_datums = len(self._evaluator.index_to_datum_id)
 
  self._evaluator._detailed_pairs = np.concatenate(self.pairs, axis=0)
  if self._evaluator._detailed_pairs.size == 0:
- raise EmptyEvaluatorException()
+ raise EmptyEvaluatorError()
 
  # order pairs by descending score, iou
  indices = np.lexsort(
@@ -4,7 +4,7 @@ import numpy as np
  from numpy.typing import NDArray
  from tqdm import tqdm
 
- from valor_lite.exceptions import EmptyEvaluatorException, EmptyFilterException
+ from valor_lite.exceptions import EmptyEvaluatorError, EmptyFilterError
  from valor_lite.semantic_segmentation.annotation import Segmentation
  from valor_lite.semantic_segmentation.computation import (
  compute_intermediate_confusion_matrices,
@@ -74,11 +74,11 @@ class Filter:
  def __post_init__(self):
  # validate datum mask
  if not self.datum_mask.any():
- raise EmptyFilterException("filter removes all datums")
+ raise EmptyFilterError("filter removes all datums")
 
  # validate label mask
  if self.label_mask.all():
- raise EmptyFilterException("filter removes all labels")
+ raise EmptyFilterError("filter removes all labels")
 
 
  class Evaluator:
@@ -403,7 +403,7 @@ class DataLoader:
  """
 
  if len(self.matrices) == 0:
- raise EmptyEvaluatorException()
+ raise EmptyEvaluatorError()
 
  n_labels = len(self._evaluator.index_to_label)
  n_datums = len(self._evaluator.index_to_datum_id)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: valor-lite
- Version: 0.36.2
+ Version: 0.36.3
  Summary: Evaluate machine learning models.
  Project-URL: homepage, https://www.striveworks.com
  Requires-Python: >=3.10
@@ -1,229 +0,0 @@
- from collections import defaultdict
-
- import numpy as np
- from numpy.typing import NDArray
-
- from valor_lite.classification.metric import Metric, MetricType
-
-
- def unpack_precision_recall_rocauc_into_metric_lists(
- results: tuple[
- NDArray[np.int32],
- NDArray[np.float64],
- NDArray[np.float64],
- NDArray[np.float64],
- NDArray[np.float64],
- NDArray[np.float64],
- float,
- ],
- score_thresholds: list[float],
- hardmax: bool,
- label_metadata: NDArray[np.int32],
- index_to_label: list[str],
- ) -> dict[MetricType, list[Metric]]:
- (
- counts,
- precision,
- recall,
- accuracy,
- f1_score,
- rocauc,
- mean_rocauc,
- ) = results
-
- metrics = defaultdict(list)
-
- metrics[MetricType.ROCAUC] = [
- Metric.roc_auc(
- value=float(rocauc[label_idx]),
- label=label,
- )
- for label_idx, label in enumerate(index_to_label)
- if label_metadata[label_idx, 0] > 0
- ]
-
- metrics[MetricType.mROCAUC] = [
- Metric.mean_roc_auc(
- value=float(mean_rocauc),
- )
- ]
-
- metrics[MetricType.Accuracy] = [
- Metric.accuracy(
- value=float(accuracy[score_idx]),
- score_threshold=score_threshold,
- hardmax=hardmax,
- )
- for score_idx, score_threshold in enumerate(score_thresholds)
- ]
-
- for label_idx, label in enumerate(index_to_label):
- for score_idx, score_threshold in enumerate(score_thresholds):
-
- kwargs = {
- "label": label,
- "hardmax": hardmax,
- "score_threshold": score_threshold,
- }
- row = counts[:, label_idx]
- metrics[MetricType.Counts].append(
- Metric.counts(
- tp=int(row[score_idx, 0]),
- fp=int(row[score_idx, 1]),
- fn=int(row[score_idx, 2]),
- tn=int(row[score_idx, 3]),
- **kwargs,
- )
- )
-
- # if no groundtruths exists for a label, skip it.
- if label_metadata[label_idx, 0] == 0:
- continue
-
- metrics[MetricType.Precision].append(
- Metric.precision(
- value=float(precision[score_idx, label_idx]),
- **kwargs,
- )
- )
- metrics[MetricType.Recall].append(
- Metric.recall(
- value=float(recall[score_idx, label_idx]),
- **kwargs,
- )
- )
- metrics[MetricType.F1].append(
- Metric.f1_score(
- value=float(f1_score[score_idx, label_idx]),
- **kwargs,
- )
- )
- return metrics
-
-
- def _unpack_confusion_matrix_value(
- confusion_matrix: NDArray[np.float64],
- number_of_labels: int,
- number_of_examples: int,
- index_to_datum_id: list[str],
- index_to_label: list[str],
- ) -> dict[str, dict[str, dict[str, int | list[dict[str, str | float]]]]]:
- """
- Unpacks a numpy array of confusion matrix counts and examples.
- """
-
- datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 2 + 1,
- ]
- )
-
- score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float( # noqa: E731 - lambda fn
- confusion_matrix[
- gt_label_idx,
- pd_label_idx,
- example_idx * 2 + 2,
- ]
- )
-
- return {
- index_to_label[gt_label_idx]: {
- index_to_label[pd_label_idx]: {
- "count": max(
- int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum_id": index_to_datum_id[
- datum_idx(gt_label_idx, pd_label_idx, example_idx)
- ],
- "score": score_idx(
- gt_label_idx, pd_label_idx, example_idx
- ),
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, pd_label_idx, example_idx) >= 0
- ],
- }
- for pd_label_idx in range(number_of_labels)
- }
- for gt_label_idx in range(number_of_labels)
- }
-
-
- def _unpack_unmatched_ground_truths_value(
- unmatched_ground_truths: NDArray[np.int32],
- number_of_labels: int,
- number_of_examples: int,
- index_to_datum_id: list[str],
- index_to_label: list[str],
- ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
- """
- Unpacks a numpy array of unmatched ground truth counts and examples.
- """
-
- datum_idx = (
- lambda gt_label_idx, example_idx: int( # noqa: E731 - lambda fn
- unmatched_ground_truths[
- gt_label_idx,
- example_idx + 1,
- ]
- )
- )
-
- return {
- index_to_label[gt_label_idx]: {
- "count": max(
- int(unmatched_ground_truths[gt_label_idx, 0]),
- 0,
- ),
- "examples": [
- {
- "datum_id": index_to_datum_id[
- datum_idx(gt_label_idx, example_idx)
- ]
- }
- for example_idx in range(number_of_examples)
- if datum_idx(gt_label_idx, example_idx) >= 0
- ],
- }
- for gt_label_idx in range(number_of_labels)
- }
-
-
- def unpack_confusion_matrix_into_metric_list(
- results: tuple[NDArray[np.float64], NDArray[np.int32]],
- score_thresholds: list[float],
- number_of_examples: int,
- index_to_datum_id: list[str],
- index_to_label: list[str],
- ) -> list[Metric]:
-
- (confusion_matrix, unmatched_ground_truths) = results
- _, n_labels, _, _ = confusion_matrix.shape
- return [
- Metric.confusion_matrix(
- score_threshold=score_threshold,
- maximum_number_of_examples=number_of_examples,
- confusion_matrix=_unpack_confusion_matrix_value(
- confusion_matrix=confusion_matrix[score_idx, :, :, :],
- number_of_labels=n_labels,
- number_of_examples=number_of_examples,
- index_to_label=index_to_label,
- index_to_datum_id=index_to_datum_id,
- ),
- unmatched_ground_truths=_unpack_unmatched_ground_truths_value(
- unmatched_ground_truths=unmatched_ground_truths[
- score_idx, :, :
- ],
- number_of_labels=n_labels,
- number_of_examples=number_of_examples,
- index_to_label=index_to_label,
- index_to_datum_id=index_to_datum_id,
- ),
- )
- for score_idx, score_threshold in enumerate(score_thresholds)
- ]