PyPI - valor-lite - Versions diffs - 0.33.4__py3-none-any.whl → 0.33.5__py3-none-any.whl - Mend

valor-lite: 0.33.4 (py3-none-any.whl) → 0.33.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

valor_lite/detection/__init__.py +11 -6
valor_lite/detection/computation.py +208 -152
valor_lite/detection/manager.py +347 -128
valor_lite/detection/metric.py +60 -34
{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/METADATA +1 -1
valor_lite-0.33.5.dist-info/RECORD +12 -0
valor_lite-0.33.4.dist-info/RECORD +0 -12
{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/LICENSE +0 -0
{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/WHEEL +0 -0
{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/top_level.txt +0 -0

valor_lite/detection/__init__.py CHANGED Viewed

@@ -1,10 +1,13 @@
 from .annotation import Bitmask, BoundingBox, Detection, Polygon
 from .computation import (
-    compute_detailed_counts,
+    compute_bbox_iou,
+    compute_bitmask_iou,
+    compute_confusion_matrix,
     compute_metrics,
+    compute_polygon_iou,
     compute_ranked_pairs,
 )
-from .manager import DataLoader, Evaluator, compute_iou
+from .manager import DataLoader, Evaluator
 from .metric import (
     AP,
     AR,
@@ -12,8 +15,8 @@ from .metric import (
     Accuracy,
     APAveragedOverIOUs,
     ARAveragedOverScores,
+    ConfusionMatrix,
     Counts,
-    DetailedCounts,
     MetricType,
     Precision,
     PrecisionRecallCurve,
@@ -44,11 +47,13 @@ __all__ = [
     "ARAveragedOverScores",
     "mARAveragedOverScores",
     "PrecisionRecallCurve",
-    "DetailedCounts",
-    "compute_iou",
+    "ConfusionMatrix",
+    "compute_bbox_iou",
+    "compute_bitmask_iou",
+    "compute_polygon_iou",
     "compute_ranked_pairs",
     "compute_metrics",
-    "compute_detailed_counts",
+    "compute_confusion_matrix",
     "DataLoader",
     "Evaluator",
 ]

valor_lite/detection/computation.py CHANGED Viewed

@@ -492,13 +492,52 @@ def compute_metrics(
     )
-def compute_detailed_counts(
+def _count_with_examples(
+    data: NDArray[np.floating],
+    unique_idx: int | list[int],
+    label_idx: int | list[int],
+) -> tuple[NDArray[np.floating], NDArray[np.int32], NDArray[np.int32]]:
+    """
+    Helper function for counting occurences of unique detailed pairs.
+    Parameters
+    ----------
+    data : NDArray[np.floating]
+        A masked portion of a detailed pairs array.
+    unique_idx : int | list[int]
+        The index or indices upon which uniqueness is constrained.
+    label_idx : int | list[int]
+        The index or indices within the unique index or indices that encode labels.
+    Returns
+    -------
+    NDArray[np.floating]
+        Examples drawn from the data input.
+    NDArray[np.int32]
+        Unique label indices.
+    NDArray[np.int32]
+        Counts for each unique label index.
+    """
+    unique_rows, indices = np.unique(
+        data.astype(int)[:, unique_idx],
+        return_index=True,
+        axis=0,
+    )
+    examples = data[indices]
+    labels, counts = np.unique(
+        unique_rows[:, label_idx], return_counts=True, axis=0
+    )
+    return examples, labels, counts
+def compute_confusion_matrix(
     data: NDArray[np.floating],
     label_metadata: NDArray[np.int32],
     iou_thresholds: NDArray[np.floating],
     score_thresholds: NDArray[np.floating],
-    n_samples: int,
-) -> NDArray[np.int32]:
+    n_examples: int,
+) -> tuple[NDArray[np.floating], NDArray[np.floating], NDArray[np.int32]]:
     """
     Compute detailed counts.
@@ -512,19 +551,6 @@ def compute_detailed_counts(
     Index 5 - Prediction Label Index
     Index 6 - Score
-    Outputs an array with shape (N_IoUs, N_Score, N_Labels, 5 * n_samples + 5):
-    Index 0 - True Positive Count
-    ... Datum ID Examples
-    Index 2 * n_samples + 1 - False Positive Misclassification Count
-    ... Datum ID Examples
-    Index 4 * n_samples + 2 - False Positive Hallucination Count
-    ... Datum ID Examples
-    Index 6 * n_samples + 3 - False Negative Misclassification Count
-    ... Datum ID Examples
-    Index 8 * n_samples + 4 - False Negative Missing Prediction Count
-    ... Datum ID Examples
     Parameters
     ----------
     data : NDArray[np.floating]
@@ -535,28 +561,37 @@ def compute_detailed_counts(
         A 1-D array containing IoU thresholds.
     score_thresholds : NDArray[np.floating]
         A 1-D array containing score thresholds.
-    n_samples : int
-        The number of examples to return per count.
+    n_examples : int
+        The maximum number of examples to return per count.
     Returns
     -------
+    NDArray[np.floating]
+        Confusion matrix.
+    NDArray[np.floating]
+        Hallucinations.
     NDArray[np.int32]
-        The detailed counts with optional examples.
+        Missing Predictions.
     """
     n_labels = label_metadata.shape[0]
     n_ious = iou_thresholds.shape[0]
     n_scores = score_thresholds.shape[0]
-    n_metrics = 5 * (2 * n_samples + 1)
-    tp_idx = 0
-    fp_misclf_idx = 2 * n_samples + 1
-    fp_halluc_idx = 4 * n_samples + 2
-    fn_misclf_idx = 6 * n_samples + 3
-    fn_misprd_idx = 8 * n_samples + 4
-    detailed_pr_curve = -1 * np.ones(
-        (n_ious, n_scores, n_labels, n_metrics), dtype=np.int32
+    confusion_matrix = -1 * np.ones(
+        # (datum idx, gt idx, pd idx, pd score) * n_examples + count
+        (n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1),
+        dtype=np.float32,
+    )
+    hallucinations = -1 * np.ones(
+        # (datum idx, pd idx, pd score) * n_examples + count
+        (n_ious, n_scores, n_labels, 3 * n_examples + 1),
+        dtype=np.float32,
+    )
+    missing_predictions = -1 * np.ones(
+        # (datum idx, gt idx) * n_examples + count
+        (n_ious, n_scores, n_labels, 2 * n_examples + 1),
+        dtype=np.int32,
     )
     mask_gt_exists = data[:, 1] > -0.5
@@ -622,9 +657,9 @@ def compute_detailed_counts(
                 ~mask_groundtruths_with_passing_score & mask_gt_exists
             )
+            # create category masks
             mask_tp = mask_score & mask_iou & mask_gt_pd_match
-            mask_fp_misclf = mask_score & mask_iou & mask_gt_pd_mismatch
-            mask_fn_misclf = mask_iou & (
+            mask_misclf = mask_iou & (
                 (
                     ~mask_score
                     & mask_gt_pd_match
@@ -632,143 +667,164 @@ def compute_detailed_counts(
                 )
                 | (mask_score & mask_gt_pd_mismatch)
             )
-            mask_fp_halluc = mask_score & mask_predictions_without_passing_ious
-            mask_fn_misprd = (
+            mask_halluc = mask_score & mask_predictions_without_passing_ious
+            mask_misprd = (
                 mask_groundtruths_without_passing_ious
                 | mask_groundtruths_without_passing_score
             )
-            tp_pds = np.unique(data[mask_tp][:, [0, 2, 5]], axis=0)
-            tp_gts = np.unique(data[mask_tp][:, [0, 1, 4]], axis=0)
-            fp_misclf = np.unique(data[mask_fp_misclf][:, [0, 2, 5]], axis=0)
-            fp_halluc = np.unique(data[mask_fp_halluc][:, [0, 2, 5]], axis=0)
-            fn_misclf = np.unique(data[mask_fn_misclf][:, [0, 1, 4]], axis=0)
-            fn_misprd = np.unique(data[mask_fn_misprd][:, [0, 1, 4]], axis=0)
-            mask_fp_misclf_is_tp = (
-                (fp_misclf.reshape(-1, 1, 3) == tp_pds.reshape(1, -1, 3))
+            # filter out true-positives from misclf and misprd
+            mask_gts_with_tp_override = (
+                (
+                    data[mask_misclf][:, [0, 1]].reshape(-1, 1, 2)
+                    == data[mask_tp][:, [0, 1]].reshape(1, -1, 2)
+                )
                 .all(axis=2)
                 .any(axis=1)
             )
-            mask_fn_misclf_is_tp = (
-                (fn_misclf.reshape(-1, 1, 3) == tp_gts.reshape(1, -1, 3))
+            mask_pds_with_tp_override = (
+                (
+                    data[mask_misclf][:, [0, 2]].reshape(-1, 1, 2)
+                    == data[mask_tp][:, [0, 2]].reshape(1, -1, 2)
+                )
                 .all(axis=2)
                 .any(axis=1)
             )
+            mask_misprd[mask_misclf] |= (
+                ~mask_gts_with_tp_override & mask_pds_with_tp_override
+            )
+            mask_misclf[mask_misclf] &= (
+                ~mask_gts_with_tp_override & ~mask_pds_with_tp_override
+            )
-            tp = tp_pds
-            fp_misclf = fp_misclf[~mask_fp_misclf_is_tp]
-            fp_halluc = fp_halluc
-            fn_misclf = fn_misclf[~mask_fn_misclf_is_tp]
-            fn_misprd = fn_misprd
-            tp_count = np.bincount(tp[:, 2].astype(int), minlength=n_labels)
-            fp_misclf_count = np.bincount(
-                fp_misclf[:, 2].astype(int), minlength=n_labels
+            # count true positives
+            tp_examples, tp_labels, tp_counts = _count_with_examples(
+                data[mask_tp],
+                unique_idx=[0, 2, 5],
+                label_idx=2,
             )
-            fp_halluc_count = np.bincount(
-                fp_halluc[:, 2].astype(int), minlength=n_labels
+            # count misclassifications
+            (
+                misclf_examples,
+                misclf_labels,
+                misclf_counts,
+            ) = _count_with_examples(
+                data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4]
             )
-            fn_misclf_count = np.bincount(
-                fn_misclf[:, 2].astype(int), minlength=n_labels
+            # count hallucinations
+            (
+                halluc_examples,
+                halluc_labels,
+                halluc_counts,
+            ) = _count_with_examples(
+                data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2
             )
-            fn_misprd_count = np.bincount(
-                fn_misprd[:, 2].astype(int), minlength=n_labels
+            # count missing predictions
+            (
+                misprd_examples,
+                misprd_labels,
+                misprd_counts,
+            ) = _count_with_examples(
+                data[mask_misprd], unique_idx=[0, 1, 4], label_idx=2
             )
-            detailed_pr_curve[iou_idx, score_idx, :, tp_idx] = tp_count
-            detailed_pr_curve[
-                iou_idx, score_idx, :, fp_misclf_idx
-            ] = fp_misclf_count
-            detailed_pr_curve[
-                iou_idx, score_idx, :, fp_halluc_idx
-            ] = fp_halluc_count
-            detailed_pr_curve[
-                iou_idx, score_idx, :, fn_misclf_idx
-            ] = fn_misclf_count
-            detailed_pr_curve[
-                iou_idx, score_idx, :, fn_misprd_idx
-            ] = fn_misprd_count
-            if n_samples > 0:
+            # store the counts
+            confusion_matrix[
+                iou_idx, score_idx, tp_labels, tp_labels, 0
+            ] = tp_counts
+            confusion_matrix[
+                iou_idx,
+                score_idx,
+                misclf_labels[:, 0],
+                misclf_labels[:, 1],
+                0,
+            ] = misclf_counts
+            hallucinations[
+                iou_idx,
+                score_idx,
+                halluc_labels,
+                0,
+            ] = halluc_counts
+            missing_predictions[
+                iou_idx,
+                score_idx,
+                misprd_labels,
+                0,
+            ] = misprd_counts
+            # store examples
+            if n_examples > 0:
                 for label_idx in range(n_labels):
-                    tp_examples = (
-                        tp[tp[:, 2].astype(int) == label_idx][
-                            :n_samples, [0, 1]
-                        ]
-                        .astype(int)
-                        .flatten()
-                    )
-                    fp_misclf_examples = (
-                        fp_misclf[fp_misclf[:, 2].astype(int) == label_idx][
-                            :n_samples, [0, 1]
-                        ]
-                        .astype(int)
-                        .flatten()
-                    )
-                    fp_halluc_examples = (
-                        fp_halluc[fp_halluc[:, 2].astype(int) == label_idx][
-                            :n_samples, [0, 1]
-                        ]
-                        .astype(int)
-                        .flatten()
-                    )
-                    fn_misclf_examples = (
-                        fn_misclf[fn_misclf[:, 2].astype(int) == label_idx][
-                            :n_samples, [0, 1]
-                        ]
-                        .astype(int)
-                        .flatten()
-                    )
-                    fn_misprd_examples = (
-                        fn_misprd[fn_misprd[:, 2].astype(int) == label_idx][
-                            :n_samples, [0, 1]
+                    # true-positive examples
+                    mask_tp_label = tp_examples[:, 5] == label_idx
+                    if mask_tp_label.sum() > 0:
+                        tp_label_examples = tp_examples[mask_tp_label][
+                            :n_examples
                         ]
-                        .astype(int)
-                        .flatten()
-                    )
-                    detailed_pr_curve[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        tp_idx + 1 : tp_idx + 1 + tp_examples.shape[0],
-                    ] = tp_examples
-                    detailed_pr_curve[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        fp_misclf_idx
-                        + 1 : fp_misclf_idx
-                        + 1
-                        + fp_misclf_examples.shape[0],
-                    ] = fp_misclf_examples
-                    detailed_pr_curve[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        fp_halluc_idx
-                        + 1 : fp_halluc_idx
-                        + 1
-                        + fp_halluc_examples.shape[0],
-                    ] = fp_halluc_examples
-                    detailed_pr_curve[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        fn_misclf_idx
-                        + 1 : fn_misclf_idx
-                        + 1
-                        + fn_misclf_examples.shape[0],
-                    ] = fn_misclf_examples
-                    detailed_pr_curve[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        fn_misprd_idx
-                        + 1 : fn_misprd_idx
-                        + 1
-                        + fn_misprd_examples.shape[0],
-                    ] = fn_misprd_examples
-    return detailed_pr_curve
+                        confusion_matrix[
+                            iou_idx,
+                            score_idx,
+                            label_idx,
+                            label_idx,
+                            1 : 4 * tp_label_examples.shape[0] + 1,
+                        ] = tp_label_examples[:, [0, 1, 2, 6]].flatten()
+                    # misclassification examples
+                    mask_misclf_gt_label = misclf_examples[:, 4] == label_idx
+                    if mask_misclf_gt_label.sum() > 0:
+                        for pd_label_idx in range(n_labels):
+                            mask_misclf_pd_label = (
+                                misclf_examples[:, 5] == pd_label_idx
+                            )
+                            mask_misclf_label_combo = (
+                                mask_misclf_gt_label & mask_misclf_pd_label
+                            )
+                            if mask_misclf_label_combo.sum() > 0:
+                                misclf_label_examples = misclf_examples[
+                                    mask_misclf_label_combo
+                                ][:n_examples]
+                                confusion_matrix[
+                                    iou_idx,
+                                    score_idx,
+                                    label_idx,
+                                    pd_label_idx,
+                                    1 : 4 * misclf_label_examples.shape[0] + 1,
+                                ] = misclf_label_examples[
+                                    :, [0, 1, 2, 6]
+                                ].flatten()
+                    # hallucination examples
+                    mask_halluc_label = halluc_examples[:, 5] == label_idx
+                    if mask_halluc_label.sum() > 0:
+                        halluc_label_examples = halluc_examples[
+                            mask_halluc_label
+                        ][:n_examples]
+                        hallucinations[
+                            iou_idx,
+                            score_idx,
+                            label_idx,
+                            1 : 3 * halluc_label_examples.shape[0] + 1,
+                        ] = halluc_label_examples[:, [0, 2, 6]].flatten()
+                    # missing prediction examples
+                    mask_misprd_label = misprd_examples[:, 4] == label_idx
+                    if misprd_examples.size > 0:
+                        misprd_label_examples = misprd_examples[
+                            mask_misprd_label
+                        ][:n_examples]
+                        missing_predictions[
+                            iou_idx,
+                            score_idx,
+                            label_idx,
+                            1 : 2 * misprd_label_examples.shape[0] + 1,
+                        ] = misprd_label_examples[:, [0, 1]].flatten()
+    return (
+        confusion_matrix,
+        hallucinations,
+        missing_predictions,
+    )

valor_lite/detection/manager.py CHANGED Viewed

@@ -14,7 +14,7 @@ from valor_lite.detection.annotation import (
 from valor_lite.detection.computation import (
     compute_bbox_iou,
     compute_bitmask_iou,
-    compute_detailed_counts,
+    compute_confusion_matrix,
     compute_metrics,
     compute_polygon_iou,
     compute_ranked_pairs,
@@ -26,8 +26,8 @@ from valor_lite.detection.metric import (
     Accuracy,
     APAveragedOverIOUs,
     ARAveragedOverScores,
+    ConfusionMatrix,
     Counts,
-    DetailedCounts,
     MetricType,
     Precision,
     PrecisionRecallCurve,
@@ -158,7 +158,8 @@ def compute_iou(
 @dataclass
 class Filter:
-    indices: NDArray[np.int32]
+    ranked_indices: NDArray[np.int32]
+    detailed_indices: NDArray[np.int32]
     label_metadata: NDArray[np.int32]
@@ -257,12 +258,14 @@ class Evaluator:
         Filter
             A filter object that can be passed to the `evaluate` method.
         """
-        n_rows = self._ranked_pairs.shape[0]
         n_datums = self._label_metadata_per_datum.shape[1]
         n_labels = self._label_metadata_per_datum.shape[2]
-        mask_pairs = np.ones((n_rows, 1), dtype=np.bool_)
+        mask_ranked = np.ones((self._ranked_pairs.shape[0], 1), dtype=np.bool_)
+        mask_detailed = np.ones(
+            (self._detailed_pairs.shape[0], 1), dtype=np.bool_
+        )
         mask_datums = np.ones(n_datums, dtype=np.bool_)
         mask_labels = np.ones(n_labels, dtype=np.bool_)
@@ -272,9 +275,12 @@ class Evaluator:
                     [self.uid_to_index[uid] for uid in datum_uids],
                     dtype=np.int32,
                 )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 0].astype(int), datum_uids)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 0].astype(int), datum_uids)
+            ] = False
             mask_datums[~np.isin(np.arange(n_datums), datum_uids)] = False
         if labels is not None:
@@ -282,9 +288,12 @@ class Evaluator:
                 labels = np.array(
                     [self.label_to_index[label] for label in labels]
                 )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 4].astype(int), labels)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), labels)
+            ] = False
             mask_labels[~np.isin(np.arange(n_labels), labels)] = False
         if label_keys is not None:
@@ -297,14 +306,19 @@ class Evaluator:
                 if label_keys.size > 0
                 else np.array([])
             )
-            mask_pairs[
+            mask_ranked[
                 ~np.isin(self._ranked_pairs[:, 4].astype(int), label_indices)
             ] = False
+            mask_detailed[
+                ~np.isin(self._detailed_pairs[:, 4].astype(int), label_indices)
+            ] = False
             mask_labels[~np.isin(np.arange(n_labels), label_indices)] = False
-        mask = mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+        mask_label_metadata = (
+            mask_datums[:, np.newaxis] & mask_labels[np.newaxis, :]
+        )
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
-        label_metadata_per_datum[:, ~mask] = 0
+        label_metadata_per_datum[:, ~mask_label_metadata] = 0
         label_metadata = np.zeros_like(self._label_metadata, dtype=np.int32)
         label_metadata[:, :2] = np.transpose(
@@ -316,7 +330,8 @@ class Evaluator:
         label_metadata[:, 2] = self._label_metadata[:, 2]
         return Filter(
-            indices=np.where(mask_pairs)[0],
+            ranked_indices=np.where(mask_ranked)[0],
+            detailed_indices=np.where(mask_detailed)[0],
             label_metadata=label_metadata,
         )
@@ -340,7 +355,7 @@ class Evaluator:
         score_thresholds : list[float]
             A list of score thresholds to compute metrics over.
         number_of_examples : int, default=0
-            Number of annotation examples to return in DetailedCounts.
+            Maximum number of annotation examples to return in ConfusionMatrix.
         filter_ : Filter, optional
             An optional filter object.
@@ -350,10 +365,12 @@ class Evaluator:
             A dictionary mapping MetricType enumerations to lists of computed metrics.
         """
-        data = self._ranked_pairs
+        ranked_pairs = self._ranked_pairs
+        detailed_pairs = self._detailed_pairs
         label_metadata = self._label_metadata
         if filter_ is not None:
-            data = data[filter_.indices]
+            ranked_pairs = ranked_pairs[filter_.ranked_indices]
+            detailed_pairs = detailed_pairs[filter_.detailed_indices]
             label_metadata = filter_.label_metadata
         (
@@ -372,7 +389,7 @@ class Evaluator:
             precision_recall,
             pr_curves,
         ) = compute_metrics(
-            data=data,
+            data=ranked_pairs,
             label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
@@ -527,11 +544,15 @@ class Evaluator:
                         )
                     )
-        if MetricType.DetailedCounts in metrics_to_return:
-            metrics[MetricType.DetailedCounts] = self._compute_detailed_counts(
+        if MetricType.ConfusionMatrix in metrics_to_return:
+            metrics[
+                MetricType.ConfusionMatrix
+            ] = self._compute_confusion_matrix(
+                data=detailed_pairs,
+                label_metadata=label_metadata,
                 iou_thresholds=iou_thresholds,
                 score_thresholds=score_thresholds,
-                n_samples=number_of_examples,
+                number_of_examples=number_of_examples,
             )
         for metric in set(metrics.keys()):
@@ -540,14 +561,281 @@ class Evaluator:
         return metrics
-    def _compute_detailed_counts(
+    def _unpack_confusion_matrix(
+        self,
+        confusion_matrix: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            dict[
+                str,
+                int
+                | list[
+                    dict[
+                        str,
+                        str | float | tuple[float, float, float, float],
+                    ]
+                ],
+            ],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of confusion matrix counts and examples.
+        """
+        datum_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 1,
+            ]
+        )
+        groundtruth_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 2,
+            ]
+        )
+        prediction_idx = lambda gt_label_idx, pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 3,
+            ]
+        )
+        score_idx = lambda gt_label_idx, pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+            confusion_matrix[
+                gt_label_idx,
+                pd_label_idx,
+                example_idx * 4 + 4,
+            ]
+        )
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                self.index_to_label[pd_label_idx][1]: {
+                    "count": max(
+                        int(confusion_matrix[gt_label_idx, pd_label_idx, 0]),
+                        0,
+                    ),
+                    "examples": [
+                        {
+                            "datum": self.index_to_uid[
+                                datum_idx(
+                                    gt_label_idx, pd_label_idx, example_idx
+                                )
+                            ],
+                            "groundtruth": tuple(
+                                self.groundtruth_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    groundtruth_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "prediction": tuple(
+                                self.prediction_examples[
+                                    datum_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ][
+                                    prediction_idx(
+                                        gt_label_idx,
+                                        pd_label_idx,
+                                        example_idx,
+                                    )
+                                ].tolist()
+                            ),
+                            "score": score_idx(
+                                gt_label_idx, pd_label_idx, example_idx
+                            ),
+                        }
+                        for example_idx in range(number_of_examples)
+                        if datum_idx(gt_label_idx, pd_label_idx, example_idx)
+                        >= 0
+                    ],
+                }
+                for pd_label_idx in range(number_of_labels)
+                if (
+                    self.label_index_to_label_key_index[pd_label_idx]
+                    == label_key_idx
+                )
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+    def _unpack_hallucinations(
         self,
+        hallucinations: NDArray[np.floating],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int
+            | list[dict[str, str | float | tuple[float, float, float, float]]],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of hallucination counts and examples.
+        """
+        datum_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 1,
+                ]
+            )
+        )
+        prediction_idx = (
+            lambda pd_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 2,
+                ]
+            )
+        )
+        score_idx = (
+            lambda pd_label_idx, example_idx: float(  # noqa: E731 - lambda fn
+                hallucinations[
+                    pd_label_idx,
+                    example_idx * 3 + 3,
+                ]
+            )
+        )
+        return {
+            self.index_to_label[pd_label_idx][1]: {
+                "count": max(
+                    int(hallucinations[pd_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(pd_label_idx, example_idx)
+                        ],
+                        "prediction": tuple(
+                            self.prediction_examples[
+                                datum_idx(pd_label_idx, example_idx)
+                            ][
+                                prediction_idx(pd_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                        "score": score_idx(pd_label_idx, example_idx),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(pd_label_idx, example_idx) >= 0
+                ],
+            }
+            for pd_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[pd_label_idx]
+                == label_key_idx
+            )
+        }
+    def _unpack_missing_predictions(
+        self,
+        missing_predictions: NDArray[np.int32],
+        label_key_idx: int,
+        number_of_labels: int,
+        number_of_examples: int,
+    ) -> dict[
+        str,
+        dict[
+            str,
+            int | list[dict[str, str | tuple[float, float, float, float]]],
+        ],
+    ]:
+        """
+        Unpacks a numpy array of missing prediction counts and examples.
+        """
+        datum_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 1,
+                ]
+            )
+        )
+        groundtruth_idx = (
+            lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
+                missing_predictions[
+                    gt_label_idx,
+                    example_idx * 2 + 2,
+                ]
+            )
+        )
+        return {
+            self.index_to_label[gt_label_idx][1]: {
+                "count": max(
+                    int(missing_predictions[gt_label_idx, 0]),
+                    0,
+                ),
+                "examples": [
+                    {
+                        "datum": self.index_to_uid[
+                            datum_idx(gt_label_idx, example_idx)
+                        ],
+                        "groundtruth": tuple(
+                            self.groundtruth_examples[
+                                datum_idx(gt_label_idx, example_idx)
+                            ][
+                                groundtruth_idx(gt_label_idx, example_idx)
+                            ].tolist()
+                        ),
+                    }
+                    for example_idx in range(number_of_examples)
+                    if datum_idx(gt_label_idx, example_idx) >= 0
+                ],
+            }
+            for gt_label_idx in range(number_of_labels)
+            if (
+                self.label_index_to_label_key_index[gt_label_idx]
+                == label_key_idx
+            )
+        }
+    def _compute_confusion_matrix(
+        self,
+        data: NDArray[np.floating],
+        label_metadata: NDArray[np.int32],
         iou_thresholds: list[float] = [0.5],
         score_thresholds: list[float] = [
             score / 10.0 for score in range(1, 11)
         ],
-        n_samples: int = 0,
-    ) -> list[DetailedCounts]:
+        number_of_examples: int = 0,
+    ) -> list[ConfusionMatrix]:
         """
         Computes detailed counting metrics.
@@ -557,132 +845,63 @@ class Evaluator:
             List of IoU thresholds to compute metrics for.
         score_thresholds : list[float], default=[0.1,0.2,...,1.0]
             List of confidence thresholds to compute metrics for.
-        n_samples : int, default=0
-            Number of datum samples to return per metric.
+        number_of_examples : int, default=0
+            Maximum number of annotation examples to return per metric.
         Returns
         -------
-        list[list[DetailedCounts]]
+        list[list[ConfusionMatrix]]
             Outer list is indexed by label, inner list is by IoU.
         """
-        if self._detailed_pairs.size == 0:
+        if data.size == 0:
             return list()
-        metrics = compute_detailed_counts(
-            data=self._detailed_pairs,
-            label_metadata=self._label_metadata,
+        (
+            confusion_matrix,
+            hallucinations,
+            missing_predictions,
+        ) = compute_confusion_matrix(
+            data=data,
+            label_metadata=label_metadata,
             iou_thresholds=np.array(iou_thresholds),
             score_thresholds=np.array(score_thresholds),
-            n_samples=n_samples,
+            n_examples=number_of_examples,
         )
-        tp_idx = 0
-        fp_misclf_idx = 2 * n_samples + 1
-        fp_halluc_idx = 4 * n_samples + 2
-        fn_misclf_idx = 6 * n_samples + 3
-        fn_misprd_idx = 8 * n_samples + 4
-        def _unpack_examples(
-            iou_idx: int,
-            label_idx: int,
-            type_idx: int,
-            example_source: dict[int, NDArray[np.float16]],
-        ) -> list[list[tuple[str, tuple[float, float, float, float]]]]:
-            """
-            Unpacks metric examples from computation.
-            """
-            type_idx += 1
-            results = list()
-            for score_idx in range(n_scores):
-                examples = list()
-                for example_idx in range(n_samples):
-                    datum_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2,
-                    ]
-                    annotation_idx = metrics[
-                        iou_idx,
-                        score_idx,
-                        label_idx,
-                        type_idx + example_idx * 2 + 1,
-                    ]
-                    if datum_idx >= 0:
-                        examples.append(
-                            (
-                                self.index_to_uid[datum_idx],
-                                tuple(
-                                    example_source[datum_idx][
-                                        annotation_idx
-                                    ].tolist()
-                                ),
-                            )
-                        )
-                results.append(examples)
-            return results
-        n_ious, n_scores, n_labels, _ = metrics.shape
+        n_ious, n_scores, n_labels, _, _ = confusion_matrix.shape
         return [
-            DetailedCounts(
+            ConfusionMatrix(
                 iou_threshold=iou_thresholds[iou_idx],
-                label=self.index_to_label[label_idx],
-                score_thresholds=score_thresholds,
-                tp=metrics[iou_idx, :, label_idx, tp_idx].astype(int).tolist(),
-                fp_misclassification=metrics[
-                    iou_idx, :, label_idx, fp_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fp_hallucination=metrics[iou_idx, :, label_idx, fp_halluc_idx]
-                .astype(int)
-                .tolist(),
-                fn_misclassification=metrics[
-                    iou_idx, :, label_idx, fn_misclf_idx
-                ]
-                .astype(int)
-                .tolist(),
-                fn_missing_prediction=metrics[
-                    iou_idx, :, label_idx, fn_misprd_idx
-                ]
-                .astype(int)
-                .tolist(),
-                tp_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=tp_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_misclf_idx,
-                    example_source=self.prediction_examples,
-                ),
-                fp_hallucination_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fp_halluc_idx,
-                    example_source=self.prediction_examples,
+                score_threshold=score_thresholds[score_idx],
+                label_key=label_key,
+                number_of_examples=number_of_examples,
+                confusion_matrix=self._unpack_confusion_matrix(
+                    confusion_matrix=confusion_matrix[
+                        iou_idx, score_idx, :, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_misclassification_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misclf_idx,
-                    example_source=self.groundtruth_examples,
+                hallucinations=self._unpack_hallucinations(
+                    hallucinations=hallucinations[iou_idx, score_idx, :, :],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
-                fn_missing_prediction_examples=_unpack_examples(
-                    iou_idx=iou_idx,
-                    label_idx=label_idx,
-                    type_idx=fn_misprd_idx,
-                    example_source=self.groundtruth_examples,
+                missing_predictions=self._unpack_missing_predictions(
+                    missing_predictions=missing_predictions[
+                        iou_idx, score_idx, :, :
+                    ],
+                    label_key_idx=label_key_idx,
+                    number_of_labels=n_labels,
+                    number_of_examples=number_of_examples,
                 ),
             )
-            for label_idx in range(n_labels)
+            for label_key_idx, label_key in self.index_to_label_key.items()
             for iou_idx in range(n_ious)
+            for score_idx in range(n_scores)
         ]

valor_lite/detection/metric.py CHANGED Viewed

@@ -19,7 +19,7 @@ class MetricType(str, Enum):
     ARAveragedOverScores = "ARAveragedOverScores"
     mARAveragedOverScores = "mARAveragedOverScores"
     PrecisionRecallCurve = "PrecisionRecallCurve"
-    DetailedCounts = "DetailedCounts"
+    ConfusionMatrix = "ConfusionMatrix"
     @classmethod
     def base_metrics(cls):
@@ -329,52 +329,78 @@ class PrecisionRecallCurve:
 @dataclass
-class DetailedCounts:
-    tp: list[int]
-    fp_misclassification: list[int]
-    fp_hallucination: list[int]
-    fn_misclassification: list[int]
-    fn_missing_prediction: list[int]
-    tp_examples: list[list[tuple[str, tuple[float, float, float, float]]]]
-    fp_misclassification_examples: list[
-        list[tuple[str, tuple[float, float, float, float]]]
+class ConfusionMatrix:
+    confusion_matrix: dict[
+        str,  # ground truth label value
+        dict[
+            str,  # prediction label value
+            dict[
+                str,  # either `count` or `examples`
+                int
+                | list[
+                    dict[
+                        str,  # either `datum`, `groundtruth`, `prediction` or score
+                        str  # datum uid
+                        | tuple[
+                            float, float, float, float
+                        ]  # bounding box (xmin, xmax, ymin, ymax)
+                        | float,  # prediction score
+                    ]
+                ],
+            ],
+        ],
     ]
-    fp_hallucination_examples: list[
-        list[tuple[str, tuple[float, float, float, float]]]
+    hallucinations: dict[
+        str,  # prediction label value
+        dict[
+            str,  # either `count` or `examples`
+            int
+            | list[
+                dict[
+                    str,  # either `datum`, `prediction` or score
+                    str  # datum uid
+                    | float  # prediction score
+                    | tuple[
+                        float, float, float, float
+                    ],  # bounding box (xmin, xmax, ymin, ymax)
+                ]
+            ],
+        ],
     ]
-    fn_misclassification_examples: list[
-        list[tuple[str, tuple[float, float, float, float]]]
+    missing_predictions: dict[
+        str,  # ground truth label value
+        dict[
+            str,  # either `count` or `examples`
+            int
+            | list[
+                dict[
+                    str,  # either `datum` or `groundtruth`
+                    str  # datum uid
+                    | tuple[
+                        float, float, float, float
+                    ],  # bounding box (xmin, xmax, ymin, ymax)
+                ]
+            ],
+        ],
     ]
-    fn_missing_prediction_examples: list[
-        list[tuple[str, tuple[float, float, float, float]]]
-    ]
-    score_thresholds: list[float]
+    score_threshold: float
     iou_threshold: float
-    label: tuple[str, str]
+    label_key: str
+    number_of_examples: int
     @property
     def metric(self) -> Metric:
         return Metric(
             type=type(self).__name__,
             value={
-                "tp": self.tp,
-                "fp_misclassification": self.fp_misclassification,
-                "fp_hallucination": self.fp_hallucination,
-                "fn_misclassification": self.fn_misclassification,
-                "fn_missing_prediction": self.fn_missing_prediction,
-                "tp_examples": self.tp_examples,
-                "fp_misclassification_examples": self.fp_misclassification_examples,
-                "fp_hallucination_examples": self.fp_hallucination_examples,
-                "fn_misclassification_examples": self.fn_misclassification_examples,
-                "fn_missing_prediction_examples": self.fn_missing_prediction_examples,
+                "confusion_matrix": self.confusion_matrix,
+                "hallucinations": self.hallucinations,
+                "missing_predictions": self.missing_predictions,
             },
             parameters={
-                "score_thresholds": self.score_thresholds,
+                "score_threshold": self.score_threshold,
                 "iou_threshold": self.iou_threshold,
-                "label": {
-                    "key": self.label[0],
-                    "value": self.label[1],
-                },
+                "label_key": self.label_key,
             },
         )

{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.4
+Version: 0.33.5
 Summary: Compute valor metrics directly in your client.
 License: MIT License

valor_lite-0.33.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
+valor_lite/detection/__init__.py,sha256=PiKfemo8FkZRzBhPSjhil8ahGURLy0Vk_iV25CB4UBU,1139
+valor_lite/detection/annotation.py,sha256=BspLc3SjWXj6qYlGGpzDPHEZ8j7CiFzIL5cNlk0WCAM,2732
+valor_lite/detection/computation.py,sha256=HDFfPTFQN2obm-g570KKDf7SP9V-h09OyMtFEJXsoTA,26323
+valor_lite/detection/manager.py,sha256=ld2ytAw96UOO25iTwnfvAI1D2UY2Z1AGmP7cyCrT-V4,52801
+valor_lite/detection/metric.py,sha256=RYKN17nEFRIZIqmotQa6OyNnU0nkjXyfFIclX_5hGpY,9933
+valor_lite-0.33.5.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
+valor_lite-0.33.5.dist-info/METADATA,sha256=WL0LQR2fT4CO4MuV0aXIkLPt3zQW2SsBS4MwcA_kHJY,1865
+valor_lite-0.33.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+valor_lite-0.33.5.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
+valor_lite-0.33.5.dist-info/RECORD,,

valor_lite-0.33.4.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-valor_lite/schemas.py,sha256=r4cC10w1xYsA785KmGE4ePeOX3wzEs846vT7QAiVg_I,293
-valor_lite/detection/__init__.py,sha256=taEB7NQBsyCSsMtvDA7E_FhDxMfJB1rax-Rl1ZtRMoE,1017
-valor_lite/detection/annotation.py,sha256=BspLc3SjWXj6qYlGGpzDPHEZ8j7CiFzIL5cNlk0WCAM,2732
-valor_lite/detection/computation.py,sha256=AsF9zb_c7XQ7z3LfOAtMPZDkmuCZmB8HeAMZJlCaO6U,24696
-valor_lite/detection/manager.py,sha256=vnouYdx_Ul9jz_pOYt8xfvdPrNy0S4SB838KXvtS1Bw,45301
-valor_lite/detection/metric.py,sha256=DLqpODJZOG7SCqt7TCgR4am68PQORRCIQW_SXiTb1IA,9473
-valor_lite-0.33.4.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
-valor_lite-0.33.4.dist-info/METADATA,sha256=Eqb7KlTizDcjIV7eWM67zgdbbbVICGURdGrbben2NrI,1865
-valor_lite-0.33.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-valor_lite-0.33.4.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
-valor_lite-0.33.4.dist-info/RECORD,,

{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{valor_lite-0.33.4.dist-info → valor_lite-0.33.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

valor-lite 0.33.4__py3-none-any.whl → 0.33.5__py3-none-any.whl

valor-lite 0.33.4__py3-none-any.whl → 0.33.5__py3-none-any.whl