valor-lite 0.36.5__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. valor_lite/cache/__init__.py +11 -0
  2. valor_lite/cache/compute.py +211 -0
  3. valor_lite/cache/ephemeral.py +302 -0
  4. valor_lite/cache/persistent.py +536 -0
  5. valor_lite/classification/__init__.py +5 -10
  6. valor_lite/classification/annotation.py +4 -0
  7. valor_lite/classification/computation.py +233 -251
  8. valor_lite/classification/evaluator.py +882 -0
  9. valor_lite/classification/loader.py +97 -0
  10. valor_lite/classification/metric.py +141 -4
  11. valor_lite/classification/shared.py +184 -0
  12. valor_lite/classification/utilities.py +221 -118
  13. valor_lite/exceptions.py +5 -0
  14. valor_lite/object_detection/__init__.py +5 -4
  15. valor_lite/object_detection/annotation.py +13 -1
  16. valor_lite/object_detection/computation.py +367 -304
  17. valor_lite/object_detection/evaluator.py +804 -0
  18. valor_lite/object_detection/loader.py +292 -0
  19. valor_lite/object_detection/metric.py +152 -3
  20. valor_lite/object_detection/shared.py +206 -0
  21. valor_lite/object_detection/utilities.py +182 -109
  22. valor_lite/semantic_segmentation/__init__.py +5 -4
  23. valor_lite/semantic_segmentation/annotation.py +7 -0
  24. valor_lite/semantic_segmentation/computation.py +20 -110
  25. valor_lite/semantic_segmentation/evaluator.py +414 -0
  26. valor_lite/semantic_segmentation/loader.py +205 -0
  27. valor_lite/semantic_segmentation/shared.py +149 -0
  28. valor_lite/semantic_segmentation/utilities.py +6 -23
  29. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
  30. valor_lite-0.37.5.dist-info/RECORD +49 -0
  31. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
  32. valor_lite/classification/manager.py +0 -545
  33. valor_lite/object_detection/manager.py +0 -865
  34. valor_lite/profiling.py +0 -374
  35. valor_lite/semantic_segmentation/benchmark.py +0 -237
  36. valor_lite/semantic_segmentation/manager.py +0 -446
  37. valor_lite-0.36.5.dist-info/RECORD +0 -41
  38. {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
valor_lite/object_detection/computation.py
@@ -1,9 +1,12 @@
 from enum import IntFlag, auto
 
 import numpy as np
+import pyarrow as pa
 import shapely
 from numpy.typing import NDArray
 
+EPSILON = 1e-9
+
 
 def compute_bbox_iou(data: NDArray[np.float64]) -> NDArray[np.float64]:
     """
@@ -70,7 +73,7 @@ def compute_bbox_iou(data: NDArray[np.float64]) -> NDArray[np.float64]:
     np.divide(
         intersection_area,
         union_area,
-        where=union_area >= 1e-9,
+        where=union_area >= EPSILON,
         out=ious,
     )
     return ious
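
Note: the new module-level `EPSILON` constant replaces the scattered `1e-9` literals in the three IOU kernels. The `np.divide(..., where=..., out=...)` idiom they share writes the quotient only where the denominator is non-negligible and leaves the pre-initialized zeros elsewhere. A minimal sketch of the pattern (array values are hypothetical):

    import numpy as np

    EPSILON = 1e-9

    intersection = np.array([2.0, 0.0, 5.0])
    union = np.array([4.0, 0.0, 0.0])

    # rows whose denominator is below EPSILON keep the pre-initialized 0.0
    ious = np.zeros_like(intersection)
    np.divide(intersection, union, where=union >= EPSILON, out=ious)
    print(ious)  # [0.5 0.  0. ]
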
@@ -117,7 +120,7 @@ def compute_bitmask_iou(data: NDArray[np.bool_]) -> NDArray[np.float64]:
     np.divide(
         intersection_,
         union_,
-        where=union_ >= 1e-9,
+        where=union_ >= EPSILON,
         out=ious,
     )
     return ious
@@ -167,291 +170,236 @@ def compute_polygon_iou(
     np.divide(
         intersection_areas,
         union_areas,
-        where=union_areas >= 1e-9,
+        where=union_areas >= EPSILON,
         out=ious,
     )
     return ious
 
 
-def compute_label_metadata(
-    ids: NDArray[np.int32],
-    n_labels: int,
-) -> NDArray[np.int32]:
+def rank_pairs(
+    sorted_pairs: NDArray[np.float64],
+) -> tuple[NDArray[np.float64], NDArray[np.intp]]:
     """
-    Computes label metadata returning a count of annotations per label.
+    Prunes and ranks prediction pairs.
+
+    Should result in a single pair per prediction annotation.
 
     Parameters
     ----------
-    detailed_pairs : NDArray[np.int32]
-        Detailed annotation pairings with shape (N, 7).
-        Index 0 - Datum Index
-        Index 1 - GroundTruth Index
-        Index 2 - Prediction Index
-        Index 3 - GroundTruth Label Index
-        Index 4 - Prediction Label Index
-    n_labels : int
-        The total number of unique labels.
+    sorted_pairs : NDArray[np.float64]
+        Ranked annotation pairs.
+        Index 0 - Datum Index
+        Index 1 - GroundTruth Index
+        Index 2 - Prediction Index
+        Index 3 - GroundTruth Label Index
+        Index 4 - Prediction Label Index
+        Index 5 - IOU
+        Index 6 - Score
 
     Returns
     -------
-    NDArray[np.int32]
-        The label metadata array with shape (n_labels, 2).
-        Index 0 - Ground truth label count
-        Index 1 - Prediction label count
+    NDArray[float64]
+        Ranked prediction pairs.
+    NDArray[intp]
+        Indices of ranked prediction pairs.
     """
-    label_metadata = np.zeros((n_labels, 2), dtype=np.int32)
-
-    ground_truth_pairs = ids[:, (0, 1, 3)]
-    ground_truth_pairs = ground_truth_pairs[ground_truth_pairs[:, 1] >= 0]
-    unique_pairs = np.unique(ground_truth_pairs, axis=0)
-    label_indices, unique_counts = np.unique(
-        unique_pairs[:, 2], return_counts=True
-    )
-    label_metadata[label_indices.astype(np.int32), 0] = unique_counts
-
-    prediction_pairs = ids[:, (0, 2, 4)]
-    prediction_pairs = prediction_pairs[prediction_pairs[:, 1] >= 0]
-    unique_pairs = np.unique(prediction_pairs, axis=0)
-    label_indices, unique_counts = np.unique(
-        unique_pairs[:, 2], return_counts=True
-    )
-    label_metadata[label_indices.astype(np.int32), 1] = unique_counts
 
-    return label_metadata
+    # remove unmatched ground truths
+    mask_predictions = sorted_pairs[:, 2] >= 0.0
+    pairs = sorted_pairs[mask_predictions]
+    indices = np.where(mask_predictions)[0]
 
+    # find best fits for prediction
+    mask_label_match = np.isclose(pairs[:, 3], pairs[:, 4])
+    matched_predictions = np.unique(pairs[mask_label_match, 2])
 
-def filter_cache(
-    detailed_pairs: NDArray[np.float64],
-    mask_datums: NDArray[np.bool_],
-    mask_predictions: NDArray[np.bool_],
-    mask_ground_truths: NDArray[np.bool_],
-    n_labels: int,
-) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.int32],]:
-    """
-    Performs filtering on a detailed cache.
+    mask_unmatched_predictions = ~np.isin(pairs[:, 2], matched_predictions)
 
-    Parameters
-    ----------
-    detailed_pairs : NDArray[float64]
-        A list of sorted detailed pairs with size (N, 7).
-    mask_datums : NDArray[bool]
-        A boolean mask with size (N,).
-    mask_ground_truths : NDArray[bool]
-        A boolean mask with size (N,).
-    mask_predictions : NDArray[bool]
-        A boolean mask with size (N,).
-    n_labels : int
-        The total number of unique labels.
+    pairs = pairs[mask_label_match | mask_unmatched_predictions]
+    indices = indices[mask_label_match | mask_unmatched_predictions]
 
-    Returns
-    -------
-    NDArray[float64]
-        Filtered detailed pairs.
-    NDArray[float64]
-        Filtered ranked pairs.
-    NDArray[int32]
-        Label metadata.
-    """
-    # filter datums
-    detailed_pairs = detailed_pairs[mask_datums].copy()
-
-    # filter ground truths
-    if mask_ground_truths.any():
-        invalid_groundtruth_indices = np.where(mask_ground_truths)[0]
-        detailed_pairs[
-            invalid_groundtruth_indices[:, None], (1, 3, 5)
-        ] = np.array([[-1, -1, 0]])
-
-    # filter predictions
-    if mask_predictions.any():
-        invalid_prediction_indices = np.where(mask_predictions)[0]
-        detailed_pairs[
-            invalid_prediction_indices[:, None], (2, 4, 5, 6)
-        ] = np.array([[-1, -1, 0, -1]])
-
-    # filter null pairs
-    mask_null_pairs = np.all(
-        np.isclose(
-            detailed_pairs[:, 1:5],
-            np.array([-1.0, -1.0, -1.0, -1.0]),
-        ),
-        axis=1,
+    # only keep the highest ranked prediction (datum_id, prediction_id, predicted_label_id)
+    _, unique_indices = np.unique(
+        pairs[:, [0, 2, 4]], axis=0, return_index=True
     )
-    detailed_pairs = detailed_pairs[~mask_null_pairs]
+    pairs = pairs[unique_indices]
+    indices = indices[unique_indices]
 
-    # sorts by score, iou with ground truth id as a tie-breaker
-    indices = np.lexsort(
+    # np.unique orders its results by value, we need to sort the indices to maintain the results of the lexsort
+    sorted_indices = np.lexsort(
         (
-            detailed_pairs[:, 1],  # ground truth id
-            -detailed_pairs[:, 5],  # iou
-            -detailed_pairs[:, 6],  # score
+            -pairs[:, 5],  # iou
+            -pairs[:, 6],  # score
         )
     )
-    detailed_pairs = detailed_pairs[indices]
-    label_metadata = compute_label_metadata(
-        ids=detailed_pairs[:, :5].astype(np.int32),
-        n_labels=n_labels,
-    )
-    ranked_pairs = rank_pairs(
-        detailed_pairs=detailed_pairs,
-        label_metadata=label_metadata,
-    )
-    return (
-        detailed_pairs,
-        ranked_pairs,
-        label_metadata,
-    )
+    pairs = pairs[sorted_indices]
+    indices = indices[sorted_indices]
 
+    return pairs, indices
 
-def rank_pairs(
-    detailed_pairs: NDArray[np.float64],
-    label_metadata: NDArray[np.int32],
+
+def calculate_ranking_boundaries(
+    ranked_pairs: NDArray[np.float64],
 ) -> NDArray[np.float64]:
     """
-    Highly optimized pair ranking for computing precision and recall based metrics.
-
-    Only ground truths and predictions that provide unique information are kept. The unkept
-    pairs are represented via the label metadata array.
+    Determine IOU boundaries for computing AP across chunks.
 
     Parameters
     ----------
-    detailed_pairs : NDArray[np.float64]
-        Detailed annotation pairs with shape (n_pairs, 7).
-        Index 0 - Datum Index
-        Index 1 - GroundTruth Index
-        Index 2 - Prediction Index
-        Index 3 - GroundTruth Label Index
-        Index 4 - Prediction Label Index
-        Index 5 - IOU
-        Index 6 - Score
-    label_metadata : NDArray[np.int32]
-        Array containing label counts with shape (n_labels, 2)
-        Index 0 - Ground truth label count
-        Index 1 - Prediction label count
+    ranked_pairs : NDArray[np.float64]
+        Ranked annotation pairs.
+        Index 0 - Datum Index
+        Index 1 - GroundTruth Index
+        Index 2 - Prediction Index
+        Index 3 - GroundTruth Label Index
+        Index 4 - Prediction Label Index
+        Index 5 - IOU
+        Index 6 - Score
 
     Returns
     -------
     NDArray[np.float64]
-        Array of ranked pairs for precision-recall metric computation.
+        A 1-D array containing the lower IOU boundary for classifying pairs as true-positive across chunks.
     """
-    pairs = detailed_pairs
-
-    # remove null predictions
-    pairs = pairs[pairs[:, 2] >= 0.0]
-
-    # find best fits for prediction
-    mask_label_match = np.isclose(pairs[:, 3], pairs[:, 4])
-    matched_predictions = np.unique(pairs[mask_label_match, 2])
-    mask_unmatched_predictions = ~np.isin(pairs[:, 2], matched_predictions)
-    pairs = pairs[mask_label_match | mask_unmatched_predictions]
+    ids = ranked_pairs[:, (0, 1, 2, 3, 4)].astype(np.int64)
+    gts = ids[:, (0, 1, 3)]
+    gt_labels = ids[:, 3]
+    pd_labels = ids[:, 4]
+    ious = ranked_pairs[:, 5]
 
-    # remove predictions for labels that have no ground truths
-    for label_idx, count in enumerate(label_metadata[:, 0]):
-        if count > 0:
+    # set default boundary to 2.0 as it will be used to check lower boundary in range [0-1].
+    iou_boundary = np.ones_like(ious) * 2
+
+    mask_matching_labels = gt_labels == pd_labels
+    mask_valid_gts = gts[:, 1] >= 0
+    unique_gts = np.unique(gts[mask_valid_gts], axis=0)
+    for gt in unique_gts:
+        mask_gt = (gts == gt).all(axis=1)
+        mask_gt &= mask_matching_labels
+        if mask_gt.sum() <= 1:
+            iou_boundary[mask_gt] = 0.0
             continue
-        pairs = pairs[pairs[:, 4] != label_idx]
 
-    # only keep the highest ranked pair
-    _, indices = np.unique(pairs[:, [0, 2, 4]], axis=0, return_index=True)
-    pairs = pairs[indices]
+        running_max = np.maximum.accumulate(ious[mask_gt])
+        mask_rmax = np.isclose(running_max, ious[mask_gt])
+        mask_rmax[1:] &= running_max[1:] > running_max[:-1]
+        mask_gt[mask_gt] &= mask_rmax
+
+        indices = np.where(mask_gt)[0]
+
+        iou_boundary[indices[0]] = 0.0
+        iou_boundary[indices[1:]] = ious[indices[:-1]]
+
+    return iou_boundary
+
+
+def rank_table(tbl: pa.Table) -> pa.Table:
+    """Rank table for AP computation."""
+    numeric_columns = [
+        "datum_id",
+        "gt_id",
+        "pd_id",
+        "gt_label_id",
+        "pd_label_id",
+        "iou",
+        "pd_score",
+    ]
+    sorting_args = [
+        ("pd_score", "descending"),
+        ("iou", "descending"),
+    ]
+
+    # initial sort
+    sorted_tbl = tbl.sort_by(sorting_args)
+    pairs = np.column_stack(
+        [sorted_tbl[col].to_numpy() for col in numeric_columns]
+    )
 
-    # np.unique orders its results by value, we need to sort the indices to maintain the results of the lexsort
-    indices = np.lexsort(
-        (
-            -pairs[:, 5],  # iou
-            -pairs[:, 6],  # score
-        )
+    # rank pairs
+    ranked_pairs, indices = rank_pairs(pairs)
+    ranked_tbl = sorted_tbl.take(indices)
+
+    # find boundaries
+    lower_iou_bound = calculate_ranking_boundaries(ranked_pairs)
+    ranked_tbl = ranked_tbl.append_column(
+        pa.field("iou_prev", pa.float64()),
+        pa.array(lower_iou_bound, type=pa.float64()),
     )
-    pairs = pairs[indices]
 
-    return pairs
+    return ranked_tbl
 
 
-def compute_precion_recall(
+def compute_counts(
     ranked_pairs: NDArray[np.float64],
-    label_metadata: NDArray[np.int32],
     iou_thresholds: NDArray[np.float64],
     score_thresholds: NDArray[np.float64],
-) -> tuple[
-    tuple[
-        NDArray[np.float64],
-        NDArray[np.float64],
-    ],
-    tuple[
-        NDArray[np.float64],
-        NDArray[np.float64],
-    ],
-    NDArray[np.float64],
-    NDArray[np.float64],
-]:
+    number_of_groundtruths_per_label: NDArray[np.uint64],
+    number_of_labels: int,
+    running_counts: NDArray[np.uint64],
+    pr_curve: NDArray[np.float64],
+) -> NDArray[np.uint64]:
     """
     Computes Object Detection metrics.
 
-    Takes data with shape (N, 7):
-
-    Index 0 - Datum Index
-    Index 1 - GroundTruth Index
-    Index 2 - Prediction Index
-    Index 3 - IOU
-    Index 4 - GroundTruth Label Index
-    Index 5 - Prediction Label Index
-    Index 6 - Score
+    Precision-recall curve and running counts are updated in-place.
 
     Parameters
     ----------
     ranked_pairs : NDArray[np.float64]
         A ranked array summarizing the IOU calculations of one or more pairs.
-    label_metadata : NDArray[np.int32]
-        An array containing metadata related to labels.
+        Index 0 - Datum Index
+        Index 1 - GroundTruth Index
+        Index 2 - Prediction Index
+        Index 3 - GroundTruth Label Index
+        Index 4 - Prediction Label Index
+        Index 5 - IOU
+        Index 6 - Score
+        Index 7 - IOU Lower Boundary
     iou_thresholds : NDArray[np.float64]
         A 1-D array containing IOU thresholds.
     score_thresholds : NDArray[np.float64]
         A 1-D array containing score thresholds.
+    number_of_groundtruths_per_label : NDArray[np.uint64]
+        A 1-D array containing total number of ground truths per label.
+    number_of_labels : int
+        Total number of unique labels.
+    running_counts : NDArray[np.uint64]
+        A 2-D array containing running counts of total predictions and true-positive. This array is mutated.
+    pr_curve : NDArray[np.float64]
+        A 2-D array containing 101-point binning of precision and score over a fixed recall interval. This array is mutated.
 
     Returns
     -------
-    tuple[NDArray[np.float64], NDArray[np.float64]]
-        Average Precision results (AP, mAP).
-    tuple[NDArray[np.float64], NDArray[np.float64]]
-        Average Recall results (AR, mAR).
-    NDArray[np.float64]
-        Precision, Recall, TP, FP, FN, F1 Score.
-    NDArray[np.float64]
-        Interpolated Precision-Recall Curves.
+    NDArray[uint64]
+        Batched counts of TP, FP, FN.
     """
     n_rows = ranked_pairs.shape[0]
-    n_labels = label_metadata.shape[0]
+    n_labels = number_of_labels
     n_ious = iou_thresholds.shape[0]
     n_scores = score_thresholds.shape[0]
 
     # initialize result arrays
-    average_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
-    mAP = np.zeros(n_ious, dtype=np.float64)
-    average_recall = np.zeros((n_scores, n_labels), dtype=np.float64)
-    mAR = np.zeros(n_scores, dtype=np.float64)
-    counts = np.zeros((n_ious, n_scores, n_labels, 6), dtype=np.float64)
-    pr_curve = np.zeros((n_ious, n_labels, 101, 2))
+    counts = np.zeros((n_ious, n_scores, 3, n_labels), dtype=np.uint64)
 
     # start computation
-    ids = ranked_pairs[:, :5].astype(np.int32)
+    ids = ranked_pairs[:, :5].astype(np.int64)
     gt_ids = ids[:, 1]
     gt_labels = ids[:, 3]
     pd_labels = ids[:, 4]
     ious = ranked_pairs[:, 5]
     scores = ranked_pairs[:, 6]
+    prev_ious = ranked_pairs[:, 7]
+
+    unique_pd_labels, _ = np.unique(pd_labels, return_index=True)
 
-    unique_pd_labels, unique_pd_indices = np.unique(
-        pd_labels, return_index=True
-    )
-    gt_count = label_metadata[:, 0]
     running_total_count = np.zeros(
         (n_ious, n_rows),
-        dtype=np.float64,
+        dtype=np.uint64,
     )
     running_tp_count = np.zeros_like(running_total_count)
-    running_gt_count = np.zeros_like(running_total_count)
+    running_gt_count = number_of_groundtruths_per_label[pd_labels]
 
-    mask_score_nonzero = scores > 1e-9
+    mask_score_nonzero = scores > EPSILON
    mask_gt_exists = gt_ids >= 0.0
    mask_labels_match = np.isclose(gt_labels, pd_labels)
 
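
Note: `rank_pairs`, `calculate_ranking_boundaries`, and `rank_table` replace the old `filter_cache`/`rank_pairs` pipeline. The table is sorted by score and IOU, pruned to one pair per prediction, and annotated with an `iou_prev` column: the IOU below which a pair stops being its ground truth's best surviving match. A small sketch, assuming the module path from the file list above:

    import pyarrow as pa

    # assumed import path
    from valor_lite.object_detection.computation import rank_table

    # one datum, one ground truth, two candidate predictions
    tbl = pa.table(
        {
            "datum_id": [0.0, 0.0],
            "gt_id": [0.0, 0.0],
            "pd_id": [0.0, 1.0],
            "gt_label_id": [0.0, 0.0],
            "pd_label_id": [0.0, 0.0],
            "iou": [0.9, 0.4],
            "pd_score": [0.8, 0.95],
        }
    )

    ranked = rank_table(tbl)
    print(ranked.column("iou_prev").to_pylist())
    # [0.0, 0.4] -- rows are ordered by descending score, so the 0.9-IOU pair
    # comes second and only becomes the ground truth's true-positive
    # candidate once the IOU threshold rises above 0.4
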
@@ -459,23 +407,22 @@ def compute_precion_recall(
 
     mask_tp = mask_score_nonzero & mask_gt_exists_labels_match
     mask_fp = mask_score_nonzero
-    mask_fn = mask_gt_exists_labels_match
 
     for iou_idx in range(n_ious):
-        mask_iou = ious >= iou_thresholds[iou_idx]
+        mask_iou_curr = ious >= iou_thresholds[iou_idx]
+        mask_iou_prev = prev_ious < iou_thresholds[iou_idx]
+        mask_iou = mask_iou_curr & mask_iou_prev
 
         mask_tp_outer = mask_tp & mask_iou
         mask_fp_outer = mask_fp & (
             (~mask_gt_exists_labels_match & mask_iou) | ~mask_iou
         )
-        mask_fn_outer = mask_fn & mask_iou
 
         for score_idx in range(n_scores):
             mask_score_thresh = scores >= score_thresholds[score_idx]
 
             mask_tp_inner = mask_tp_outer & mask_score_thresh
             mask_fp_inner = mask_fp_outer & mask_score_thresh
-            mask_fn_inner = mask_fn_outer & ~mask_score_thresh
 
             # create true-positive mask score threshold
             tp_candidates = ids[mask_tp_inner]
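
Note: the `mask_iou = mask_iou_curr & mask_iou_prev` test above is what keeps chunked evaluation consistent: a pair is a true-positive candidate at threshold `t` only when its own IOU clears `t` while its `iou_prev` boundary does not, so each ground truth contributes at most one candidate per threshold no matter how the rows are split into chunks. A toy check, continuing the two-pair example above:

    import numpy as np

    ious = np.array([0.4, 0.9])       # ranked order: higher score first
    prev_ious = np.array([0.0, 0.4])  # iou_prev boundaries from rank_table

    for t in (0.3, 0.5):
        print(t, (ious >= t) & (prev_ious < t))
    # 0.3 [ True False] -> the higher-scored 0.4-IOU pair is the candidate
    # 0.5 [False  True] -> only the 0.9-IOU pair clears the threshold
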
@@ -491,108 +438,150 @@ def compute_precion_recall(
             mask_fp_inner |= mask_tp_inner & ~true_positives_mask
 
             # calculate intermediates
-            tp_count = np.bincount(
+            counts[iou_idx, score_idx, 0, :] = np.bincount(
                 pd_labels,
                 weights=true_positives_mask,
                 minlength=n_labels,
-            ).astype(np.float64)
-            fp_count = np.bincount(
+            )
+            # fp count
+            counts[iou_idx, score_idx, 1, :] = np.bincount(
                 pd_labels[mask_fp_inner],
                 minlength=n_labels,
-            ).astype(np.float64)
-            fn_count = np.bincount(
-                pd_labels[mask_fn_inner],
-                minlength=n_labels,
             )
 
-            fn_count = gt_count - tp_count
-            tp_fp_count = tp_count + fp_count
-
-            # calculate component metrics
-            recall = np.zeros_like(tp_count)
-            np.divide(tp_count, gt_count, where=gt_count > 1e-9, out=recall)
-
-            precision = np.zeros_like(tp_count)
-            np.divide(
-                tp_count, tp_fp_count, where=tp_fp_count > 1e-9, out=precision
-            )
-
-            f1_score = np.zeros_like(precision)
-            np.divide(
-                2 * np.multiply(precision, recall),
-                (precision + recall),
-                where=(precision + recall) > 1e-9,
-                out=f1_score,
-                dtype=np.float64,
-            )
-
-            counts[iou_idx][score_idx] = np.concatenate(
-                (
-                    tp_count[:, np.newaxis],
-                    fp_count[:, np.newaxis],
-                    fn_count[:, np.newaxis],
-                    precision[:, np.newaxis],
-                    recall[:, np.newaxis],
-                    f1_score[:, np.newaxis],
-                ),
-                axis=1,
-            )
-
-            # calculate recall for AR
-            average_recall[score_idx] += recall
-
-        # create true-positive mask score threshold
-        tp_candidates = ids[mask_tp_outer]
-        _, indices_gt_unique = np.unique(
-            tp_candidates[:, [0, 1, 3]], axis=0, return_index=True
-        )
-        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=np.bool_)
-        mask_gt_unique[indices_gt_unique] = True
-        true_positives_mask = np.zeros(n_rows, dtype=np.bool_)
-        true_positives_mask[mask_tp_outer] = mask_gt_unique
-
         # count running tp and total for AP
         for pd_label in unique_pd_labels:
             mask_pd_label = pd_labels == pd_label
-            running_gt_count[iou_idx][mask_pd_label] = gt_count[pd_label]
-            running_total_count[iou_idx][mask_pd_label] = np.arange(
-                1, mask_pd_label.sum() + 1
+            total_count = mask_pd_label.sum()
+            if total_count == 0:
+                continue
+
+            # running total prediction count
+            running_total_count[iou_idx, mask_pd_label] = np.arange(
+                running_counts[iou_idx, pd_label, 0] + 1,
+                running_counts[iou_idx, pd_label, 0] + total_count + 1,
             )
-            mask_tp_for_counting = mask_pd_label & true_positives_mask
-            running_tp_count[iou_idx][mask_tp_for_counting] = np.arange(
-                1, mask_tp_for_counting.sum() + 1
+            running_counts[iou_idx, pd_label, 0] += total_count
+
+            # running true-positive count
+            mask_tp_for_counting = mask_pd_label & mask_tp_outer
+            tp_count = mask_tp_for_counting.sum()
+            running_tp_count[iou_idx, mask_tp_for_counting] = np.arange(
+                running_counts[iou_idx, pd_label, 1] + 1,
+                running_counts[iou_idx, pd_label, 1] + tp_count + 1,
             )
+            running_counts[iou_idx, pd_label, 1] += tp_count
 
     # calculate running precision-recall points for AP
-    precision = np.zeros_like(running_total_count)
+    precision = np.zeros_like(running_total_count, dtype=np.float64)
     np.divide(
         running_tp_count,
         running_total_count,
-        where=running_total_count > 1e-9,
+        where=running_total_count > 0,
         out=precision,
     )
-    recall = np.zeros_like(running_total_count)
+    recall = np.zeros_like(running_total_count, dtype=np.float64)
     np.divide(
         running_tp_count,
         running_gt_count,
-        where=running_gt_count > 1e-9,
+        where=running_gt_count > 0,
         out=recall,
     )
     recall_index = np.floor(recall * 100.0).astype(np.int32)
 
-    # bin precision-recall curve
+    # sort precision in descending order
+    precision_indices = np.argsort(-precision, axis=1)
+
+    # populate precision-recall curve
     for iou_idx in range(n_ious):
-        p = precision[iou_idx]
-        r = recall_index[iou_idx]
-        pr_curve[iou_idx, pd_labels, r, 0] = np.maximum(
-            pr_curve[iou_idx, pd_labels, r, 0],
-            p,
+        labeled_recall = np.hstack(
+            [
+                pd_labels.reshape(-1, 1),
+                recall_index[iou_idx, :].reshape(-1, 1),
+            ]
+        )
+
+        # extract maximum score per (label, recall) bin
+        # arrays are already ordered by descending score
+        lr_pairs, recall_indices = np.unique(
+            labeled_recall, return_index=True, axis=0
         )
-        pr_curve[iou_idx, pd_labels, r, 1] = np.maximum(
-            pr_curve[iou_idx, pd_labels, r, 1],
-            scores,
+        li = lr_pairs[:, 0]
+        ri = lr_pairs[:, 1]
+        pr_curve[iou_idx, li, ri, 1] = np.maximum(
+            pr_curve[iou_idx, li, ri, 1],
+            scores[recall_indices],
         )
 
+        # extract maximum precision per (label, recall) bin
+        # reorder arrays into descending precision order
+        indices = precision_indices[iou_idx]
+        sorted_precision = precision[iou_idx, indices]
+        sorted_labeled_recall = labeled_recall[indices]
+        lr_pairs, recall_indices = np.unique(
+            sorted_labeled_recall, return_index=True, axis=0
+        )
+        li = lr_pairs[:, 0]
+        ri = lr_pairs[:, 1]
+        pr_curve[iou_idx, li, ri, 0] = np.maximum(
+            pr_curve[iou_idx, li, ri, 0],
+            sorted_precision[recall_indices],
+        )
+
+    return counts
+
+
+def compute_precision_recall_f1(
+    counts: NDArray[np.uint64],
+    number_of_groundtruths_per_label: NDArray[np.uint64],
+) -> NDArray[np.float64]:
+
+    prec_rec_f1 = np.zeros_like(counts, dtype=np.float64)
+
+    # alias
+    tp_count = counts[:, :, 0, :]
+    fp_count = counts[:, :, 1, :]
+    tp_fp_count = tp_count + fp_count
+
+    # calculate component metrics
+    np.divide(
+        tp_count,
+        tp_fp_count,
+        where=tp_fp_count > 0,
+        out=prec_rec_f1[:, :, 0, :],
+    )
+    np.divide(
+        tp_count,
+        number_of_groundtruths_per_label,
+        where=number_of_groundtruths_per_label > 0,
+        out=prec_rec_f1[:, :, 1, :],
+    )
+    p = prec_rec_f1[:, :, 0, :]
+    r = prec_rec_f1[:, :, 1, :]
+    np.divide(
+        2 * np.multiply(p, r),
+        (p + r),
+        where=(p + r) > EPSILON,
+        out=prec_rec_f1[:, :, 2, :],
+    )
+    return prec_rec_f1
+
+
+def compute_average_recall(prec_rec_f1: NDArray[np.float64]):
+    recall = prec_rec_f1[:, :, 1, :]
+    average_recall = recall.mean(axis=0)
+    mAR = average_recall.mean(axis=-1)
+    return average_recall, mAR
+
+
+def compute_average_precision(pr_curve: NDArray[np.float64]):
+    n_ious = pr_curve.shape[0]
+    n_labels = pr_curve.shape[1]
+
+    # initialize result arrays
+    average_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
+    mAP = np.zeros(n_ious, dtype=np.float64)
+
     # calculate average precision
     running_max_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
     running_max_score = np.zeros((n_labels), dtype=np.float64)
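
Note: `compute_counts` is built to be called once per chunk of ranked rows: the returned TP/FP/FN counts are summed across chunks while `running_counts` and `pr_curve` are carried between calls and mutated in place. A hypothetical driver, with the import path and the array shapes (inferred from the indexing above) as assumptions:

    import numpy as np

    # assumed import path
    from valor_lite.object_detection.computation import (
        compute_average_precision,
        compute_average_recall,
        compute_counts,
        compute_precision_recall_f1,
    )

    iou_thresholds = np.array([0.5, 0.75])
    score_thresholds = np.array([0.25, 0.5, 0.75])
    n_ious, n_scores, n_labels = 2, 3, 2
    gts_per_label = np.array([1, 2], dtype=np.uint64)

    # carried across chunks: (n_ious, n_labels, 2) and (n_ious, n_labels, 101, 2)
    running_counts = np.zeros((n_ious, n_labels, 2), dtype=np.uint64)
    pr_curve = np.zeros((n_ious, n_labels, 101, 2), dtype=np.float64)
    totals = np.zeros((n_ious, n_scores, 3, n_labels), dtype=np.uint64)

    # a single toy chunk; columns: datum, gt, pd, gt label, pd label,
    # iou, score, iou_prev
    chunks = [np.array([[0, 0, 0, 0, 0, 0.9, 0.8, 0.0]])]
    for chunk in chunks:
        totals += compute_counts(
            chunk,
            iou_thresholds=iou_thresholds,
            score_thresholds=score_thresholds,
            number_of_groundtruths_per_label=gts_per_label,
            number_of_labels=n_labels,
            running_counts=running_counts,
            pr_curve=pr_curve,
        )

    prf1 = compute_precision_recall_f1(totals, gts_per_label)
    ap, mAP, _ = compute_average_precision(pr_curve)
    ar, mAR = compute_average_recall(prf1)
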
@@ -616,24 +605,11 @@ def compute_precion_recall(
 
     average_precision = average_precision / 101.0
 
-    # calculate average recall
-    average_recall = average_recall / n_ious
-
     # calculate mAP and mAR
-    if unique_pd_labels.size > 0:
-        mAP: NDArray[np.float64] = average_precision[:, unique_pd_labels].mean(
-            axis=1
-        )
-        mAR: NDArray[np.float64] = average_recall[:, unique_pd_labels].mean(
-            axis=1
-        )
+    if average_precision.size > 0:
+        mAP = average_precision.mean(axis=1)
 
-    return (
-        (average_precision.astype(np.float64), mAP),
-        (average_recall, mAR),
-        counts,
-        pr_curve,
-    )
+    return average_precision, mAP, pr_curve
 
 
 def _isin(
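
Note: with the division by 101.0 above, `compute_average_precision` averages an interpolated precision over the 101 recall bins of `pr_curve`, COCO-style. A minimal single-label sketch of that style of interpolation (names are illustrative):

    import numpy as np

    def interpolated_ap(precision_bins: np.ndarray) -> float:
        # precision_bins: raw max precision at recall bins 0.00..1.00, shape (101,)
        # enforce a monotonically non-increasing curve from high recall to low
        monotone = np.maximum.accumulate(precision_bins[::-1])[::-1]
        return float(monotone.mean())
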
@@ -666,6 +642,7 @@ def _isin(
 
 
 class PairClassification(IntFlag):
+    NULL = auto()
     TP = auto()
     FP_FN_MISCLF = auto()
     FP_UNMATCHED = auto()
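
Note: `PairClassification` gains a `NULL` member ahead of the existing flags, giving the "no classification" state its own bit (and, since the values come from auto(), shifting the values of the members after it). Because this is an IntFlag, one pair can carry several classifications and each is tested with a bitwise AND, which is how the new compute_pair_classifications below unpacks its masks. For illustration, assuming the four flags seen in this diff are the full set:

    from enum import IntFlag, auto

    class PairClassification(IntFlag):
        NULL = auto()          # 1
        TP = auto()            # 2
        FP_FN_MISCLF = auto()  # 4
        FP_UNMATCHED = auto()  # 8
        FN_UNMATCHED = auto()  # 16

    flags = PairClassification.FP_UNMATCHED | PairClassification.FN_UNMATCHED
    print(bool(flags & PairClassification.TP))            # False
    print(bool(flags & PairClassification.FN_UNMATCHED))  # True
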
@@ -704,11 +681,13 @@ def mask_pairs_greedily(
     return mask_matches
 
 
-def compute_confusion_matrix(
+def compute_pair_classifications(
     detailed_pairs: NDArray[np.float64],
     iou_thresholds: NDArray[np.float64],
     score_thresholds: NDArray[np.float64],
-) -> NDArray[np.uint8]:
+) -> tuple[
+    NDArray[np.bool_], NDArray[np.bool_], NDArray[np.bool_], NDArray[np.bool_]
+]:
     """
     Compute detailed counts.
 
@@ -760,8 +739,8 @@
     mask_gt_exists = gt_ids > -0.5
     mask_pd_exists = pd_ids > -0.5
     mask_label_match = np.isclose(gt_labels, pd_labels)
-    mask_score_nonzero = scores > 1e-9
-    mask_iou_nonzero = ious > 1e-9
+    mask_score_nonzero = scores > EPSILON
+    mask_iou_nonzero = ious > EPSILON
 
     mask_gt_pd_exists = mask_gt_exists & mask_pd_exists
     mask_gt_pd_match = mask_gt_pd_exists & mask_label_match
@@ -821,4 +800,88 @@
                 iou_idx, score_idx, mask_unmatched_groundtruths
             ] |= np.uint8(PairClassification.FN_UNMATCHED)
 
-    return pair_classifications
+    mask_tp = np.bitwise_and(pair_classifications, PairClassification.TP) > 0
+    mask_fp_fn_misclf = (
+        np.bitwise_and(pair_classifications, PairClassification.FP_FN_MISCLF)
+        > 0
+    )
+    mask_fp_unmatched = (
+        np.bitwise_and(pair_classifications, PairClassification.FP_UNMATCHED)
+        > 0
+    )
+    mask_fn_unmatched = (
+        np.bitwise_and(pair_classifications, PairClassification.FN_UNMATCHED)
+        > 0
+    )
+
+    return (
+        mask_tp,
+        mask_fp_fn_misclf,
+        mask_fp_unmatched,
+        mask_fn_unmatched,
+    )
+
+
+def compute_confusion_matrix(
+    detailed_pairs: NDArray[np.float64],
+    mask_tp: NDArray[np.bool_],
+    mask_fp_fn_misclf: NDArray[np.bool_],
+    mask_fp_unmatched: NDArray[np.bool_],
+    mask_fn_unmatched: NDArray[np.bool_],
+    number_of_labels: int,
+    iou_thresholds: NDArray[np.float64],
+    score_thresholds: NDArray[np.float64],
+):
+    n_ious = iou_thresholds.size
+    n_scores = score_thresholds.size
+    ids = detailed_pairs[:, :5].astype(np.int64)
+
+    # initialize arrays
+    confusion_matrices = np.zeros(
+        (n_ious, n_scores, number_of_labels, number_of_labels), dtype=np.uint64
+    )
+    unmatched_groundtruths = np.zeros(
+        (n_ious, n_scores, number_of_labels), dtype=np.uint64
+    )
+    unmatched_predictions = np.zeros_like(unmatched_groundtruths)
+
+    mask_matched = mask_tp | mask_fp_fn_misclf
+    for iou_idx in range(n_ious):
+        for score_idx in range(n_scores):
+            # matched annotations
+            unique_pairs = np.unique(
+                ids[np.ix_(mask_matched[iou_idx, score_idx], (0, 1, 2, 3, 4))],  # type: ignore - numpy ix_ typing
+                axis=0,
+            )
+            unique_labels, unique_label_counts = np.unique(
+                unique_pairs[:, (3, 4)], axis=0, return_counts=True
+            )
+            confusion_matrices[
+                iou_idx, score_idx, unique_labels[:, 0], unique_labels[:, 1]
+            ] = unique_label_counts
+
+            # unmatched groundtruths
+            unique_pairs = np.unique(
+                ids[np.ix_(mask_fn_unmatched[iou_idx, score_idx], (0, 1, 3))],  # type: ignore - numpy ix_ typing
+                axis=0,
+            )
+            unique_labels, unique_label_counts = np.unique(
+                unique_pairs[:, 2], return_counts=True
+            )
+            unmatched_groundtruths[
+                iou_idx, score_idx, unique_labels
+            ] = unique_label_counts
+
+            # unmatched predictions
+            unique_pairs = np.unique(
+                ids[np.ix_(mask_fp_unmatched[iou_idx, score_idx], (0, 2, 4))],  # type: ignore - numpy ix_ typing
+                axis=0,
+            )
+            unique_labels, unique_label_counts = np.unique(
+                unique_pairs[:, 2], return_counts=True
+            )
+            unmatched_predictions[
+                iou_idx, score_idx, unique_labels
+            ] = unique_label_counts
+
+    return confusion_matrices, unmatched_groundtruths, unmatched_predictions
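
Note: the old single-step confusion matrix has been split in two: compute_pair_classifications produces per-threshold boolean masks, and the new compute_confusion_matrix aggregates them into per-label confusion matrices plus unmatched ground-truth and prediction counts. A hypothetical end-to-end call (import path assumed):

    import numpy as np

    # assumed import path
    from valor_lite.object_detection.computation import (
        compute_confusion_matrix,
        compute_pair_classifications,
    )

    iou_thresholds = np.array([0.5])
    score_thresholds = np.array([0.5])

    # one pair: prediction 0 matches ground truth 0 on label 1
    # columns: datum, gt, pd, gt label, pd label, iou, score
    detailed_pairs = np.array([[0, 0, 0, 1, 1, 0.8, 0.9]], dtype=np.float64)

    masks = compute_pair_classifications(
        detailed_pairs, iou_thresholds, score_thresholds
    )
    cm, unmatched_gts, unmatched_pds = compute_confusion_matrix(
        detailed_pairs,
        *masks,  # mask_tp, mask_fp_fn_misclf, mask_fp_unmatched, mask_fn_unmatched
        number_of_labels=2,
        iou_thresholds=iou_thresholds,
        score_thresholds=score_thresholds,
    )
    print(cm[0, 0])  # rows: ground-truth labels, columns: predicted labels
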