valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. valor_lite/cache/__init__.py +11 -0
  2. valor_lite/cache/compute.py +211 -0
  3. valor_lite/cache/ephemeral.py +302 -0
  4. valor_lite/cache/persistent.py +536 -0
  5. valor_lite/classification/__init__.py +5 -10
  6. valor_lite/classification/annotation.py +4 -0
  7. valor_lite/classification/computation.py +233 -251
  8. valor_lite/classification/evaluator.py +882 -0
  9. valor_lite/classification/loader.py +97 -0
  10. valor_lite/classification/metric.py +141 -4
  11. valor_lite/classification/shared.py +184 -0
  12. valor_lite/classification/utilities.py +221 -118
  13. valor_lite/exceptions.py +5 -0
  14. valor_lite/object_detection/__init__.py +5 -4
  15. valor_lite/object_detection/annotation.py +13 -1
  16. valor_lite/object_detection/computation.py +368 -299
  17. valor_lite/object_detection/evaluator.py +804 -0
  18. valor_lite/object_detection/loader.py +292 -0
  19. valor_lite/object_detection/metric.py +152 -3
  20. valor_lite/object_detection/shared.py +206 -0
  21. valor_lite/object_detection/utilities.py +182 -100
  22. valor_lite/semantic_segmentation/__init__.py +5 -4
  23. valor_lite/semantic_segmentation/annotation.py +7 -0
  24. valor_lite/semantic_segmentation/computation.py +20 -110
  25. valor_lite/semantic_segmentation/evaluator.py +414 -0
  26. valor_lite/semantic_segmentation/loader.py +205 -0
  27. valor_lite/semantic_segmentation/shared.py +149 -0
  28. valor_lite/semantic_segmentation/utilities.py +6 -23
  29. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
  30. valor_lite-0.37.5.dist-info/RECORD +49 -0
  31. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
  32. valor_lite/classification/manager.py +0 -545
  33. valor_lite/object_detection/manager.py +0 -864
  34. valor_lite/profiling.py +0 -374
  35. valor_lite/semantic_segmentation/benchmark.py +0 -237
  36. valor_lite/semantic_segmentation/manager.py +0 -446
  37. valor_lite-0.36.6.dist-info/RECORD +0 -41
  38. {valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
valor_lite/classification/utilities.py CHANGED
@@ -1,54 +1,25 @@
  from collections import defaultdict

  import numpy as np
+ import pyarrow as pa
  from numpy.typing import NDArray

- from valor_lite.classification.computation import PairClassification
  from valor_lite.classification.metric import Metric, MetricType


- def unpack_precision_recall_rocauc_into_metric_lists(
-     results: tuple[
-         NDArray[np.int32],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         NDArray[np.float64],
-         float,
-     ],
+ def unpack_precision_recall(
+     counts: NDArray[np.uint64],
+     precision: NDArray[np.float64],
+     recall: NDArray[np.float64],
+     accuracy: NDArray[np.float64],
+     f1_score: NDArray[np.float64],
      score_thresholds: list[float],
      hardmax: bool,
-     label_metadata: NDArray[np.int32],
-     index_to_label: list[str],
+     index_to_label: dict[int, str],
  ) -> dict[MetricType, list[Metric]]:
-     (
-         counts,
-         precision,
-         recall,
-         accuracy,
-         f1_score,
-         rocauc,
-         mean_rocauc,
-     ) = results

      metrics = defaultdict(list)

-     metrics[MetricType.ROCAUC] = [
-         Metric.roc_auc(
-             value=float(rocauc[label_idx]),
-             label=label,
-         )
-         for label_idx, label in enumerate(index_to_label)
-         if label_metadata[label_idx, 0] > 0
-     ]
-
-     metrics[MetricType.mROCAUC] = [
-         Metric.mean_roc_auc(
-             value=float(mean_rocauc),
-         )
-     ]
-
      metrics[MetricType.Accuracy] = [
          Metric.accuracy(
              value=float(accuracy[score_idx]),
@@ -58,9 +29,8 @@ def unpack_precision_recall_rocauc_into_metric_lists(
          for score_idx, score_threshold in enumerate(score_thresholds)
      ]

-     for label_idx, label in enumerate(index_to_label):
+     for label_idx, label in index_to_label.items():
          for score_idx, score_threshold in enumerate(score_thresholds):
-
              kwargs = {
                  "label": label,
                  "hardmax": hardmax,
@@ -77,10 +47,6 @@ def unpack_precision_recall_rocauc_into_metric_lists(
              )
          )

-         # if no groundtruths exists for a label, skip it.
-         if label_metadata[label_idx, 0] == 0:
-             continue
-
          metrics[MetricType.Precision].append(
              Metric.precision(
                  value=float(precision[score_idx, label_idx]),
@@ -102,110 +68,247 @@ def unpack_precision_recall_rocauc_into_metric_lists(
      return metrics


- def _create_empty_confusion_matrix(index_to_labels: list[str]):
-     unmatched_ground_truths = dict()
+ def unpack_rocauc(
+     rocauc: NDArray[np.float64],
+     mean_rocauc: float,
+     index_to_label: dict[int, str],
+ ) -> dict[MetricType, list[Metric]]:
+     metrics = {}
+     metrics[MetricType.ROCAUC] = [
+         Metric.roc_auc(
+             value=float(rocauc[label_idx]),
+             label=label,
+         )
+         for label_idx, label in index_to_label.items()
+     ]
+     metrics[MetricType.mROCAUC] = [
+         Metric.mean_roc_auc(
+             value=float(mean_rocauc),
+         )
+     ]
+     return metrics
+
+
+ def unpack_confusion_matrix(
+     confusion_matrices: NDArray[np.uint64],
+     unmatched_groundtruths: NDArray[np.uint64],
+     index_to_label: dict[int, str],
+     score_thresholds: list[float],
+     hardmax: bool,
+ ) -> list[Metric]:
+     metrics = []
+     for score_idx, score_thresh in enumerate(score_thresholds):
+         cm_dict = {}
+         ugt_dict = {}
+         for idx, label in index_to_label.items():
+             ugt_dict[label] = int(unmatched_groundtruths[score_idx, idx])
+             for pidx, plabel in index_to_label.items():
+                 if label not in cm_dict:
+                     cm_dict[label] = {}
+                 cm_dict[label][plabel] = int(
+                     confusion_matrices[score_idx, idx, pidx]
+                 )
+         metrics.append(
+             Metric.confusion_matrix(
+                 confusion_matrix=cm_dict,
+                 unmatched_ground_truths=ugt_dict,
+                 score_threshold=score_thresh,
+                 hardmax=hardmax,
+             )
+         )
+     return metrics
+
+
+ def create_mapping(
+     tbl: pa.Table,
+     pairs: NDArray[np.float64],
+     index: int,
+     id_col: str,
+     uid_col: str,
+ ) -> dict[int, str]:
+     col = pairs[:, index].astype(np.int64)
+     values, indices = np.unique(col, return_index=True)
+     indices = indices[values >= 0]
+     return {
+         tbl[id_col][idx].as_py(): tbl[uid_col][idx].as_py() for idx in indices
+     }
+
+
+ def unpack_examples(
+     ids: NDArray[np.int64],
+     mask_tp: NDArray[np.bool_],
+     mask_fn: NDArray[np.bool_],
+     mask_fp: NDArray[np.bool_],
+     score_thresholds: list[float],
+     hardmax: bool,
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
+ ) -> list[Metric]:
+     metrics = []
+     unique_datums = np.unique(ids[:, 0])
+     for datum_index in unique_datums:
+         mask_datum = ids[:, 0] == datum_index
+         mask_datum_tp = mask_tp & mask_datum
+         mask_datum_fp = mask_fp & mask_datum
+         mask_datum_fn = mask_fn & mask_datum
+
+         datum_id = index_to_datum_id[datum_index]
+         for score_idx, score_thresh in enumerate(score_thresholds):
+
+             unique_tp = np.unique(
+                 # extract true-positive (datum_id, gt_id, pd_id) pairs
+                 ids[np.ix_(mask_datum_tp[score_idx], (0, 1, 2))],
+                 axis=0,
+             )
+             unique_fp = np.unique(
+                 # extract false-positive (datum_id, pd_id) pairs
+                 ids[np.ix_(mask_datum_fp[score_idx], (0, 2))],
+                 axis=0,
+             )
+             unique_fn = np.unique(
+                 # extract false-negative (datum_id, gt_id)
+                 ids[np.ix_(mask_datum_fn[score_idx], (0, 1))],
+                 axis=0,
+             )
+
+             tp = [index_to_label[row[1]] for row in unique_tp]
+             fp = [
+                 index_to_label[row[1]]
+                 for row in unique_fp
+                 if index_to_label[row[1]] not in tp
+             ]
+             fn = [
+                 index_to_label[row[1]]
+                 for row in unique_fn
+                 if index_to_label[row[1]] not in tp
+             ]
+             metrics.append(
+                 Metric.examples(
+                     datum_id=datum_id,
+                     true_positives=tp,
+                     false_negatives=fn,
+                     false_positives=fp,
+                     score_threshold=score_thresh,
+                     hardmax=hardmax,
+                 )
+             )
+     return metrics
+
+
+ def create_empty_confusion_matrix_with_examples(
+     score_threshold: float,
+     hardmax: bool,
+     index_to_label: dict[int, str],
+ ) -> Metric:
+     unmatched_groundtruths = dict()
      confusion_matrix = dict()
-     for label in index_to_labels:
-         unmatched_ground_truths[label] = {"count": 0, "examples": []}
+     for label in index_to_label.values():
+         unmatched_groundtruths[label] = {"count": 0, "examples": []}
          confusion_matrix[label] = {}
-         for plabel in index_to_labels:
+         for plabel in index_to_label.values():
              confusion_matrix[label][plabel] = {"count": 0, "examples": []}
-     return (
-         confusion_matrix,
-         unmatched_ground_truths,
+
+     return Metric.confusion_matrix_with_examples(
+         confusion_matrix=confusion_matrix,
+         unmatched_ground_truths=unmatched_groundtruths,
+         score_threshold=score_threshold,
+         hardmax=hardmax,
      )


- def _unpack_confusion_matrix(
-     ids: NDArray[np.int32],
+ def _unpack_confusion_matrix_with_examples(
+     metric: Metric,
+     ids: NDArray[np.int64],
      scores: NDArray[np.float64],
+     winners: NDArray[np.bool_],
      mask_matched: NDArray[np.bool_],
-     mask_fn_unmatched: NDArray[np.bool_],
-     index_to_datum_id: list[str],
-     index_to_label: list[str],
-     score_threshold: float,
+     mask_unmatched_fn: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
  ):
-     (
-         confusion_matrix,
-         unmatched_ground_truths,
-     ) = _create_empty_confusion_matrix(index_to_label)
-
-     unique_matches, unique_match_indices = np.unique(
-         ids[np.ix_(mask_matched, (0, 1, 2))],  # type: ignore - numpy ix_ typing
-         axis=0,
-         return_index=True,
-     )
-     (
-         unique_unmatched_groundtruths,
-         unique_unmatched_groundtruth_indices,
-     ) = np.unique(
-         ids[np.ix_(mask_fn_unmatched, (0, 1))],  # type: ignore - numpy ix_ typing
-         axis=0,
-         return_index=True,
-     )
+     if not isinstance(metric.value, dict):
+         raise TypeError("expected metric to contain a dictionary value")

-     n_matched = unique_matches.shape[0]
-     n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]
-     n_max = max(n_matched, n_unmatched_groundtruths)
+     mask_valid_gts = ids[:, 1] >= 0
+     mask_valid_pds = ids[:, 2] >= 0
+
+     valid_matches = ids[mask_valid_gts & mask_valid_pds]
+     valid_gts = ids[mask_valid_gts]
+
+     n_matched = 0
+     unique_matches = np.empty((1, 3))
+     if valid_matches.size > 0:
+         unique_matches, unique_match_indices = np.unique(
+             # extract matched (datum_id, gt_id, pd_id) pairs
+             valid_matches[np.ix_(mask_matched, (0, 1, 2))],  # type: ignore[reportArgumentType]
+             axis=0,
+             return_index=True,
+         )
+         scores = scores[mask_matched][unique_match_indices]
+         n_matched = unique_matches.shape[0]
+
+     n_unmatched_groundtruths = 0
+     unique_unmatched_groundtruths = np.empty((1, 2))
+     if valid_gts.size > 0:
+         unique_unmatched_groundtruths = np.unique(
+             # extract unmatched false-negative (datum_id, gt_id) pairs
+             valid_gts[np.ix_(mask_unmatched_fn, (0, 1))],  # type: ignore[reportArgumentType]
+             axis=0,
+         )
+         unique_unmatched_groundtruths = unique_unmatched_groundtruths[
+             unique_unmatched_groundtruths[:, 1] >= 0
+         ]
+         n_unmatched_groundtruths = unique_unmatched_groundtruths.shape[0]

+     n_max = max(n_matched, n_unmatched_groundtruths)
      for idx in range(n_max):
          if idx < n_matched:
+             datum_id = index_to_datum_id[unique_matches[idx, 0]]
              glabel = index_to_label[unique_matches[idx, 1]]
              plabel = index_to_label[unique_matches[idx, 2]]
-             confusion_matrix[glabel][plabel]["count"] += 1
-             confusion_matrix[glabel][plabel]["examples"].append(
+             score = float(scores[idx])
+
+             metric.value["confusion_matrix"][glabel][plabel]["count"] += 1
+             metric.value["confusion_matrix"][glabel][plabel][
+                 "examples"
+             ].append(
                  {
-                     "datum_id": index_to_datum_id[unique_matches[idx, 0]],
-                     "score": float(scores[unique_match_indices[idx]]),
+                     "datum_id": datum_id,
+                     "score": score,
                  }
              )
          if idx < n_unmatched_groundtruths:
+             datum_id = index_to_datum_id[unique_unmatched_groundtruths[idx, 0]]
              label = index_to_label[unique_unmatched_groundtruths[idx, 1]]
-             unmatched_ground_truths[label]["count"] += 1
-             unmatched_ground_truths[label]["examples"].append(
-                 {
-                     "datum_id": index_to_datum_id[
-                         unique_unmatched_groundtruths[idx, 0]
-                     ],
-                 }
+
+             metric.value["unmatched_ground_truths"][label]["count"] += 1
+             metric.value["unmatched_ground_truths"][label]["examples"].append(
+                 {"datum_id": datum_id}
              )

-     return Metric.confusion_matrix(
-         confusion_matrix=confusion_matrix,
-         unmatched_ground_truths=unmatched_ground_truths,
-         score_threshold=score_threshold,
-     )
+     return metric


- def unpack_confusion_matrix_into_metric_list(
-     result: NDArray[np.uint8],
-     detailed_pairs: NDArray[np.float64],
-     score_thresholds: list[float],
-     index_to_datum_id: list[str],
-     index_to_label: list[str],
+ def unpack_confusion_matrix_with_examples(
+     metrics: dict[int, Metric],
+     ids: NDArray[np.int64],
+     scores: NDArray[np.float64],
+     winners: NDArray[np.bool_],
+     mask_matched: NDArray[np.bool_],
+     mask_unmatched_fn: NDArray[np.bool_],
+     index_to_datum_id: dict[int, str],
+     index_to_label: dict[int, str],
  ) -> list[Metric]:
-
-     ids = detailed_pairs[:, :3].astype(np.int32)
-
-     mask_matched = (
-         np.bitwise_and(
-             result, PairClassification.TP | PairClassification.FP_FN_MISCLF
-         )
-         > 0
-     )
-     mask_fn_unmatched = (
-         np.bitwise_and(result, PairClassification.FN_UNMATCHED) > 0
-     )
-
      return [
-         _unpack_confusion_matrix(
+         _unpack_confusion_matrix_with_examples(
+             metric,
              ids=ids,
-             scores=detailed_pairs[:, 3],
+             scores=scores,
+             winners=winners,
              mask_matched=mask_matched[score_idx, :],
-             mask_fn_unmatched=mask_fn_unmatched[score_idx, :],
+             mask_unmatched_fn=mask_unmatched_fn[score_idx, :],
              index_to_datum_id=index_to_datum_id,
              index_to_label=index_to_label,
-             score_threshold=score_threshold,
          )
-         for score_idx, score_threshold in enumerate(score_thresholds)
+         for score_idx, metric in metrics.items()
      ]
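As a reading aid, a minimal sketch of how the new unpack_rocauc helper above might be called. The module path and the Metric factory methods are taken from this hunk; the arrays, labels, and mean value are invented for illustration.

import numpy as np

from valor_lite.classification.metric import MetricType
from valor_lite.classification.utilities import unpack_rocauc  # module path assumed from this diff

# one ROC AUC value per label index, plus a pre-computed mean
metrics = unpack_rocauc(
    rocauc=np.array([0.91, 0.78], dtype=np.float64),
    mean_rocauc=0.845,
    index_to_label={0: "cat", 1: "dog"},
)
print(metrics[MetricType.ROCAUC])   # one Metric per label
print(metrics[MetricType.mROCAUC])  # a single mean ROC AUC Metric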
valor_lite/exceptions.py CHANGED
@@ -5,6 +5,11 @@ class EmptyEvaluatorError(Exception):
          )


+ class EmptyCacheError(Exception):
+     def __init__(self):
+         super().__init__("cache contains no data")
+
+
  class EmptyFilterError(Exception):
      def __init__(self, message: str):
          super().__init__(message)
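For illustration, a small snippet exercising the new EmptyCacheError shown above. It only uses the constructor visible in this hunk; the conditions under which valor_lite raises it are not part of this diff.

from valor_lite.exceptions import EmptyCacheError

try:
    raise EmptyCacheError()
except EmptyCacheError as err:
    print(err)  # prints "cache contains no data"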
valor_lite/object_detection/__init__.py CHANGED
@@ -1,6 +1,8 @@
  from .annotation import Bitmask, BoundingBox, Detection, Polygon
- from .manager import DataLoader, Evaluator, Filter, Metadata
+ from .evaluator import Evaluator
+ from .loader import Loader
  from .metric import Metric, MetricType
+ from .shared import EvaluatorInfo

  __all__ = [
      "Bitmask",
@@ -9,8 +11,7 @@ __all__ = [
      "Polygon",
      "Metric",
      "MetricType",
-     "DataLoader",
+     "Loader",
      "Evaluator",
-     "Filter",
-     "Metadata",
+     "EvaluatorInfo",
  ]
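The export change above replaces the manager module with separate evaluator, loader, and shared modules. A sketch of the resulting import surface, using only the names listed in the hunk (construction and usage of these classes are not shown in this diff):

from valor_lite.object_detection import (
    BoundingBox,
    Detection,
    Evaluator,
    EvaluatorInfo,
    Loader,
    Metric,
    MetricType,
)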
valor_lite/object_detection/annotation.py CHANGED
@@ -1,5 +1,5 @@
  from dataclasses import dataclass, field
- from typing import Generic, TypeVar
+ from typing import Any, Generic, TypeVar

  import numpy as np
  from numpy.typing import NDArray
@@ -27,6 +27,8 @@ class BoundingBox:
          List of labels associated with the bounding box.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -50,6 +52,7 @@ class BoundingBox:
      ymax: float
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          if len(self.scores) == 0 and len(self.labels) != 1:
@@ -89,6 +92,8 @@ class Polygon:
          List of labels associated with the polygon.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -109,6 +114,7 @@ class Polygon:
      shape: ShapelyPolygon
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          if not isinstance(self.shape, ShapelyPolygon):
@@ -141,6 +147,8 @@ class Bitmask:
          List of labels associated with the mask.
      scores : list of float, optional
          Confidence scores corresponding to each label. Defaults to an empty list.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -161,6 +169,7 @@ class Bitmask:
      mask: NDArray[np.bool_]
      labels: list[str]
      scores: list[float] = field(default_factory=list)
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):

@@ -200,6 +209,8 @@ class Detection(Generic[AnnotationType]):
          List of ground truth annotations.
      predictions : list[BoundingBox] | list[Polygon] | list[Bitmask]
          List of predicted annotations.
+     metadata : dict[str, Any], optional
+         A dictionary containing any metadata to be used within filtering operations.

      Examples
      --------
@@ -217,6 +228,7 @@ class Detection(Generic[AnnotationType]):
      uid: str
      groundtruths: list[AnnotationType]
      predictions: list[AnnotationType]
+     metadata: dict[str, Any] | None = None

      def __post_init__(self):
          for prediction in self.predictions:
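To illustrate the new optional metadata fields added above, a hedged construction example. The coordinate field names other than ymax (xmin, ymin, xmax) and all metadata keys are assumptions for illustration, not taken from this diff.

from valor_lite.object_detection import BoundingBox, Detection

groundtruth = BoundingBox(
    xmin=10.0, ymin=10.0, xmax=50.0, ymax=60.0,
    labels=["dog"],
    metadata={"camera": "front"},  # new optional per-annotation metadata
)
prediction = BoundingBox(
    xmin=12.0, ymin=11.0, xmax=48.0, ymax=58.0,
    labels=["dog"],
    scores=[0.9],
    metadata={"model": "v2"},
)
detection = Detection(
    uid="img-001",
    groundtruths=[groundtruth],
    predictions=[prediction],
    metadata={"split": "val"},  # new optional per-datum metadata
)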