valor-lite 0.36.5-py3-none-any.whl → 0.37.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +211 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +536 -0
- valor_lite/classification/__init__.py +5 -10
- valor_lite/classification/annotation.py +4 -0
- valor_lite/classification/computation.py +233 -251
- valor_lite/classification/evaluator.py +882 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +141 -4
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +221 -118
- valor_lite/exceptions.py +5 -0
- valor_lite/object_detection/__init__.py +5 -4
- valor_lite/object_detection/annotation.py +13 -1
- valor_lite/object_detection/computation.py +367 -304
- valor_lite/object_detection/evaluator.py +804 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +152 -3
- valor_lite/object_detection/shared.py +206 -0
- valor_lite/object_detection/utilities.py +182 -109
- valor_lite/semantic_segmentation/__init__.py +5 -4
- valor_lite/semantic_segmentation/annotation.py +7 -0
- valor_lite/semantic_segmentation/computation.py +20 -110
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +6 -23
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
- valor_lite-0.37.5.dist-info/RECORD +49 -0
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
- valor_lite/classification/manager.py +0 -545
- valor_lite/object_detection/manager.py +0 -865
- valor_lite/profiling.py +0 -374
- valor_lite/semantic_segmentation/benchmark.py +0 -237
- valor_lite/semantic_segmentation/manager.py +0 -446
- valor_lite-0.36.5.dist-info/RECORD +0 -41
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
valor_lite/object_detection/loader.py (new file, +292 lines):

```python
import numpy as np
import pyarrow as pa
from numpy.typing import NDArray
from tqdm import tqdm

from valor_lite.cache import FileCacheWriter, MemoryCacheWriter
from valor_lite.object_detection.annotation import (
    Bitmask,
    BoundingBox,
    Detection,
    Polygon,
)
from valor_lite.object_detection.computation import (
    EPSILON,
    compute_bbox_iou,
    compute_bitmask_iou,
    compute_polygon_iou,
)
from valor_lite.object_detection.evaluator import Builder


class Loader(Builder):
    def __init__(
        self,
        detailed_writer: MemoryCacheWriter | FileCacheWriter,
        ranked_writer: MemoryCacheWriter | FileCacheWriter,
        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
    ):
        super().__init__(
            detailed_writer=detailed_writer,
            ranked_writer=ranked_writer,
            metadata_fields=metadata_fields,
        )

        # internal state
        self._labels = {}
        self._datum_count = 0
        self._groundtruth_count = 0
        self._prediction_count = 0

    def _add_label(self, value: str) -> int:
        """Add a label to the index mapping."""
        idx = self._labels.get(value, None)
        if idx is None:
            idx = len(self._labels)
            self._labels[value] = idx
        return idx

    def _add_data(
        self,
        detections: list[Detection],
        detection_ious: list[NDArray[np.float64]],
        show_progress: bool = False,
    ):
        """Adds detections to the cache."""
        disable_tqdm = not show_progress
        for detection, ious in tqdm(
            zip(detections, detection_ious), disable=disable_tqdm
        ):
            # cache labels and annotation pairs
            datum_idx = self._datum_count
            datum_metadata = detection.metadata if detection.metadata else {}
            pairs = []
            if detection.groundtruths:
                for gidx, gann in enumerate(detection.groundtruths):
                    gt_id = self._groundtruth_count + gidx
                    glabel = gann.labels[0]
                    glabel_idx = self._add_label(gann.labels[0])
                    gann_metadata = gann.metadata if gann.metadata else {}
                    if (ious[:, gidx] < EPSILON).all():
                        pairs.append(
                            {
                                # metadata
                                **datum_metadata,
                                **gann_metadata,
                                # datum
                                "datum_uid": detection.uid,
                                "datum_id": datum_idx,
                                # groundtruth
                                "gt_uid": gann.uid,
                                "gt_id": gt_id,
                                "gt_label": glabel,
                                "gt_label_id": glabel_idx,
                                # prediction
                                "pd_uid": None,
                                "pd_id": -1,
                                "pd_label": None,
                                "pd_label_id": -1,
                                "pd_score": -1,
                                # pair
                                "iou": 0.0,
                            }
                        )
                    for pidx, pann in enumerate(detection.predictions):
                        pann_id = self._prediction_count + pidx
                        pann_metadata = pann.metadata if pann.metadata else {}
                        if (ious[pidx, :] < EPSILON).all():
                            pairs.extend(
                                [
                                    {
                                        # metadata
                                        **datum_metadata,
                                        **pann_metadata,
                                        # datum
                                        "datum_uid": detection.uid,
                                        "datum_id": datum_idx,
                                        # groundtruth
                                        "gt_uid": None,
                                        "gt_id": -1,
                                        "gt_label": None,
                                        "gt_label_id": -1,
                                        # prediction
                                        "pd_uid": pann.uid,
                                        "pd_id": pann_id,
                                        "pd_label": plabel,
                                        "pd_label_id": self._add_label(plabel),
                                        "pd_score": float(pscore),
                                        # pair
                                        "iou": 0.0,
                                    }
                                    for plabel, pscore in zip(
                                        pann.labels, pann.scores
                                    )
                                ]
                            )
                        if ious[pidx, gidx] >= EPSILON:
                            pairs.extend(
                                [
                                    {
                                        # metadata
                                        **datum_metadata,
                                        **gann_metadata,
                                        **pann_metadata,
                                        # datum
                                        "datum_uid": detection.uid,
                                        "datum_id": datum_idx,
                                        # groundtruth
                                        "gt_uid": gann.uid,
                                        "gt_id": gt_id,
                                        "gt_label": glabel,
                                        "gt_label_id": self._add_label(glabel),
                                        # prediction
                                        "pd_uid": pann.uid,
                                        "pd_id": pann_id,
                                        "pd_label": plabel,
                                        "pd_label_id": self._add_label(plabel),
                                        "pd_score": float(pscore),
                                        # pair
                                        "iou": float(ious[pidx, gidx]),
                                    }
                                    for glabel in gann.labels
                                    for plabel, pscore in zip(
                                        pann.labels, pann.scores
                                    )
                                ]
                            )
            elif detection.predictions:
                for pidx, pann in enumerate(detection.predictions):
                    pann_id = self._prediction_count + pidx
                    pann_metadata = pann.metadata if pann.metadata else {}
                    pairs.extend(
                        [
                            {
                                # metadata
                                **datum_metadata,
                                **pann_metadata,
                                # datum
                                "datum_uid": detection.uid,
                                "datum_id": datum_idx,
                                # groundtruth
                                "gt_uid": None,
                                "gt_id": -1,
                                "gt_label": None,
                                "gt_label_id": -1,
                                # prediction
                                "pd_uid": pann.uid,
                                "pd_id": pann_id,
                                "pd_label": plabel,
                                "pd_label_id": self._add_label(plabel),
                                "pd_score": float(pscore),
                                # pair
                                "iou": 0.0,
                            }
                            for plabel, pscore in zip(pann.labels, pann.scores)
                        ]
                    )

            self._datum_count += 1
            self._groundtruth_count += len(detection.groundtruths)
            self._prediction_count += len(detection.predictions)

            self._detailed_writer.write_rows(pairs)

    def add_bounding_boxes(
        self,
        detections: list[Detection[BoundingBox]],
        show_progress: bool = False,
    ):
        """
        Adds bounding box detections to the cache.

        Parameters
        ----------
        detections : list[Detection]
            A list of Detection objects.
        show_progress : bool, default=False
            Toggle for tqdm progress bar.
        """
        ious = [
            compute_bbox_iou(
                np.array(
                    [
                        [gt.extrema, pd.extrema]
                        for pd in detection.predictions
                        for gt in detection.groundtruths
                    ],
                    dtype=np.float64,
                )
            ).reshape(len(detection.predictions), len(detection.groundtruths))
            for detection in detections
        ]
        return self._add_data(
            detections=detections,
            detection_ious=ious,
            show_progress=show_progress,
        )

    def add_polygons(
        self,
        detections: list[Detection[Polygon]],
        show_progress: bool = False,
    ):
        """
        Adds polygon detections to the cache.

        Parameters
        ----------
        detections : list[Detection]
            A list of Detection objects.
        show_progress : bool, default=False
            Toggle for tqdm progress bar.
        """
        ious = [
            compute_polygon_iou(
                np.array(
                    [
                        [gt.shape, pd.shape]
                        for pd in detection.predictions
                        for gt in detection.groundtruths
                    ]
                )
            ).reshape(len(detection.predictions), len(detection.groundtruths))
            for detection in detections
        ]
        return self._add_data(
            detections=detections,
            detection_ious=ious,
            show_progress=show_progress,
        )

    def add_bitmasks(
        self,
        detections: list[Detection[Bitmask]],
        show_progress: bool = False,
    ):
        """
        Adds bitmask detections to the cache.

        Parameters
        ----------
        detections : list[Detection]
            A list of Detection objects.
        show_progress : bool, default=False
            Toggle for tqdm progress bar.
        """
        ious = [
            compute_bitmask_iou(
                np.array(
                    [
                        [gt.mask, pd.mask]
                        for pd in detection.predictions
                        for gt in detection.groundtruths
                    ]
                )
            ).reshape(len(detection.predictions), len(detection.groundtruths))
            for detection in detections
        ]
        return self._add_data(
            detections=detections,
            detection_ious=ious,
            show_progress=show_progress,
        )
```
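The `add_*` methods above flatten every prediction–groundtruth pair into one array before reshaping the IoU results into a `(n_predictions, n_groundtruths)` matrix, which is what `ious[pidx, gidx]` indexes in `_add_data`. The sketch below mirrors that layout in plain NumPy. It is illustrative only: `pairwise_bbox_iou` is a stand-in written for this note, and the `(xmin, xmax, ymin, ymax)` ordering of `BoundingBox.extrema` is an assumption; the library's own `compute_bbox_iou` is the authoritative implementation.

```python
import numpy as np


def pairwise_bbox_iou(pairs: np.ndarray) -> np.ndarray:
    """IoU for an array of shape (n_pairs, 2, 4) holding [gt, pd] extrema."""
    gt, pd = pairs[:, 0, :], pairs[:, 1, :]
    # overlap extent along x and y, clamped at zero
    ix = np.maximum(
        np.minimum(gt[:, 1], pd[:, 1]) - np.maximum(gt[:, 0], pd[:, 0]), 0.0
    )
    iy = np.maximum(
        np.minimum(gt[:, 3], pd[:, 3]) - np.maximum(gt[:, 2], pd[:, 2]), 0.0
    )
    intersection = ix * iy
    union = (
        (gt[:, 1] - gt[:, 0]) * (gt[:, 3] - gt[:, 2])
        + (pd[:, 1] - pd[:, 0]) * (pd[:, 3] - pd[:, 2])
        - intersection
    )
    return np.divide(
        intersection, union, out=np.zeros_like(union), where=union > 0
    )


# Two ground truths, one prediction -> a (1, 2) matrix, matching the
# loader's `ious[pidx, gidx]` indexing (predictions outer, groundtruths inner).
gts = [(0.0, 10.0, 0.0, 10.0), (20.0, 30.0, 20.0, 30.0)]
pds = [(0.0, 10.0, 0.0, 5.0)]
pairs = np.array([[g, p] for p in pds for g in gts], dtype=np.float64)
ious = pairwise_bbox_iou(pairs).reshape(len(pds), len(gts))
print(ious)  # [[0.5 0. ]]
```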
valor_lite/object_detection/metric.py:

```diff
@@ -18,7 +18,9 @@ class MetricType(str, Enum):
     ARAveragedOverScores = "ARAveragedOverScores"
     mARAveragedOverScores = "mARAveragedOverScores"
     PrecisionRecallCurve = "PrecisionRecallCurve"
+    ConfusionMatrixWithExamples = "ConfusionMatrixWithExamples"
     ConfusionMatrix = "ConfusionMatrix"
+    Examples = "Examples"
 
 
 @dataclass
```
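A quick sanity check of the two new members; this assumes `MetricType` is importable from `valor_lite.object_detection.metric`, the file this hunk applies to.

```python
from valor_lite.object_detection.metric import MetricType

# both variants round-trip to their string names via str-Enum inheritance
assert MetricType.ConfusionMatrixWithExamples.value == "ConfusionMatrixWithExamples"
assert MetricType.Examples.value == "Examples"
assert MetricType("Examples") is MetricType.Examples
```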
```diff
@@ -562,6 +564,153 @@ class Metric(BaseMetric):
 
     @classmethod
     def confusion_matrix(
+        cls,
+        confusion_matrix: dict[str, dict[str, int]],
+        unmatched_predictions: dict[str, int],
+        unmatched_ground_truths: dict[str, int],
+        score_threshold: float,
+        iou_threshold: float,
+    ):
+        """
+        Confusion matrix for object detection task.
+
+        This class encapsulates detailed information about the model's performance, including correct
+        predictions, misclassifications, unmatched_predictions (subset of false positives), and unmatched ground truths
+        (subset of false negatives).
+
+        Confusion Matrix Format:
+        {
+            <ground truth label>: {
+                <prediction label>: 129
+                ...
+            },
+            ...
+        }
+
+        Unmatched Predictions Format:
+        {
+            <prediction label>: 11
+            ...
+        }
+
+        Unmatched Ground Truths Format:
+        {
+            <ground truth label>: 7
+            ...
+        }
+
+        Parameters
+        ----------
+        confusion_matrix : dict
+            A nested dictionary containing integer counts of occurences where the first key is the ground truth label value
+            and the second key is the prediction label value.
+        unmatched_predictions : dict
+            A dictionary where each key is a prediction label value with no corresponding ground truth
+            (subset of false positives). The value is a dictionary containing counts.
+        unmatched_ground_truths : dict
+            A dictionary where each key is a ground truth label value for which the model failed to predict
+            (subset of false negatives). The value is a dictionary containing counts.
+        score_threshold : float
+            The confidence score threshold used to filter predictions.
+        iou_threshold : float
+            The Intersection over Union (IOU) threshold used to determine true positives.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.ConfusionMatrix.value,
+            value={
+                "confusion_matrix": confusion_matrix,
+                "unmatched_predictions": unmatched_predictions,
+                "unmatched_ground_truths": unmatched_ground_truths,
+            },
+            parameters={
+                "score_threshold": score_threshold,
+                "iou_threshold": iou_threshold,
+            },
+        )
+
+    @classmethod
+    def examples(
+        cls,
+        datum_id: str,
+        true_positives: list[tuple[str, str]],
+        false_positives: list[str],
+        false_negatives: list[str],
+        score_threshold: float,
+        iou_threshold: float,
+    ):
+        """
+        Per-datum examples for object detection tasks.
+
+        This metric is per-datum and contains lists of annotation identifiers that categorize them
+        as true-positive, false-positive or false-negative. This is intended to be used with an
+        external database where the identifiers can be used for retrieval.
+
+        Examples Format:
+        {
+            "type": "Examples",
+            "value": {
+                "datum_id": "some string ID",
+                "true_positives": [
+                    ["groundtruth0", "prediction0"],
+                    ["groundtruth123", "prediction11"],
+                    ...
+                ],
+                "false_positives": [
+                    "prediction25",
+                    "prediction92",
+                    ...
+                ]
+                "false_negatives": [
+                    "groundtruth32",
+                    "groundtruth24",
+                    ...
+                ]
+            },
+            "parameters": {
+                "score_threshold": 0.5,
+                "iou_threshold": 0.5,
+            }
+        }
+
+        Parameters
+        ----------
+        datum_id : str
+            A string identifier representing a datum.
+        true_positives : list[tuple[str, str]]
+            A list of string identifier pairs representing true positive ground truth and prediction combinations.
+        false_positives : list[str]
+            A list of string identifiers representing false positive predictions.
+        false_negatives : list[str]
+            A list of string identifiers representing false negative ground truths.
+        score_threshold : float
+            The confidence score threshold used to filter predictions.
+        iou_threshold : float
+            The Intersection over Union (IOU) threshold used to determine true positives.
+
+        Returns
+        -------
+        Metric
+        """
+        return cls(
+            type=MetricType.Examples.value,
+            value={
+                "datum_id": datum_id,
+                "true_positives": true_positives,
+                "false_positives": false_positives,
+                "false_negatives": false_negatives,
+            },
+            parameters={
+                "score_threshold": score_threshold,
+                "iou_threshold": iou_threshold,
+            },
+        )
+
+    @classmethod
+    def confusion_matrix_with_examples(
         cls,
         confusion_matrix: dict[
             str,  # ground truth label value
```
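The two new constructors can be exercised directly. The argument values below are illustrative only; the import path is assumed from this file's location in the wheel, and the call signatures come straight from the hunk above.

```python
from valor_lite.object_detection.metric import Metric

# count-only confusion matrix at fixed score/IoU thresholds
cm = Metric.confusion_matrix(
    confusion_matrix={"dog": {"dog": 129, "cat": 3}},
    unmatched_predictions={"cat": 11},
    unmatched_ground_truths={"dog": 7},
    score_threshold=0.5,
    iou_threshold=0.5,
)

# per-datum examples keyed by external annotation identifiers
ex = Metric.examples(
    datum_id="img_001",
    true_positives=[("groundtruth0", "prediction0")],
    false_positives=["prediction25"],
    false_negatives=["groundtruth32"],
    score_threshold=0.5,
    iou_threshold=0.5,
)
```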
```diff
@@ -609,7 +758,7 @@ class Metric(BaseMetric):
         iou_threshold: float,
     ):
         """
-        Confusion matrix for object detection tasks.
+        Confusion matrix with examples for object detection tasks.
 
         This class encapsulates detailed information about the model's performance, including correct
         predictions, misclassifications, unmatched_predictions (subset of false positives), and unmatched ground truths
```
```diff
@@ -674,7 +823,7 @@ class Metric(BaseMetric):
             A dictionary where each key is a prediction label value with no corresponding ground truth
             (subset of false positives). The value is a dictionary containing either a `count` or a list of
             `examples`. Each example includes annotation and datum identifers.
-
+        unmatched_groundtruths : dict
             A dictionary where each key is a ground truth label value for which the model failed to predict
             (subset of false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
             Each example includes annotation and datum identifers.
```
```diff
@@ -688,7 +837,7 @@ class Metric(BaseMetric):
         Metric
         """
         return cls(
-            type=MetricType.ConfusionMatrix.value,
+            type=MetricType.ConfusionMatrixWithExamples.value,
             value={
                 "confusion_matrix": confusion_matrix,
                 "unmatched_predictions": unmatched_predictions,
```
valor_lite/object_detection/shared.py (new file, +206 lines):

```python
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
from numpy.typing import NDArray

from valor_lite.cache import FileCacheReader, MemoryCacheReader


@dataclass
class EvaluatorInfo:
    number_of_datums: int = 0
    number_of_groundtruth_annotations: int = 0
    number_of_prediction_annotations: int = 0
    number_of_labels: int = 0
    number_of_rows: int = 0
    metadata_fields: list[tuple[str, str | pa.DataType]] | None = None


def generate_detailed_cache_path(path: str | Path) -> Path:
    return Path(path) / "detailed"


def generate_ranked_cache_path(path: str | Path) -> Path:
    return Path(path) / "ranked"


def generate_temporary_cache_path(path: str | Path) -> Path:
    return Path(path) / "tmp"


def generate_metadata_path(path: str | Path) -> Path:
    return Path(path) / "metadata.json"


def generate_detailed_schema(
    metadata_fields: list[tuple[str, str | pa.DataType]] | None
) -> pa.Schema:
    metadata_fields = metadata_fields if metadata_fields else []
    reserved_fields = [
        ("datum_uid", pa.string()),
        ("datum_id", pa.int64()),
        # groundtruth
        ("gt_uid", pa.string()),
        ("gt_id", pa.int64()),
        ("gt_label", pa.string()),
        ("gt_label_id", pa.int64()),
        # prediction
        ("pd_uid", pa.string()),
        ("pd_id", pa.int64()),
        ("pd_label", pa.string()),
        ("pd_label_id", pa.int64()),
        ("pd_score", pa.float64()),
        # pair
        ("iou", pa.float64()),
    ]

    # validate
    reserved_field_names = {f[0] for f in reserved_fields}
    metadata_field_names = {f[0] for f in metadata_fields}
    if conflicting := reserved_field_names & metadata_field_names:
        raise ValueError(
            f"metadata fields {conflicting} conflict with reserved fields"
        )

    return pa.schema(reserved_fields + metadata_fields)


def generate_ranked_schema(
    metadata_fields: list[tuple[str, str | pa.DataType]] | None
) -> pa.Schema:
    reserved_detailed_fields = [
        ("datum_uid", pa.string()),
        ("datum_id", pa.int64()),
        # groundtruth
        ("gt_id", pa.int64()),
        ("gt_label_id", pa.int64()),
        # prediction
        ("pd_id", pa.int64()),
        ("pd_label_id", pa.int64()),
        ("pd_score", pa.float64()),
        # pair
        ("iou", pa.float64()),
    ]
    reserved_ranking_fields = [
        ("iou_prev", pa.float64()),
    ]
    metadata_fields = metadata_fields if metadata_fields else []

    # validate
    reserved_field_names = {
        f[0] for f in reserved_detailed_fields + reserved_ranking_fields
    }
    metadata_field_names = {f[0] for f in metadata_fields}
    if conflicting := reserved_field_names & metadata_field_names:
        raise ValueError(
            f"metadata fields {conflicting} conflict with reserved fields"
        )

    return pa.schema(
        [
            *reserved_detailed_fields,
            *metadata_fields,
            *reserved_ranking_fields,
        ]
    )


def encode_metadata_fields(
    metadata_fields: list[tuple[str, str | pa.DataType]] | None
) -> dict[str, str]:
    metadata_fields = metadata_fields if metadata_fields else []
    return {k: str(v) for k, v in metadata_fields}


def decode_metadata_fields(
    encoded_metadata_fields: dict[str, str]
) -> list[tuple[str, str]]:
    return [(k, v) for k, v in encoded_metadata_fields.items()]


def extract_labels(
    reader: MemoryCacheReader | FileCacheReader,
    index_to_label_override: dict[int, str] | None = None,
) -> dict[int, str]:
    if index_to_label_override is not None:
        return index_to_label_override

    index_to_label = {}
    for tbl in reader.iterate_tables(
        columns=[
            "gt_label_id",
            "gt_label",
            "pd_label_id",
            "pd_label",
        ]
    ):

        # get gt labels
        gt_label_ids = tbl["gt_label_id"].to_numpy()
        gt_label_ids, gt_indices = np.unique(gt_label_ids, return_index=True)
        gt_labels = tbl["gt_label"].take(gt_indices).to_pylist()
        gt_labels = dict(zip(gt_label_ids.astype(int).tolist(), gt_labels))
        gt_labels.pop(-1, None)
        index_to_label.update(gt_labels)

        # get pd labels
        pd_label_ids = tbl["pd_label_id"].to_numpy()
        pd_label_ids, pd_indices = np.unique(pd_label_ids, return_index=True)
        pd_labels = tbl["pd_label"].take(pd_indices).to_pylist()
        pd_labels = dict(zip(pd_label_ids.astype(int).tolist(), pd_labels))
        pd_labels.pop(-1, None)
        index_to_label.update(pd_labels)

    return index_to_label


def extract_counts(
    reader: MemoryCacheReader | FileCacheReader,
    datums: pc.Expression | None = None,
    groundtruths: pc.Expression | None = None,
    predictions: pc.Expression | None = None,
):
    n_dts, n_gts, n_pds = 0, 0, 0
    for tbl in reader.iterate_tables(filter=datums):
        # count datums
        n_dts += int(np.unique(tbl["datum_id"].to_numpy()).shape[0])

        # count groundtruths
        if groundtruths is not None:
            gts = tbl.filter(groundtruths)["gt_id"].to_numpy()
        else:
            gts = tbl["gt_id"].to_numpy()
        n_gts += int(np.unique(gts[gts >= 0]).shape[0])

        # count predictions
        if predictions is not None:
            pds = tbl.filter(predictions)["pd_id"].to_numpy()
        else:
            pds = tbl["pd_id"].to_numpy()
        n_pds += int(np.unique(pds[pds >= 0]).shape[0])

    return n_dts, n_gts, n_pds


def extract_groundtruth_count_per_label(
    reader: MemoryCacheReader | FileCacheReader,
    number_of_labels: int,
    datums: pc.Expression | None = None,
) -> NDArray[np.uint64]:
    gt_counts_per_lbl = np.zeros(number_of_labels, dtype=np.uint64)
    for gts in reader.iterate_arrays(
        numeric_columns=["gt_id", "gt_label_id"],
        filter=datums,
    ):
        # count gts per label
        unique_ann = np.unique(gts[gts[:, 0] >= 0], axis=0)
        unique_labels, label_counts = np.unique(
            unique_ann[:, 1], return_counts=True
        )
        for label_id, count in zip(unique_labels, label_counts):
            gt_counts_per_lbl[int(label_id)] += int(count)

    return gt_counts_per_lbl
```
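A short sketch of the new schema helpers above. The behavior shown (reserved-column ordering, conflict validation, string-encoding of types for the metadata.json sidecar) follows directly from the code in this file; only the example field name `camera_id` is invented for illustration.

```python
import pyarrow as pa

from valor_lite.object_detection.shared import (
    encode_metadata_fields,
    generate_detailed_schema,
)

# metadata columns are appended after the twelve reserved pair columns
schema = generate_detailed_schema([("camera_id", pa.string())])
assert schema.names[-1] == "camera_id"
assert schema.field("iou").type == pa.float64()

# reserved names are rejected up front
try:
    generate_detailed_schema([("iou", pa.string())])
except ValueError as err:
    print(err)  # metadata fields {'iou'} conflict with reserved fields

# pyarrow types are stringified for serialization
assert encode_metadata_fields([("camera_id", pa.string())]) == {"camera_id": "string"}
```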