valor-lite 0.36.5__py3-none-any.whl → 0.37.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +211 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +536 -0
- valor_lite/classification/__init__.py +5 -10
- valor_lite/classification/annotation.py +4 -0
- valor_lite/classification/computation.py +233 -251
- valor_lite/classification/evaluator.py +882 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +141 -4
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +221 -118
- valor_lite/exceptions.py +5 -0
- valor_lite/object_detection/__init__.py +5 -4
- valor_lite/object_detection/annotation.py +13 -1
- valor_lite/object_detection/computation.py +367 -304
- valor_lite/object_detection/evaluator.py +804 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +152 -3
- valor_lite/object_detection/shared.py +206 -0
- valor_lite/object_detection/utilities.py +182 -109
- valor_lite/semantic_segmentation/__init__.py +5 -4
- valor_lite/semantic_segmentation/annotation.py +7 -0
- valor_lite/semantic_segmentation/computation.py +20 -110
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +6 -23
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
- valor_lite-0.37.5.dist-info/RECORD +49 -0
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
- valor_lite/classification/manager.py +0 -545
- valor_lite/object_detection/manager.py +0 -865
- valor_lite/profiling.py +0 -374
- valor_lite/semantic_segmentation/benchmark.py +0 -237
- valor_lite/semantic_segmentation/manager.py +0 -446
- valor_lite-0.36.5.dist-info/RECORD +0 -41
- {valor_lite-0.36.5.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0
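
The listing shows each task's manager.py replaced by evaluator/loader/shared modules plus a new valor_lite.cache package. A minimal sketch of the resulting import surface, using module paths taken from the diff below; whether these names are also re-exported from the package __init__ files is not shown here:

from valor_lite.cache import FileCacheWriter, MemoryCacheWriter
from valor_lite.semantic_segmentation.evaluator import Builder, Evaluator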

valor_lite/semantic_segmentation/computation.py

@@ -2,93 +2,13 @@ import numpy as np
 from numpy.typing import NDArray
 
 
-def compute_label_metadata(
-    confusion_matrices: NDArray[np.int64],
-    n_labels: int,
-) -> NDArray[np.int64]:
-    """
-    Computes label metadata returning a count of annotations per label.
-
-    Parameters
-    ----------
-    confusion_matrices : NDArray[np.int64]
-        Confusion matrices per datum with shape (n_datums, n_labels + 1, n_labels + 1).
-    n_labels : int
-        The total number of unique labels.
-
-    Returns
-    -------
-    NDArray[np.int64]
-        The label metadata array with shape (n_labels, 2).
-        Index 0 - Ground truth label count
-        Index 1 - Prediction label count
-    """
-    label_metadata = np.zeros((n_labels, 2), dtype=np.int64)
-    label_metadata[:, 0] = confusion_matrices[:, 1:, :].sum(axis=(0, 2))
-    label_metadata[:, 1] = confusion_matrices[:, :, 1:].sum(axis=(0, 1))
-    return label_metadata
-
-
-def filter_cache(
-    confusion_matrices: NDArray[np.int64],
-    datum_mask: NDArray[np.bool_],
-    label_mask: NDArray[np.bool_],
-    number_of_labels: int,
-) -> tuple[NDArray[np.int64], NDArray[np.int64]]:
-    """
-    Performs the filter operation over the internal cache.
-
-    Parameters
-    ----------
-    confusion_matrices : NDArray[int64]
-        The internal evaluator cache.
-    datum_mask : NDArray[bool]
-        A mask that filters out datums.
-    datum_mask : NDArray[bool]
-        A mask that filters out labels.
-
-    Returns
-    -------
-    NDArray[int64]
-        Filtered confusion matrices.
-    NDArray[int64]
-        Filtered label metadata.
-    """
-    if label_mask.any():
-        # add filtered labels to background
-        null_predictions = confusion_matrices[:, label_mask, :].sum(
-            axis=(1, 2)
-        )
-        null_groundtruths = confusion_matrices[:, :, label_mask].sum(
-            axis=(1, 2)
-        )
-        null_intersection = (
-            confusion_matrices[:, label_mask, label_mask]
-            .reshape(confusion_matrices.shape[0], -1)
-            .sum(axis=1)
-        )
-        confusion_matrices[:, 0, 0] += (
-            null_groundtruths + null_predictions - null_intersection
-        )
-        confusion_matrices[:, label_mask, :] = 0
-        confusion_matrices[:, :, label_mask] = 0
-
-    confusion_matrices = confusion_matrices[datum_mask]
-
-    label_metadata = compute_label_metadata(
-        confusion_matrices=confusion_matrices,
-        n_labels=number_of_labels,
-    )
-    return confusion_matrices, label_metadata
-
-
-def compute_intermediate_confusion_matrices(
+def compute_intermediates(
     groundtruths: NDArray[np.bool_],
     predictions: NDArray[np.bool_],
     groundtruth_labels: NDArray[np.int64],
     prediction_labels: NDArray[np.int64],
     n_labels: int,
-) -> NDArray[np.int64]:
+) -> NDArray[np.uint64]:
     """
     Computes an intermediate confusion matrix containing label counts.
 
@@ -99,15 +19,15 @@ def compute_intermediate_confusion_matrices(
     predictions : NDArray[np.bool_]
         A 2-D array containing flattened bitmasks for each label.
     groundtruth_labels : NDArray[np.int64]
-        A 1-D array containing label indices.
-    prediction_labels : NDArray[np.int64]
-        A 1-D array containing label indices.
+        A 1-D array containing ground truth label indices.
+    prediction_labels : NDArray[np.int64]
+        A 1-D array containing prediction label indices.
     n_labels : int
         The number of unique labels.
 
     Returns
     -------
-    NDArray[np.int64]
+    NDArray[np.uint64]
         A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
     """
 
@@ -125,7 +45,7 @@ def compute_intermediate_confusion_matrices(
     intersected_groundtruth_counts = intersection_counts.sum(axis=1)
     intersected_prediction_counts = intersection_counts.sum(axis=0)
 
-    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=np.int64)
+    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=np.uint64)
     confusion_matrix[0, 0] = background_counts
     confusion_matrix[
         np.ix_(groundtruth_labels + 1, prediction_labels + 1)
@@ -136,14 +56,11 @@ def compute_intermediate_confusion_matrices(
     confusion_matrix[groundtruth_labels + 1, 0] = (
         groundtruth_counts - intersected_groundtruth_counts
     )
-
     return confusion_matrix
 
 
 def compute_metrics(
-    confusion_matrices: NDArray[np.int64],
-    label_metadata: NDArray[np.int64],
-    n_pixels: int,
+    confusion_matrix: NDArray[np.uint64],
 ) -> tuple[
     NDArray[np.float64],
     NDArray[np.float64],
@@ -156,16 +73,10 @@ def compute_metrics(
     """
     Computes semantic segmentation metrics.
 
-    Takes data with shape (3, N).
-
     Parameters
     ----------
-    confusion_matrices : NDArray[np.int64]
-        A
-    label_metadata : NDArray[np.int64]
-        A 2-D array containing label metadata with shape (n_labels, 2).
-        Index 0: Ground Truth Label Count
-        Index 1: Prediction Label Count
+    counts : NDArray[np.uint64]
+        A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
 
     Returns
     -------
@@ -184,14 +95,13 @@ def compute_metrics(
     NDArray[np.float64]
         Unmatched ground truth ratios.
     """
-    n_labels =
-
-
-
-    counts = confusion_matrices.sum(axis=0)
+    n_labels = confusion_matrix.shape[0] - 1
+    n_pixels = confusion_matrix.sum()
+    gt_counts = confusion_matrix[1:, :].sum(axis=1)
+    pd_counts = confusion_matrix[:, 1:].sum(axis=0)
 
     # compute iou, unmatched_ground_truth and unmatched predictions
-    intersection_ =
+    intersection_ = confusion_matrix[1:, 1:]
     union_ = (
         gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection_
     )
@@ -206,7 +116,7 @@ def compute_metrics(
 
     unmatched_prediction_ratio = np.zeros((n_labels), dtype=np.float64)
     np.divide(
-
+        confusion_matrix[0, 1:],
         pd_counts,
         where=pd_counts > 1e-9,
         out=unmatched_prediction_ratio,
@@ -214,14 +124,14 @@
 
     unmatched_ground_truth_ratio = np.zeros((n_labels), dtype=np.float64)
     np.divide(
-
+        confusion_matrix[1:, 0],
         gt_counts,
         where=gt_counts > 1e-9,
         out=unmatched_ground_truth_ratio,
     )
 
     # compute precision, recall, f1
-    tp_counts =
+    tp_counts = confusion_matrix.diagonal()[1:]
 
     precision = np.zeros(n_labels, dtype=np.float64)
     np.divide(tp_counts, pd_counts, where=pd_counts > 1e-9, out=precision)
@@ -238,8 +148,8 @@
     )
 
     # compute accuracy
-    tp_count =
-    background_count =
+    tp_count = confusion_matrix[1:, 1:].diagonal().sum()
+    background_count = confusion_matrix[0, 0]
     accuracy = (
         (tp_count + background_count) / n_pixels if n_pixels > 0 else 0.0
     )
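
The new compute_metrics consumes a single aggregated confusion matrix rather than per-datum matrices plus label_metadata. A minimal sketch of that counting scheme, mirroring the slices in the added lines above; the toy matrix and its values are invented for illustration:

import numpy as np

# Row/column 0 is the background; row i+1 is ground truth label i and
# column j+1 is predicted label j. Entries are pixel counts.
cm = np.array(
    [
        [50, 2, 3],
        [4, 30, 1],
        [5, 2, 20],
    ],
    dtype=np.uint64,
)

n_labels = cm.shape[0] - 1           # 2
n_pixels = cm.sum()                  # 117
gt_counts = cm[1:, :].sum(axis=1)    # [35, 27] ground truth pixels per label
pd_counts = cm[:, 1:].sum(axis=0)    # [34, 24] predicted pixels per label

intersection = cm[1:, 1:]
union = gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection
iou = np.diag(intersection / union)              # [30/39, 20/31]
precision = intersection.diagonal() / pd_counts  # [30/34, 20/24]
recall = intersection.diagonal() / gt_counts     # [30/35, 20/27]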
valor_lite/semantic_segmentation/evaluator.py

@@ -0,0 +1,414 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+from numpy.typing import NDArray
+
+from valor_lite.cache import (
+    FileCacheReader,
+    FileCacheWriter,
+    MemoryCacheReader,
+    MemoryCacheWriter,
+)
+from valor_lite.exceptions import EmptyCacheError
+from valor_lite.semantic_segmentation.computation import compute_metrics
+from valor_lite.semantic_segmentation.metric import MetricType
+from valor_lite.semantic_segmentation.shared import (
+    EvaluatorInfo,
+    decode_metadata_fields,
+    encode_metadata_fields,
+    extract_counts,
+    extract_labels,
+    generate_cache_path,
+    generate_metadata_path,
+    generate_schema,
+)
+from valor_lite.semantic_segmentation.utilities import (
+    unpack_precision_recall_iou_into_metric_lists,
+)
+
+
+class Builder:
+    def __init__(
+        self,
+        writer: MemoryCacheWriter | FileCacheWriter,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._writer = writer
+        self._metadata_fields = metadata_fields
+
+    @classmethod
+    def in_memory(
+        cls,
+        batch_size: int = 10_000,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create an in-memory evaluator cache.
+
+        Parameters
+        ----------
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        # create cache
+        writer = MemoryCacheWriter.create(
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+        )
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    @classmethod
+    def persistent(
+        cls,
+        path: str | Path,
+        batch_size: int = 10_000,
+        rows_per_file: int = 100_000,
+        compression: str = "snappy",
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create a persistent file-based evaluator cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Where to store the file-based cache.
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        rows_per_file : int, default=100_000
+            The target number of rows to store per cache file. Defaults to 100_000.
+        compression : str, default="snappy"
+            The compression methods used when writing cache files.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        path = Path(path)
+
+        # create cache
+        writer = FileCacheWriter.create(
+            path=generate_cache_path(path),
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+            rows_per_file=rows_per_file,
+            compression=compression,
+        )
+
+        # write metadata
+        metadata_path = generate_metadata_path(path)
+        with open(metadata_path, "w") as f:
+            encoded_types = encode_metadata_fields(metadata_fields)
+            json.dump(encoded_types, f, indent=2)
+
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    def finalize(
+        self,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Performs data finalization and some preprocessing steps.
+
+        Parameters
+        ----------
+        index_to_label_override : dict[int, str], optional
+            Pre-configures label mapping. Used when operating over filtered subsets.
+
+        Returns
+        -------
+        Evaluator
+            A ready-to-use evaluator object.
+        """
+        self._writer.flush()
+        if self._writer.count_rows() == 0:
+            raise EmptyCacheError()
+
+        reader = self._writer.to_reader()
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        return Evaluator(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=self._metadata_fields,
+        )
+
+
+class Evaluator:
+    def __init__(
+        self,
+        reader: MemoryCacheReader | FileCacheReader,
+        index_to_label: dict[int, str],
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._reader = reader
+        self._index_to_label = index_to_label
+        self._metadata_fields = metadata_fields
+
+    @property
+    def info(self) -> EvaluatorInfo:
+        return self.get_info()
+
+    def get_info(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+    ) -> EvaluatorInfo:
+        info = EvaluatorInfo()
+        info.number_of_rows = self._reader.count_rows()
+        info.number_of_labels = len(self._index_to_label)
+        info.metadata_fields = self._metadata_fields
+        (
+            info.number_of_datums,
+            info.number_of_pixels,
+            info.number_of_groundtruth_pixels,
+            info.number_of_prediction_pixels,
+        ) = extract_counts(
+            reader=self._reader,
+            datums=datums,
+            groundtruths=groundtruths,
+            predictions=predictions,
+        )
+        return info
+
+    @classmethod
+    def load(
+        cls,
+        path: str | Path,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Load from an existing semantic segmentation cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Path to the existing cache.
+        index_to_label_override : dict[int, str], optional
+            Option to preset index to label dictionary. Used when loading from filtered caches.
+        """
+        # validate path
+        path = Path(path)
+        if not path.exists():
+            raise FileNotFoundError(f"Directory does not exist: {path}")
+        elif not path.is_dir():
+            raise NotADirectoryError(
+                f"Path exists but is not a directory: {path}"
+            )
+
+        # load cache
+        reader = FileCacheReader.load(generate_cache_path(path))
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        # read config
+        metadata_path = generate_metadata_path(path)
+        metadata_fields = None
+        with open(metadata_path, "r") as f:
+            metadata_types = json.load(f)
+            metadata_fields = decode_metadata_fields(metadata_types)
+
+        return cls(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=metadata_fields,
+        )
+
+    def filter(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+        path: str | Path | None = None,
+    ) -> Evaluator:
+        """
+        Filter evaluator cache.
+
+        Parameters
+        ----------
+        datums : pc.Expression | None = None
+            A filter expression used to filter datums.
+        groundtruths : pc.Expression | None = None
+            A filter expression used to filter ground truth annotations.
+        predictions : pc.Expression | None = None
+            A filter expression used to filter predictions.
+        path : str | Path, optional
+            Where to store the filtered cache if storing on disk.
+
+        Returns
+        -------
+        Evaluator
+            A new evaluator object containing the filtered cache.
+        """
+        if isinstance(self._reader, FileCacheReader):
+            if not path:
+                raise ValueError(
+                    "expected path to be defined for file-based cache"
+                )
+            builder = Builder.persistent(
+                path=path,
+                batch_size=self._reader.batch_size,
+                rows_per_file=self._reader.rows_per_file,
+                compression=self._reader.compression,
+                metadata_fields=self.info.metadata_fields,
+            )
+        else:
+            builder = Builder.in_memory(
+                batch_size=self._reader.batch_size,
+                metadata_fields=self.info.metadata_fields,
+            )
+
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            pairs = np.column_stack([tbl[col].to_numpy() for col in columns])
+
+            n_pairs = pairs.shape[0]
+            gt_ids = pairs[:, (0, 1)].astype(np.int64)
+            pd_ids = pairs[:, (0, 2)].astype(np.int64)
+
+            if groundtruths is not None:
+                mask_valid_gt = np.zeros(n_pairs, dtype=np.bool_)
+                gt_tbl = tbl.filter(groundtruths)
+                gt_pairs = np.column_stack(
+                    [
+                        gt_tbl[col].to_numpy()
+                        for col in ("datum_id", "gt_label_id")
+                    ]
+                ).astype(np.int64)
+                for gt in np.unique(gt_pairs, axis=0):
+                    mask_valid_gt |= (gt_ids == gt).all(axis=1)
+            else:
+                mask_valid_gt = np.ones(n_pairs, dtype=np.bool_)
+
+            if predictions is not None:
+                mask_valid_pd = np.zeros(n_pairs, dtype=np.bool_)
+                pd_tbl = tbl.filter(predictions)
+                pd_pairs = np.column_stack(
+                    [
+                        pd_tbl[col].to_numpy()
+                        for col in ("datum_id", "pd_label_id")
+                    ]
+                ).astype(np.int64)
+                for pd in np.unique(pd_pairs, axis=0):
+                    mask_valid_pd |= (pd_ids == pd).all(axis=1)
+            else:
+                mask_valid_pd = np.ones(n_pairs, dtype=np.bool_)
+
+            mask_valid = mask_valid_gt | mask_valid_pd
+            mask_valid_gt &= mask_valid
+            mask_valid_pd &= mask_valid
+
+            pairs[~mask_valid_gt, 1] = -1
+            pairs[~mask_valid_pd, 2] = -1
+
+            for idx, col in enumerate(columns):
+                tbl = tbl.set_column(
+                    tbl.schema.names.index(col), col, pa.array(pairs[:, idx])
+                )
+            builder._writer.write_table(tbl)
+
+        return builder.finalize(index_to_label_override=self._index_to_label)
+
+    def _compute_confusion_matrix_intermediate(
+        self, datums: pc.Expression | None = None
+    ) -> NDArray[np.uint64]:
+        """
+        Performs an evaluation and returns metrics.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
+        """
+        n_labels = len(self._index_to_label)
+        confusion_matrix = np.zeros(
+            (n_labels + 1, n_labels + 1), dtype=np.uint64
+        )
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            ids = np.column_stack(
+                [tbl[col].to_numpy() for col in columns]
+            ).astype(np.int64)
+            counts = tbl["count"].to_numpy()
+
+            mask_null_gts = ids[:, 1] == -1
+            mask_null_pds = ids[:, 2] == -1
+            confusion_matrix[0, 0] += counts[
+                mask_null_gts & mask_null_pds
+            ].sum()
+            for idx in range(n_labels):
+                mask_gts = ids[:, 1] == idx
+                for pidx in range(n_labels):
+                    mask_pds = ids[:, 2] == pidx
+                    confusion_matrix[idx + 1, pidx + 1] += counts[
+                        mask_gts & mask_pds
+                    ].sum()
+
+                mask_unmatched_gts = mask_gts & mask_null_pds
+                confusion_matrix[idx + 1, 0] += counts[
+                    mask_unmatched_gts
+                ].sum()
+                mask_unmatched_pds = mask_null_gts & (ids[:, 2] == idx)
+                confusion_matrix[0, idx + 1] += counts[
+                    mask_unmatched_pds
+                ].sum()
+        return confusion_matrix
+
+    def compute_precision_recall_iou(
+        self, datums: pc.Expression | None = None
+    ) -> dict[MetricType, list]:
+        """
+        Performs an evaluation and returns metrics.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
+        """
+        confusion_matrix = self._compute_confusion_matrix_intermediate(
+            datums=datums
+        )
+        results = compute_metrics(confusion_matrix=confusion_matrix)
+        return unpack_precision_recall_iou_into_metric_lists(
+            results=results,
+            index_to_label=self._index_to_label,
+        )