valor_lite-0.37.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of valor-lite might be problematic.
- valor_lite/LICENSE +21 -0
- valor_lite/__init__.py +0 -0
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +154 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +529 -0
- valor_lite/classification/__init__.py +14 -0
- valor_lite/classification/annotation.py +45 -0
- valor_lite/classification/computation.py +378 -0
- valor_lite/classification/evaluator.py +879 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +535 -0
- valor_lite/classification/numpy_compatibility.py +13 -0
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +314 -0
- valor_lite/exceptions.py +20 -0
- valor_lite/object_detection/__init__.py +17 -0
- valor_lite/object_detection/annotation.py +238 -0
- valor_lite/object_detection/computation.py +841 -0
- valor_lite/object_detection/evaluator.py +805 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +850 -0
- valor_lite/object_detection/shared.py +185 -0
- valor_lite/object_detection/utilities.py +396 -0
- valor_lite/schemas.py +11 -0
- valor_lite/semantic_segmentation/__init__.py +15 -0
- valor_lite/semantic_segmentation/annotation.py +123 -0
- valor_lite/semantic_segmentation/computation.py +165 -0
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/metric.py +275 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +88 -0
- valor_lite/text_generation/__init__.py +15 -0
- valor_lite/text_generation/annotation.py +56 -0
- valor_lite/text_generation/computation.py +611 -0
- valor_lite/text_generation/llm/__init__.py +0 -0
- valor_lite/text_generation/llm/exceptions.py +14 -0
- valor_lite/text_generation/llm/generation.py +903 -0
- valor_lite/text_generation/llm/instructions.py +814 -0
- valor_lite/text_generation/llm/integrations.py +226 -0
- valor_lite/text_generation/llm/utilities.py +43 -0
- valor_lite/text_generation/llm/validators.py +68 -0
- valor_lite/text_generation/manager.py +697 -0
- valor_lite/text_generation/metric.py +381 -0
- valor_lite-0.37.1.dist-info/METADATA +174 -0
- valor_lite-0.37.1.dist-info/RECORD +49 -0
- valor_lite-0.37.1.dist-info/WHEEL +5 -0
- valor_lite-0.37.1.dist-info/top_level.txt +1 -0
valor_lite/semantic_segmentation/annotation.py

@@ -0,0 +1,123 @@
+import warnings
+from dataclasses import dataclass, field
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+
+@dataclass
+class Bitmask:
+    """
+    Represents a binary mask with an associated semantic label.
+
+    Parameters
+    ----------
+    mask : NDArray[np.bool_]
+        A NumPy array of boolean values representing the mask.
+    label : str
+        The semantic label associated with the mask.
+    metadata : dict[str, Any], optional
+        A dictionary containing any metadata to be used within filtering operations.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> mask = np.array([[True, False], [False, True]], dtype=np.bool_)
+    >>> bitmask = Bitmask(mask=mask, label='ocean')
+    """
+
+    mask: NDArray[np.bool_]
+    label: str
+    metadata: dict[str, Any] | None = None
+
+    def __post_init__(self):
+        if self.mask.dtype != np.bool_:
+            raise ValueError(
+                f"Bitmask received mask with dtype '{self.mask.dtype}'."
+            )
+
+
+@dataclass
+class Segmentation:
+    """
+    Segmentation data structure holding ground truth and prediction bitmasks for semantic segmentation tasks.
+
+    Parameters
+    ----------
+    uid : str
+        Unique identifier for the image or sample.
+    groundtruths : List[Bitmask]
+        List of ground truth bitmasks.
+    predictions : List[Bitmask]
+        List of predicted bitmasks.
+    shape : tuple of int, optional
+        The shape of the segmentation masks. This is set automatically after initialization.
+    size : int, optional
+        The total number of pixels in the masks. This is set automatically after initialization.
+    metadata : dict[str, Any], optional
+        A dictionary containing any metadata to be used within filtering operations.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> mask1 = np.array([[True, False], [False, True]], dtype=np.bool_)
+    >>> groundtruth = Bitmask(mask=mask1, label='object')
+    >>> mask2 = np.array([[False, True], [True, False]], dtype=np.bool_)
+    >>> prediction = Bitmask(mask=mask2, label='object')
+    >>> segmentation = Segmentation(
+    ...     uid='123',
+    ...     groundtruths=[groundtruth],
+    ...     predictions=[prediction]
+    ... )
+    """
+
+    uid: str
+    groundtruths: list[Bitmask]
+    predictions: list[Bitmask]
+    shape: tuple[int, ...]
+    size: int = field(default=0)
+    metadata: dict[str, Any] | None = None
+
+    def __post_init__(self):
+
+        if len(self.shape) != 2 or self.shape[0] <= 0 or self.shape[1] <= 0:
+            raise ValueError(
+                f"segmentations must be 2-dimensional and have non-zero dimensions. Received shape '{self.shape}'"
+            )
+        self.size = self.shape[0] * self.shape[1]
+
+        self._validate_bitmasks(self.groundtruths, "ground truth")
+        self._validate_bitmasks(self.predictions, "prediction")
+
+    def _validate_bitmasks(self, bitmasks: list[Bitmask], key: str):
+        mask_accumulation = None
+        mask_overlap_accumulation = None
+        for idx, bitmask in enumerate(bitmasks):
+            if not isinstance(bitmask, Bitmask):
+                raise ValueError(f"expected 'Bitmask', got '{bitmask}'")
+            if self.shape != bitmask.mask.shape:
+                raise ValueError(
+                    f"{key} masks for datum '{self.uid}' should have shape '{self.shape}'. Received mask with shape '{bitmask.mask.shape}'"
+                )
+
+            if mask_accumulation is None:
+                mask_accumulation = bitmask.mask.copy()
+                mask_overlap_accumulation = np.zeros_like(mask_accumulation)
+            elif np.logical_and(mask_accumulation, bitmask.mask).any():
+                mask_overlap = np.logical_and(mask_accumulation, bitmask.mask)
+                bitmasks[idx].mask[mask_overlap] = False
+                mask_overlap_accumulation = (
+                    mask_overlap_accumulation | mask_overlap
+                )
+            else:
+                mask_accumulation = mask_accumulation | bitmask.mask
+        if (
+            mask_overlap_accumulation is not None
+            and mask_overlap_accumulation.any()
+        ):
+            count = mask_overlap_accumulation.sum()
+            total = mask_overlap_accumulation.size
+            warnings.warn(
+                f"{key} masks for datum '{self.uid}' had {count} / {total} pixels overlapped."
+            )
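For orientation, a short usage sketch assembled from the docstrings above; it is not part of the package diff. Note that `shape` is declared without a default value, so it is passed explicitly here even though the docstring lists it as optional, and that overlapping masks within a single list are trimmed and reported via the warning emitted in `_validate_bitmasks`.

>>> import numpy as np
>>> from valor_lite.semantic_segmentation.annotation import Bitmask, Segmentation
>>> gt = Bitmask(mask=np.array([[True, True], [False, False]], dtype=np.bool_), label='ocean')
>>> pd = Bitmask(mask=np.array([[True, False], [False, False]], dtype=np.bool_), label='ocean')
>>> seg = Segmentation(
...     uid='datum0',
...     groundtruths=[gt],
...     predictions=[pd],
...     shape=(2, 2),
... )
>>> seg.size
4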
valor_lite/semantic_segmentation/computation.py

@@ -0,0 +1,165 @@
+import numpy as np
+from numpy.typing import NDArray
+
+
+def compute_intermediates(
+    groundtruths: NDArray[np.bool_],
+    predictions: NDArray[np.bool_],
+    groundtruth_labels: NDArray[np.int64],
+    prediction_labels: NDArray[np.int64],
+    n_labels: int,
+) -> NDArray[np.uint64]:
+    """
+    Computes an intermediate confusion matrix containing label counts.
+
+    Parameters
+    ----------
+    groundtruths : NDArray[np.bool_]
+        A 2-D array containing flattened bitmasks for each label.
+    predictions : NDArray[np.bool_]
+        A 2-D array containing flattened bitmasks for each label.
+    groundtruth_labels : NDArray[np.int64]
+        A 1-D array containing ground truth label indices.
+    prediction_labels : NDArray[np.int64]
+        A 1-D array containing prediction label indices.
+    n_labels : int
+        The number of unique labels.
+
+    Returns
+    -------
+    NDArray[np.uint64]
+        A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
+    """
+
+    groundtruth_counts = groundtruths.sum(axis=1)
+    prediction_counts = predictions.sum(axis=1)
+
+    background_counts = np.logical_not(
+        groundtruths.any(axis=0) | predictions.any(axis=0)
+    ).sum()
+
+    intersection_counts = np.logical_and(
+        groundtruths[:, None, :],
+        predictions[None, :, :],
+    ).sum(axis=2)
+    intersected_groundtruth_counts = intersection_counts.sum(axis=1)
+    intersected_prediction_counts = intersection_counts.sum(axis=0)
+
+    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=np.uint64)
+    confusion_matrix[0, 0] = background_counts
+    confusion_matrix[
+        np.ix_(groundtruth_labels + 1, prediction_labels + 1)
+    ] = intersection_counts
+    confusion_matrix[0, prediction_labels + 1] = (
+        prediction_counts - intersected_prediction_counts
+    )
+    confusion_matrix[groundtruth_labels + 1, 0] = (
+        groundtruth_counts - intersected_groundtruth_counts
+    )
+    return confusion_matrix
+
+
+def compute_metrics(
+    confusion_matrix: NDArray[np.uint64],
+) -> tuple[
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+    float,
+    NDArray[np.float64],
+    NDArray[np.float64],
+    NDArray[np.float64],
+]:
+    """
+    Computes semantic segmentation metrics.
+
+    Parameters
+    ----------
+    confusion_matrix : NDArray[np.uint64]
+        A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
+
+    Returns
+    -------
+    NDArray[np.float64]
+        Precision.
+    NDArray[np.float64]
+        Recall.
+    NDArray[np.float64]
+        F1 Score.
+    float
+        Accuracy.
+    NDArray[np.float64]
+        Confusion matrix containing IOU values.
+    NDArray[np.float64]
+        Unmatched prediction ratios.
+    NDArray[np.float64]
+        Unmatched ground truth ratios.
+    """
+    n_labels = confusion_matrix.shape[0] - 1
+    n_pixels = confusion_matrix.sum()
+    gt_counts = confusion_matrix[1:, :].sum(axis=1)
+    pd_counts = confusion_matrix[:, 1:].sum(axis=0)
+
+    # compute iou, unmatched ground truth and unmatched prediction ratios
+    intersection_ = confusion_matrix[1:, 1:]
+    union_ = (
+        gt_counts[:, np.newaxis] + pd_counts[np.newaxis, :] - intersection_
+    )
+
+    ious = np.zeros((n_labels, n_labels), dtype=np.float64)
+    np.divide(
+        intersection_,
+        union_,
+        where=union_ > 1e-9,
+        out=ious,
+    )
+
+    unmatched_prediction_ratio = np.zeros((n_labels), dtype=np.float64)
+    np.divide(
+        confusion_matrix[0, 1:],
+        pd_counts,
+        where=pd_counts > 1e-9,
+        out=unmatched_prediction_ratio,
+    )
+
+    unmatched_ground_truth_ratio = np.zeros((n_labels), dtype=np.float64)
+    np.divide(
+        confusion_matrix[1:, 0],
+        gt_counts,
+        where=gt_counts > 1e-9,
+        out=unmatched_ground_truth_ratio,
+    )
+
+    # compute precision, recall, f1
+    tp_counts = confusion_matrix.diagonal()[1:]
+
+    precision = np.zeros(n_labels, dtype=np.float64)
+    np.divide(tp_counts, pd_counts, where=pd_counts > 1e-9, out=precision)
+
+    recall = np.zeros_like(precision)
+    np.divide(tp_counts, gt_counts, where=gt_counts > 1e-9, out=recall)
+
+    f1_score = np.zeros_like(precision)
+    np.divide(
+        2 * (precision * recall),
+        (precision + recall),
+        where=(precision + recall) > 0,
+        out=f1_score,
+    )
+
+    # compute accuracy
+    tp_count = confusion_matrix[1:, 1:].diagonal().sum()
+    background_count = confusion_matrix[0, 0]
+    accuracy = (
+        (tp_count + background_count) / n_pixels if n_pixels > 0 else 0.0
+    )
+
+    return (
+        precision,
+        recall,
+        f1_score,
+        accuracy,
+        ious,
+        unmatched_prediction_ratio,
+        unmatched_ground_truth_ratio,
+    )
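A minimal sketch of how these two functions compose, not part of the package diff: one label over four flattened pixels, with one true-positive pixel, one unmatched ground truth pixel, one unmatched prediction pixel, and one background pixel. Row and column 0 of the intermediate matrix hold the background and unmatched counts, which is why it has shape (n_labels + 1, n_labels + 1).

>>> import numpy as np
>>> from valor_lite.semantic_segmentation.computation import (
...     compute_intermediates,
...     compute_metrics,
... )
>>> gts = np.array([[True, True, False, False]])
>>> pds = np.array([[True, False, True, False]])
>>> cm = compute_intermediates(gts, pds, np.array([0]), np.array([0]), n_labels=1)
>>> cm
array([[1, 1],
       [1, 1]], dtype=uint64)
>>> precision, recall, f1, accuracy, ious, _, _ = compute_metrics(cm)
>>> round(float(accuracy), 3), round(float(ious[0, 0]), 3)
(0.5, 0.333)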
valor_lite/semantic_segmentation/evaluator.py

@@ -0,0 +1,414 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+from numpy.typing import NDArray
+
+from valor_lite.cache import (
+    FileCacheReader,
+    FileCacheWriter,
+    MemoryCacheReader,
+    MemoryCacheWriter,
+)
+from valor_lite.exceptions import EmptyCacheError
+from valor_lite.semantic_segmentation.computation import compute_metrics
+from valor_lite.semantic_segmentation.metric import MetricType
+from valor_lite.semantic_segmentation.shared import (
+    EvaluatorInfo,
+    decode_metadata_fields,
+    encode_metadata_fields,
+    extract_counts,
+    extract_labels,
+    generate_cache_path,
+    generate_metadata_path,
+    generate_schema,
+)
+from valor_lite.semantic_segmentation.utilities import (
+    unpack_precision_recall_iou_into_metric_lists,
+)
+
+
+class Builder:
+    def __init__(
+        self,
+        writer: MemoryCacheWriter | FileCacheWriter,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._writer = writer
+        self._metadata_fields = metadata_fields
+
+    @classmethod
+    def in_memory(
+        cls,
+        batch_size: int = 10_000,
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create an in-memory evaluator cache.
+
+        Parameters
+        ----------
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        # create cache
+        writer = MemoryCacheWriter.create(
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+        )
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    @classmethod
+    def persistent(
+        cls,
+        path: str | Path,
+        batch_size: int = 10_000,
+        rows_per_file: int = 100_000,
+        compression: str = "snappy",
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        """
+        Create a persistent file-based evaluator cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Where to store the file-based cache.
+        batch_size : int, default=10_000
+            The target number of rows to buffer before writing to the cache. Defaults to 10_000.
+        rows_per_file : int, default=100_000
+            The target number of rows to store per cache file. Defaults to 100_000.
+        compression : str, default="snappy"
+            The compression method used when writing cache files.
+        metadata_fields : list[tuple[str, str | pa.DataType]], optional
+            Optional metadata field definitions.
+        """
+        path = Path(path)
+
+        # create cache
+        writer = FileCacheWriter.create(
+            path=generate_cache_path(path),
+            schema=generate_schema(metadata_fields),
+            batch_size=batch_size,
+            rows_per_file=rows_per_file,
+            compression=compression,
+        )
+
+        # write metadata
+        metadata_path = generate_metadata_path(path)
+        with open(metadata_path, "w") as f:
+            encoded_types = encode_metadata_fields(metadata_fields)
+            json.dump(encoded_types, f, indent=2)
+
+        return cls(
+            writer=writer,
+            metadata_fields=metadata_fields,
+        )
+
+    def finalize(
+        self,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Performs data finalization and some preprocessing steps.
+
+        Parameters
+        ----------
+        index_to_label_override : dict[int, str], optional
+            Pre-configures label mapping. Used when operating over filtered subsets.
+
+        Returns
+        -------
+        Evaluator
+            A ready-to-use evaluator object.
+        """
+        self._writer.flush()
+        if self._writer.count_rows() == 0:
+            raise EmptyCacheError()
+
+        reader = self._writer.to_reader()
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        return Evaluator(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=self._metadata_fields,
+        )
+
+
+class Evaluator:
+    def __init__(
+        self,
+        reader: MemoryCacheReader | FileCacheReader,
+        index_to_label: dict[int, str],
+        metadata_fields: list[tuple[str, str | pa.DataType]] | None = None,
+    ):
+        self._reader = reader
+        self._index_to_label = index_to_label
+        self._metadata_fields = metadata_fields
+
+    @property
+    def info(self) -> EvaluatorInfo:
+        return self.get_info()
+
+    def get_info(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+    ) -> EvaluatorInfo:
+        info = EvaluatorInfo()
+        info.number_of_rows = self._reader.count_rows()
+        info.number_of_labels = len(self._index_to_label)
+        info.metadata_fields = self._metadata_fields
+        (
+            info.number_of_datums,
+            info.number_of_pixels,
+            info.number_of_groundtruth_pixels,
+            info.number_of_prediction_pixels,
+        ) = extract_counts(
+            reader=self._reader,
+            datums=datums,
+            groundtruths=groundtruths,
+            predictions=predictions,
+        )
+        return info
+
+    @classmethod
+    def load(
+        cls,
+        path: str | Path,
+        index_to_label_override: dict[int, str] | None = None,
+    ):
+        """
+        Load from an existing semantic segmentation cache.
+
+        Parameters
+        ----------
+        path : str | Path
+            Path to the existing cache.
+        index_to_label_override : dict[int, str], optional
+            Option to preset the index-to-label dictionary. Used when loading from filtered caches.
+        """
+        # validate path
+        path = Path(path)
+        if not path.exists():
+            raise FileNotFoundError(f"Directory does not exist: {path}")
+        elif not path.is_dir():
+            raise NotADirectoryError(
+                f"Path exists but is not a directory: {path}"
+            )
+
+        # load cache
+        reader = FileCacheReader.load(generate_cache_path(path))
+
+        # extract labels
+        index_to_label = extract_labels(
+            reader=reader,
+            index_to_label_override=index_to_label_override,
+        )
+
+        # read config
+        metadata_path = generate_metadata_path(path)
+        metadata_fields = None
+        with open(metadata_path, "r") as f:
+            metadata_types = json.load(f)
+            metadata_fields = decode_metadata_fields(metadata_types)
+
+        return cls(
+            reader=reader,
+            index_to_label=index_to_label,
+            metadata_fields=metadata_fields,
+        )
+
+    def filter(
+        self,
+        datums: pc.Expression | None = None,
+        groundtruths: pc.Expression | None = None,
+        predictions: pc.Expression | None = None,
+        path: str | Path | None = None,
+    ) -> Evaluator:
+        """
+        Filter evaluator cache.
+
+        Parameters
+        ----------
+        datums : pc.Expression, optional
+            A filter expression used to filter datums.
+        groundtruths : pc.Expression, optional
+            A filter expression used to filter ground truth annotations.
+        predictions : pc.Expression, optional
+            A filter expression used to filter predictions.
+        path : str | Path, optional
+            Where to store the filtered cache if storing on disk.
+
+        Returns
+        -------
+        Evaluator
+            A new evaluator object containing the filtered cache.
+        """
+        if isinstance(self._reader, FileCacheReader):
+            if not path:
+                raise ValueError(
+                    "expected path to be defined for file-based cache"
+                )
+            builder = Builder.persistent(
+                path=path,
+                batch_size=self._reader.batch_size,
+                rows_per_file=self._reader.rows_per_file,
+                compression=self._reader.compression,
+                metadata_fields=self.info.metadata_fields,
+            )
+        else:
+            builder = Builder.in_memory(
+                batch_size=self._reader.batch_size,
+                metadata_fields=self.info.metadata_fields,
+            )
+
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            pairs = np.column_stack([tbl[col].to_numpy() for col in columns])
+
+            n_pairs = pairs.shape[0]
+            gt_ids = pairs[:, (0, 1)].astype(np.int64)
+            pd_ids = pairs[:, (0, 2)].astype(np.int64)
+
+            if groundtruths is not None:
+                mask_valid_gt = np.zeros(n_pairs, dtype=np.bool_)
+                gt_tbl = tbl.filter(groundtruths)
+                gt_pairs = np.column_stack(
+                    [
+                        gt_tbl[col].to_numpy()
+                        for col in ("datum_id", "gt_label_id")
+                    ]
+                ).astype(np.int64)
+                for gt in np.unique(gt_pairs, axis=0):
+                    mask_valid_gt |= (gt_ids == gt).all(axis=1)
+            else:
+                mask_valid_gt = np.ones(n_pairs, dtype=np.bool_)
+
+            if predictions is not None:
+                mask_valid_pd = np.zeros(n_pairs, dtype=np.bool_)
+                pd_tbl = tbl.filter(predictions)
+                pd_pairs = np.column_stack(
+                    [
+                        pd_tbl[col].to_numpy()
+                        for col in ("datum_id", "pd_label_id")
+                    ]
+                ).astype(np.int64)
+                for pd in np.unique(pd_pairs, axis=0):
+                    mask_valid_pd |= (pd_ids == pd).all(axis=1)
+            else:
+                mask_valid_pd = np.ones(n_pairs, dtype=np.bool_)
+
+            mask_valid = mask_valid_gt | mask_valid_pd
+            mask_valid_gt &= mask_valid
+            mask_valid_pd &= mask_valid
+
+            pairs[~mask_valid_gt, 1] = -1
+            pairs[~mask_valid_pd, 2] = -1
+
+            for idx, col in enumerate(columns):
+                tbl = tbl.set_column(
+                    tbl.schema.names.index(col), col, pa.array(pairs[:, idx])
+                )
+            builder._writer.write_table(tbl)
+
+        return builder.finalize(index_to_label_override=self._index_to_label)
+
+    def _compute_confusion_matrix_intermediate(
+        self, datums: pc.Expression | None = None
+    ) -> NDArray[np.uint64]:
+        """
+        Accumulates the intermediate confusion matrix of pixel counts.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        NDArray[np.uint64]
+            A 2-D confusion matrix with shape (n_labels + 1, n_labels + 1).
+        """
+        n_labels = len(self._index_to_label)
+        confusion_matrix = np.zeros(
+            (n_labels + 1, n_labels + 1), dtype=np.uint64
+        )
+        for tbl in self._reader.iterate_tables(filter=datums):
+            columns = (
+                "datum_id",
+                "gt_label_id",
+                "pd_label_id",
+            )
+            ids = np.column_stack(
+                [tbl[col].to_numpy() for col in columns]
+            ).astype(np.int64)
+            counts = tbl["count"].to_numpy()
+
+            mask_null_gts = ids[:, 1] == -1
+            mask_null_pds = ids[:, 2] == -1
+            confusion_matrix[0, 0] += counts[
+                mask_null_gts & mask_null_pds
+            ].sum()
+            for idx in range(n_labels):
+                mask_gts = ids[:, 1] == idx
+                for pidx in range(n_labels):
+                    mask_pds = ids[:, 2] == pidx
+                    confusion_matrix[idx + 1, pidx + 1] += counts[
+                        mask_gts & mask_pds
+                    ].sum()
+
+                mask_unmatched_gts = mask_gts & mask_null_pds
+                confusion_matrix[idx + 1, 0] += counts[
+                    mask_unmatched_gts
+                ].sum()
+                mask_unmatched_pds = mask_null_gts & (ids[:, 2] == idx)
+                confusion_matrix[0, idx + 1] += counts[
+                    mask_unmatched_pds
+                ].sum()
+        return confusion_matrix
+
+    def compute_precision_recall_iou(
+        self, datums: pc.Expression | None = None
+    ) -> dict[MetricType, list]:
+        """
+        Performs an evaluation and returns metrics.
+
+        Parameters
+        ----------
+        datums : pyarrow.compute.Expression, optional
+            Option to filter datums by an expression.
+
+        Returns
+        -------
+        dict[MetricType, list]
+            A dictionary mapping MetricType enumerations to lists of computed metrics.
+        """
+        confusion_matrix = self._compute_confusion_matrix_intermediate(
+            datums=datums
+        )
+        results = compute_metrics(confusion_matrix=confusion_matrix)
+        return unpack_precision_recall_iou_into_metric_lists(
+            results=results,
+            index_to_label=self._index_to_label,
+        )
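To close the loop, a hedged sketch of the query side of this evaluator, not part of the package diff. It assumes a cache directory (here ./seg_cache) has already been populated and finalized via the loader (loader.py, which is not shown in this hunk); the `datum_id` column name comes from the code above, while the path and the filter threshold are purely illustrative.

>>> import pyarrow.compute as pc
>>> from valor_lite.semantic_segmentation.evaluator import Evaluator
>>> evaluator = Evaluator.load("./seg_cache")            # hypothetical cache directory
>>> info = evaluator.info                                # row, label, and pixel counts
>>> metrics = evaluator.compute_precision_recall_iou()   # metrics over every datum
>>> subset = evaluator.compute_precision_recall_iou(     # restrict to a datum subset
...     datums=pc.field("datum_id") < 100
... )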