valor-lite 0.33.18__tar.gz → 0.34.0__tar.gz
This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
- {valor_lite-0.33.18/valor_lite.egg-info → valor_lite-0.34.0}/PKG-INFO +14 -31
- {valor_lite-0.33.18 → valor_lite-0.34.0}/pyproject.toml +13 -7
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/computation.py +11 -9
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/manager.py +3 -3
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/metric.py +6 -6
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/utilities.py +11 -8
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/computation.py +30 -26
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/manager.py +5 -3
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/metric.py +12 -12
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/utilities.py +24 -23
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/computation.py +9 -9
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/manager.py +2 -1
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/metric.py +10 -10
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/utilities.py +7 -6
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/computation.py +1 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0/valor_lite.egg-info}/PKG-INFO +14 -31
- valor_lite-0.34.0/valor_lite.egg-info/SOURCES.txt +42 -0
- valor_lite-0.34.0/valor_lite.egg-info/requires.txt +28 -0
- valor_lite-0.33.18/benchmarks/.gitignore +0 -2
- valor_lite-0.33.18/benchmarks/benchmark_classification.py +0 -272
- valor_lite-0.33.18/benchmarks/benchmark_objdet.py +0 -410
- valor_lite-0.33.18/benchmarks/synthetic/benchmark_semantic_segmentation.py +0 -94
- valor_lite-0.33.18/examples/.gitignore +0 -1
- valor_lite-0.33.18/examples/benchmarking.ipynb +0 -279
- valor_lite-0.33.18/examples/object-detection.ipynb +0 -3023
- valor_lite-0.33.18/examples/tabular_classification.ipynb +0 -633
- valor_lite-0.33.18/examples/text_generation.ipynb +0 -500
- valor_lite-0.33.18/tests/classification/conftest.py +0 -331
- valor_lite-0.33.18/tests/classification/test_accuracy.py +0 -226
- valor_lite-0.33.18/tests/classification/test_confusion_matrix.py +0 -618
- valor_lite-0.33.18/tests/classification/test_counts.py +0 -1318
- valor_lite-0.33.18/tests/classification/test_dataloader.py +0 -18
- valor_lite-0.33.18/tests/classification/test_evaluator.py +0 -64
- valor_lite-0.33.18/tests/classification/test_f1.py +0 -394
- valor_lite-0.33.18/tests/classification/test_filtering.py +0 -570
- valor_lite-0.33.18/tests/classification/test_metric.py +0 -39
- valor_lite-0.33.18/tests/classification/test_precision.py +0 -392
- valor_lite-0.33.18/tests/classification/test_recall.py +0 -327
- valor_lite-0.33.18/tests/classification/test_rocauc.py +0 -396
- valor_lite-0.33.18/tests/classification/test_schemas.py +0 -30
- valor_lite-0.33.18/tests/classification/test_stability.py +0 -70
- valor_lite-0.33.18/tests/object_detection/__init__.py +0 -0
- valor_lite-0.33.18/tests/object_detection/conftest.py +0 -1123
- valor_lite-0.33.18/tests/object_detection/test_accuracy.py +0 -483
- valor_lite-0.33.18/tests/object_detection/test_average_precision.py +0 -854
- valor_lite-0.33.18/tests/object_detection/test_average_recall.py +0 -604
- valor_lite-0.33.18/tests/object_detection/test_confusion_matrix.py +0 -1789
- valor_lite-0.33.18/tests/object_detection/test_counts.py +0 -601
- valor_lite-0.33.18/tests/object_detection/test_dataloader.py +0 -144
- valor_lite-0.33.18/tests/object_detection/test_evaluator.py +0 -130
- valor_lite-0.33.18/tests/object_detection/test_f1.py +0 -470
- valor_lite-0.33.18/tests/object_detection/test_filtering.py +0 -586
- valor_lite-0.33.18/tests/object_detection/test_iou.py +0 -371
- valor_lite-0.33.18/tests/object_detection/test_metric.py +0 -39
- valor_lite-0.33.18/tests/object_detection/test_pr_curve.py +0 -234
- valor_lite-0.33.18/tests/object_detection/test_precision.py +0 -470
- valor_lite-0.33.18/tests/object_detection/test_recall.py +0 -469
- valor_lite-0.33.18/tests/object_detection/test_schemas.py +0 -160
- valor_lite-0.33.18/tests/object_detection/test_stability.py +0 -103
- valor_lite-0.33.18/tests/semantic_segmentation/__init__.py +0 -0
- valor_lite-0.33.18/tests/semantic_segmentation/conftest.py +0 -132
- valor_lite-0.33.18/tests/semantic_segmentation/test_accuracy.py +0 -67
- valor_lite-0.33.18/tests/semantic_segmentation/test_annotation.py +0 -136
- valor_lite-0.33.18/tests/semantic_segmentation/test_confusion_matrix.py +0 -153
- valor_lite-0.33.18/tests/semantic_segmentation/test_dataloader.py +0 -8
- valor_lite-0.33.18/tests/semantic_segmentation/test_evaluator.py +0 -66
- valor_lite-0.33.18/tests/semantic_segmentation/test_f1.py +0 -94
- valor_lite-0.33.18/tests/semantic_segmentation/test_filtering.py +0 -75
- valor_lite-0.33.18/tests/semantic_segmentation/test_iou.py +0 -125
- valor_lite-0.33.18/tests/semantic_segmentation/test_metric.py +0 -39
- valor_lite-0.33.18/tests/semantic_segmentation/test_precision.py +0 -96
- valor_lite-0.33.18/tests/semantic_segmentation/test_recall.py +0 -94
- valor_lite-0.33.18/tests/semantic_segmentation/test_stability.py +0 -85
- valor_lite-0.33.18/tests/text_generation/__init__.py +0 -0
- valor_lite-0.33.18/tests/text_generation/conftest.py +0 -240
- valor_lite-0.33.18/tests/text_generation/llm/__init__.py +0 -0
- valor_lite-0.33.18/tests/text_generation/llm/test_generation.py +0 -140
- valor_lite-0.33.18/tests/text_generation/llm/test_integrations.py +0 -364
- valor_lite-0.33.18/tests/text_generation/llm/test_utilities.py +0 -83
- valor_lite-0.33.18/tests/text_generation/llm/test_validators.py +0 -107
- valor_lite-0.33.18/tests/text_generation/metrics/test_answer_correctness.py +0 -87
- valor_lite-0.33.18/tests/text_generation/metrics/test_answer_relevance.py +0 -109
- valor_lite-0.33.18/tests/text_generation/metrics/test_bias.py +0 -104
- valor_lite-0.33.18/tests/text_generation/metrics/test_context_precision.py +0 -195
- valor_lite-0.33.18/tests/text_generation/metrics/test_context_recall.py +0 -194
- valor_lite-0.33.18/tests/text_generation/metrics/test_context_relevance.py +0 -135
- valor_lite-0.33.18/tests/text_generation/metrics/test_faithfulness.py +0 -155
- valor_lite-0.33.18/tests/text_generation/metrics/test_hallucination.py +0 -118
- valor_lite-0.33.18/tests/text_generation/metrics/test_metric.py +0 -39
- valor_lite-0.33.18/tests/text_generation/metrics/test_rouge.py +0 -242
- valor_lite-0.33.18/tests/text_generation/metrics/test_sentence_bleu.py +0 -224
- valor_lite-0.33.18/tests/text_generation/metrics/test_summary_coherence.py +0 -88
- valor_lite-0.33.18/tests/text_generation/metrics/test_toxicity.py +0 -105
- valor_lite-0.33.18/tests/text_generation/test_evaluator.py +0 -170
- valor_lite-0.33.18/tests/text_generation/test_manager.py +0 -110
- valor_lite-0.33.18/valor_lite/LICENSE +0 -21
- valor_lite-0.33.18/valor_lite/__init__.py +0 -0
- valor_lite-0.33.18/valor_lite/text_generation/llm/__init__.py +0 -0
- valor_lite-0.33.18/valor_lite.egg-info/SOURCES.txt +0 -122
- valor_lite-0.33.18/valor_lite.egg-info/requires.txt +0 -22
- {valor_lite-0.33.18 → valor_lite-0.34.0}/README.md +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/setup.cfg +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0/valor_lite}/LICENSE +0 -0
- {valor_lite-0.33.18/tests → valor_lite-0.34.0/valor_lite}/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/annotation.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/annotation.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/profiling.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/annotation.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/semantic_segmentation/benchmark.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/annotation.py +0 -0
- {valor_lite-0.33.18/tests/classification → valor_lite-0.34.0/valor_lite/text_generation/llm}/__init__.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/exceptions.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/generation.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/instructions.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/integrations.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/utilities.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/llm/validators.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/manager.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/text_generation/metric.py +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite.egg-info/top_level.txt +0 -0
{valor_lite-0.33.18/valor_lite.egg-info → valor_lite-0.34.0}/PKG-INFO

```diff
@@ -1,42 +1,16 @@
 Metadata-Version: 2.1
 Name: valor-lite
-Version: 0.33.18
-Summary:
-License: MIT License
-
-Copyright (c) 2023 Striveworks
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
+Version: 0.34.0
+Summary: Evaluate machine learning models.
 Project-URL: homepage, https://www.striveworks.com
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
-
+Requires-Dist: numpy
+Requires-Dist: tqdm
+Requires-Dist: shapely
 Requires-Dist: evaluate
-Requires-Dist: importlib_metadata; python_version < "3.8"
 Requires-Dist: nltk
-Requires-Dist: numpy
-Requires-Dist: Pillow>=9.1.0
-Requires-Dist: requests
 Requires-Dist: rouge_score
-Requires-Dist: shapely
-Requires-Dist: tqdm
 Provides-Extra: mistral
 Requires-Dist: mistralai>=1.0; extra == "mistral"
 Provides-Extra: openai
@@ -45,6 +19,15 @@ Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: coverage; extra == "test"
 Requires-Dist: pre-commit; extra == "test"
+Provides-Extra: docs
+Requires-Dist: mkdocs; extra == "docs"
+Requires-Dist: mkdocs-material; extra == "docs"
+Requires-Dist: mkdocstrings; extra == "docs"
+Requires-Dist: mkdocstrings-python; extra == "docs"
+Requires-Dist: mkdocs-include-dir-to-nav; extra == "docs"
+Requires-Dist: mkdocs-swagger-ui-tag; extra == "docs"
+Provides-Extra: dev
+Requires-Dist: valor-lite[docs,mistral,openai,test]; extra == "dev"
 
 # valor-lite: Fast, local machine learning evaluation.
 
```
{valor_lite-0.33.18 → valor_lite-0.34.0}/pyproject.toml

```diff
@@ -1,20 +1,17 @@
 [project]
 name = "valor-lite"
 dynamic = ["version"]
-description = "
+description = "Evaluate machine learning models."
 readme = "README.md"
 requires-python = ">=3.10"
 license = { file = "LICENSE" }
 dependencies = [
+    "numpy",
+    "tqdm",
+    "shapely",
     "evaluate",
-    "importlib_metadata; python_version < '3.8'",
     "nltk",
-    "numpy",
-    "Pillow >= 9.1.0",
-    "requests",
     "rouge_score",
-    "shapely",
-    "tqdm",
 ]
 
 [project.urls]
@@ -28,6 +25,15 @@ build-backend = "setuptools.build_meta"
 mistral = ["mistralai >= 1.0"]
 openai = ["openai"]
 test = ["pytest", "coverage", "pre-commit"]
+docs = [
+    "mkdocs",
+    "mkdocs-material",
+    "mkdocstrings",
+    "mkdocstrings-python",
+    "mkdocs-include-dir-to-nav",
+    "mkdocs-swagger-ui-tag",
+]
+dev = ["valor-lite[openai, mistral, test, docs]"]
 
 [tool.black]
 line-length = 79
```
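Packaging takeaway: the runtime dependency list shrinks (importlib_metadata, Pillow, and requests are gone), and two extras are added, `docs` plus a `dev` extra that pulls in the other groups. A small sketch of how one might verify the declared metadata against an installed copy, using only the standard library (assumes valor-lite 0.34.0 is installed, e.g. via `pip install "valor-lite[dev]"`):

```python
# Sketch: inspect the metadata of an installed valor-lite 0.34.0.
from importlib.metadata import metadata

md = metadata("valor-lite")
print(md["Version"])                 # 0.34.0
print(md.get_all("Provides-Extra"))  # ['mistral', 'openai', 'test', 'docs', 'dev']
```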
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/computation.py

```diff
@@ -212,7 +212,7 @@ def _count_with_examples(
     data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.intp]]:
     """
     Helper function for counting occurences of unique detailed pairs.
 
@@ -231,7 +231,7 @@ def _count_with_examples(
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
-    NDArray[np.
+    NDArray[np.intp]
         Counts for each unique label index.
     """
     unique_rows, indices = np.unique(
```
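The signature change above tracks what NumPy actually returns: `np.unique` reports indices and counts as platform-sized integers (`np.intp`), not `int32`. A minimal standalone check of that behavior:

```python
import numpy as np

# np.unique hands back counts (and indices) as np.intp, which is what the
# updated _count_with_examples return annotation now declares.
data = np.array([[0.0, 1.0], [0.0, 1.0], [2.0, 3.0]])
unique_rows, counts = np.unique(data, axis=0, return_counts=True)
print(counts, counts.dtype)  # [2 1] int64 (np.intp on 64-bit platforms)
```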
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/computation.py (continued)

```diff
@@ -282,18 +282,20 @@ def compute_confusion_matrix(
     NDArray[np.float64]
         Confusion matrix.
     NDArray[np.int32]
-        Ground
+        Unmatched Ground Truths.
     """
 
     n_labels = label_metadata.shape[0]
     n_scores = score_thresholds.shape[0]
 
-    confusion_matrix =
+    confusion_matrix = np.full(
         (n_scores, n_labels, n_labels, 2 * n_examples + 1),
+        fill_value=-1.0,
         dtype=np.float32,
     )
-
+    unmatched_ground_truths = np.full(
         (n_scores, n_labels, n_examples + 1),
+        fill_value=-1,
        dtype=np.int32,
     )
 
@@ -339,7 +341,7 @@
             score_idx, misclf_labels[:, 0], misclf_labels[:, 1], 0
         ] = misclf_counts
 
-
+        unmatched_ground_truths[score_idx, misprd_labels, 0] = misprd_counts
 
         if n_examples > 0:
             for label_idx in range(n_labels):
@@ -375,16 +377,16 @@
                     1 : 2 * misclf_label_examples.shape[0] + 1,
                 ] = misclf_label_examples[:, [0, 3]].flatten()
 
-                #
+                # unmatched ground truth examples
                 mask_misprd_label = misprd_examples[:, 1] == label_idx
                 if misprd_examples.size > 0:
                     misprd_label_examples = misprd_examples[mask_misprd_label][
                         :n_examples
                     ]
-
+                    unmatched_ground_truths[
                         score_idx,
                         label_idx,
                         1 : misprd_label_examples.shape[0] + 1,
                     ] = misprd_label_examples[:, 0].flatten()
 
-    return confusion_matrix,
+    return confusion_matrix, unmatched_ground_truths  # type: ignore[reportReturnType]
```
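The new buffer initialization spells out the `-1` sentinel with `np.full`, so any slot the counting loops never write still reads as "no data" when unpacked later. A toy-sized version of the two buffers above:

```python
import numpy as np

# Toy-sized versions of the buffers above; every untouched slot stays -1,
# which the unpacking code later treats as "no count / no example".
n_scores, n_labels, n_examples = 2, 3, 1
confusion_matrix = np.full(
    (n_scores, n_labels, n_labels, 2 * n_examples + 1),
    fill_value=-1.0,
    dtype=np.float32,
)
unmatched_ground_truths = np.full(
    (n_scores, n_labels, n_examples + 1),
    fill_value=-1,
    dtype=np.int32,
)
print(confusion_matrix.shape, unmatched_ground_truths[0, 0])  # (2, 3, 3, 3) [-1 -1]
```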
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/manager.py

```diff
@@ -4,6 +4,7 @@ from dataclasses import dataclass
 import numpy as np
 from numpy.typing import NDArray
 from tqdm import tqdm
+
 from valor_lite.classification.annotation import Classification
 from valor_lite.classification.computation import (
     compute_confusion_matrix,
@@ -38,7 +39,7 @@ filtered_metrics = evaluator.evaluate(filter_mask=filter_mask)
 
 @dataclass
 class Filter:
-    indices: NDArray[np.
+    indices: NDArray[np.intp]
     label_metadata: NDArray[np.int32]
     n_datums: int
 
@@ -169,8 +170,7 @@ class Evaluator:
         label_metadata_per_datum = self._label_metadata_per_datum.copy()
         label_metadata_per_datum[:, ~mask] = 0
 
-        label_metadata
-        label_metadata = np.transpose(
+        label_metadata: NDArray[np.int32] = np.transpose(
             np.sum(
                 label_metadata_per_datum,
                 axis=1,
```
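In the filter path above, per-label counts are rebuilt by zeroing the masked-out datums and summing over the datum axis; the added `NDArray[np.int32]` annotation only names the result type. A toy walk-through under assumed shapes (treating axis 0 as ground-truth versus prediction counts is an assumption):

```python
import numpy as np

# Assumed layout: axis 0 = [ground truths, predictions], axis 1 = datums,
# axis 2 = labels. Masked datums are zeroed, then summed away.
label_metadata_per_datum = np.array(
    [
        [[1, 0], [0, 1]],  # ground-truth counts per (datum, label)
        [[1, 1], [1, 0]],  # prediction counts per (datum, label)
    ],
    dtype=np.int32,
)
mask = np.array([True, False])  # keep only datum 0
label_metadata_per_datum[:, ~mask] = 0
label_metadata: np.ndarray = np.transpose(
    np.sum(label_metadata_per_datum, axis=1)
)
print(label_metadata)  # per-label [gt, pd] counts: [[1 1], [0 1]]
```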
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/metric.py

```diff
@@ -321,7 +321,7 @@ class Metric(BaseMetric):
                 ],
             ],
         ],
-
+        unmatched_ground_truths: dict[
             str,  # ground truth label value
             dict[
                 str,  # either `count` or `examples`
@@ -335,8 +335,8 @@ class Metric(BaseMetric):
     The confusion matrix and related metrics for the classification task.
 
     This class encapsulates detailed information about the model's performance, including correct
-    predictions, misclassifications,
-    (false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
+    predictions, misclassifications, unmatched predictions (subset of false positives), and unmatched ground truths
+    (subset of false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
 
     Confusion Matrix Structure:
     {
@@ -358,7 +358,7 @@ class Metric(BaseMetric):
         ...
     }
 
-
+    Unmatched Ground Truths Structure:
     {
         ground_truth_label: {
             'count': int,
@@ -379,7 +379,7 @@ class Metric(BaseMetric):
         A nested dictionary where the first key is the ground truth label value, the second key
         is the prediction label value, and the innermost dictionary contains either a `count`
        or a list of `examples`. Each example includes the datum UID and prediction score.
-
+    unmatched_ground_truths : dict
        A dictionary where each key is a ground truth label value for which the model failed to predict
        (false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
        Each example includes the datum UID.
@@ -396,7 +396,7 @@ class Metric(BaseMetric):
             type=MetricType.ConfusionMatrix.value,
             value={
                 "confusion_matrix": confusion_matrix,
-                "
+                "unmatched_ground_truths": unmatched_ground_truths,
             },
             parameters={
                 "score_threshold": score_threshold,
```
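For consumers of the classification metrics, the practical effect of these changes is a renamed key in the `ConfusionMatrix` payload. A standalone sketch of the 0.34.0 value structure, assembled from the structures documented above (the concrete labels and counts are invented):

```python
# Sketch: the shape of a classification ConfusionMatrix metric in 0.34.0,
# with the payload key renamed to "unmatched_ground_truths".
metric = {
    "type": "ConfusionMatrix",
    "value": {
        "confusion_matrix": {"dog": {"dog": {"count": 2}}},
        "unmatched_ground_truths": {"cat": {"count": 1}},  # renamed key
    },
    "parameters": {"score_threshold": 0.5},
}
print(metric["value"]["unmatched_ground_truths"]["cat"]["count"])  # 1
```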
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/classification/utilities.py

```diff
@@ -2,6 +2,7 @@ from collections import defaultdict
 
 import numpy as np
 from numpy.typing import NDArray
+
 from valor_lite.classification.metric import Metric, MetricType
 
 
@@ -153,20 +154,20 @@ def _unpack_confusion_matrix_value(
     }
 
 
-def
-
+def _unpack_unmatched_ground_truths_value(
+    unmatched_ground_truths: NDArray[np.int32],
     number_of_labels: int,
     number_of_examples: int,
     index_to_uid: dict[int, str],
     index_to_label: dict[int, str],
 ) -> dict[str, dict[str, int | list[dict[str, str]]]]:
     """
-    Unpacks a numpy array of
+    Unpacks a numpy array of unmatched ground truth counts and examples.
     """
 
     datum_idx = (
         lambda gt_label_idx, example_idx: int(  # noqa: E731 - lambda fn
-
+            unmatched_ground_truths[
                 gt_label_idx,
                 example_idx + 1,
             ]
@@ -176,7 +177,7 @@ def _unpack_missing_predictions_value(
     return {
         index_to_label[gt_label_idx]: {
             "count": max(
-                int(
+                int(unmatched_ground_truths[gt_label_idx, 0]),
                 0,
             ),
             "examples": [
@@ -197,7 +198,7 @@ def unpack_confusion_matrix_into_metric_list(
     index_to_label: dict[int, str],
 ) -> list[Metric]:
 
-    (confusion_matrix,
+    (confusion_matrix, unmatched_ground_truths) = results
     n_scores, n_labels, _, _ = confusion_matrix.shape
     return [
         Metric.confusion_matrix(
@@ -210,8 +211,10 @@ def unpack_confusion_matrix_into_metric_list(
                 index_to_label=index_to_label,
                 index_to_uid=index_to_uid,
             ),
-
-
+            unmatched_ground_truths=_unpack_unmatched_ground_truths_value(
+                unmatched_ground_truths=unmatched_ground_truths[
+                    score_idx, :, :
+                ],
                 number_of_labels=n_labels,
                 number_of_examples=number_of_examples,
                 index_to_label=index_to_label,
```
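The renamed unpacking helper assumes a fixed per-label layout: slot 0 holds the count and the remaining slots hold example datum indices, with `-1` as padding (hence the `max(..., 0)` guard). A standalone sketch of that decode:

```python
import numpy as np

# Per-label layout assumed by _unpack_unmatched_ground_truths_value:
# [count, example datum idx, example datum idx, ...], padded with -1.
unmatched_ground_truths = np.array(
    [
        [2, 7, 3],     # label 0: two unmatched ground truths, datums 7 and 3
        [-1, -1, -1],  # label 1: nothing recorded
    ],
    dtype=np.int32,
)
for gt_label_idx in range(unmatched_ground_truths.shape[0]):
    count = max(int(unmatched_ground_truths[gt_label_idx, 0]), 0)
    examples = [int(d) for d in unmatched_ground_truths[gt_label_idx, 1:] if d >= 0]
    print(gt_label_idx, count, examples)  # 0 2 [7, 3] then 1 0 []
```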
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/computation.py

```diff
@@ -381,9 +381,9 @@ def compute_precion_recall(
         _, indices_gt_unique = np.unique(
             tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
         )
-        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=
+        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=np.bool_)
         mask_gt_unique[indices_gt_unique] = True
-        true_positives_mask = np.zeros(n_rows, dtype=
+        true_positives_mask = np.zeros(n_rows, dtype=np.bool_)
         true_positives_mask[mask_tp_inner] = mask_gt_unique
 
         # calculate intermediates
@@ -452,9 +452,9 @@ def compute_precion_recall(
         _, indices_gt_unique = np.unique(
             tp_candidates[:, [0, 1, 4]], axis=0, return_index=True
         )
-        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=
+        mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=np.bool_)
         mask_gt_unique[indices_gt_unique] = True
-        true_positives_mask = np.zeros(n_rows, dtype=
+        true_positives_mask = np.zeros(n_rows, dtype=np.bool_)
         true_positives_mask[mask_tp_outer] = mask_gt_unique
 
         # count running tp and total for AP
```
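Both hunks pin the mask buffers to an explicit `np.bool_` dtype (the old spellings are truncated in this view, so they are not recoverable here). The masks mark the first occurrence of each unique ground-truth pairing and scatter that selection into a full-length row mask; a miniature version:

```python
import numpy as np

# Boolean masks as used above: flag the first occurrence of each unique row,
# then scatter that selection into a full-length boolean row mask.
tp_candidates = np.array([[0, 1], [0, 1], [2, 3]])
_, indices_gt_unique = np.unique(tp_candidates, axis=0, return_index=True)
mask_gt_unique = np.zeros(tp_candidates.shape[0], dtype=np.bool_)
mask_gt_unique[indices_gt_unique] = True
print(mask_gt_unique)  # [ True False  True]
```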
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/computation.py (continued)

```diff
@@ -501,8 +501,8 @@ def compute_precion_recall(
     )
 
     # calculate average precision
-    running_max_precision = np.zeros((n_ious, n_labels))
-    running_max_score = np.zeros((n_labels))
+    running_max_precision = np.zeros((n_ious, n_labels), dtype=np.float64)
+    running_max_score = np.zeros((n_labels), dtype=np.float64)
     for recall in range(100, -1, -1):
 
         # running max precision
@@ -528,8 +528,12 @@ def compute_precion_recall(
 
     # calculate mAP and mAR
     if unique_pd_labels.size > 0:
-        mAP = average_precision[:, unique_pd_labels].mean(
-
+        mAP: NDArray[np.float64] = average_precision[:, unique_pd_labels].mean(
+            axis=1
+        )
+        mAR: NDArray[np.float64] = average_recall[:, unique_pd_labels].mean(
+            axis=1
+        )
     else:
         mAP = np.zeros(n_ious, dtype=np.float64)
         mAR = np.zeros(n_scores, dtype=np.float64)
```
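The expanded lines make the reduction explicit: mAP and mAR are per-label means restricted to the labels that actually appear in the predictions, producing one value per threshold. A toy version with invented numbers:

```python
import numpy as np

# mAP as a label-wise mean over the evaluated label subset; rows are IoU
# thresholds, columns are labels.
average_precision = np.array([[0.9, 0.5, 0.7],
                              [0.8, 0.4, 0.6]])
unique_pd_labels = np.array([0, 2])  # only labels 0 and 2 were predicted
mAP = average_precision[:, unique_pd_labels].mean(axis=1)
print(mAP)  # [0.8 0.7]
```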
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/computation.py (continued)

```diff
@@ -561,14 +565,14 @@ def compute_precion_recall(
         accuracy,
         counts,
         pr_curve,
-    )
+    )  # type: ignore[reportReturnType]
 
 
 def _count_with_examples(
     data: NDArray[np.float64],
     unique_idx: int | list[int],
     label_idx: int | list[int],
-) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.
+) -> tuple[NDArray[np.float64], NDArray[np.int32], NDArray[np.intp]]:
     """
     Helper function for counting occurences of unique detailed pairs.
 
@@ -587,7 +591,7 @@ def _count_with_examples(
         Examples drawn from the data input.
     NDArray[np.int32]
         Unique label indices.
-    NDArray[np.
+    NDArray[np.intp]
         Counts for each unique label index.
     """
     unique_rows, indices = np.unique(
@@ -669,9 +673,9 @@ def compute_confusion_matrix(
     NDArray[np.float64]
         Confusion matrix.
     NDArray[np.float64]
-
+        Unmatched Predictions.
     NDArray[np.int32]
-
+        Unmatched Ground Truths.
     """
 
     n_labels = label_metadata.shape[0]
@@ -683,12 +687,12 @@ def compute_confusion_matrix(
         (n_ious, n_scores, n_labels, n_labels, 4 * n_examples + 1),
         dtype=np.float32,
     )
-
+    unmatched_predictions = -1 * np.ones(
         # (datum idx, pd idx, pd score) * n_examples + count
         (n_ious, n_scores, n_labels, 3 * n_examples + 1),
         dtype=np.float32,
     )
-
+    unmatched_ground_truths = -1 * np.ones(
         # (datum idx, gt idx) * n_examples + count
         (n_ious, n_scores, n_labels, 2 * n_examples + 1),
         dtype=np.int32,
@@ -793,7 +797,7 @@ def compute_confusion_matrix(
         data[mask_misclf], unique_idx=[0, 1, 2, 4, 5], label_idx=[3, 4]
     )
 
-    # count
+    # count unmatched predictions
     (
         halluc_examples,
         halluc_labels,
@@ -802,7 +806,7 @@ def compute_confusion_matrix(
         data[mask_halluc], unique_idx=[0, 2, 5], label_idx=2
     )
 
-    # count
+    # count unmatched ground truths
     (
         misprd_examples,
         misprd_labels,
@@ -822,13 +826,13 @@ def compute_confusion_matrix(
             misclf_labels[:, 1],
             0,
         ] = misclf_counts
-
+        unmatched_predictions[
            iou_idx,
            score_idx,
            halluc_labels,
            0,
        ] = halluc_counts
-
+        unmatched_ground_truths[
            iou_idx,
            score_idx,
            misprd_labels,
@@ -877,26 +881,26 @@ def compute_confusion_matrix(
                     :, [0, 1, 2, 6]
                 ].flatten()
 
-                #
+                # unmatched prediction examples
                 mask_halluc_label = halluc_examples[:, 5] == label_idx
                 if mask_halluc_label.sum() > 0:
                     halluc_label_examples = halluc_examples[
                         mask_halluc_label
                     ][:n_examples]
-
+                    unmatched_predictions[
                         iou_idx,
                         score_idx,
                         label_idx,
                         1 : 3 * halluc_label_examples.shape[0] + 1,
                     ] = halluc_label_examples[:, [0, 2, 6]].flatten()
 
-                #
+                # unmatched ground truth examples
                 mask_misprd_label = misprd_examples[:, 4] == label_idx
                 if misprd_examples.size > 0:
                     misprd_label_examples = misprd_examples[
                         mask_misprd_label
                     ][:n_examples]
-
+                    unmatched_ground_truths[
                         iou_idx,
                         score_idx,
                         label_idx,
@@ -905,6 +909,6 @@ def compute_confusion_matrix(
 
     return (
         confusion_matrix,
-
-
-    )
+        unmatched_predictions,
+        unmatched_ground_truths,
+    )  # type: ignore[reportReturnType]
```
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/manager.py

```diff
@@ -4,6 +4,7 @@ from dataclasses import dataclass
 import numpy as np
 from numpy.typing import NDArray
 from tqdm import tqdm
+
 from valor_lite.object_detection.annotation import Detection
 from valor_lite.object_detection.computation import (
     compute_bbox_iou,
@@ -42,8 +43,8 @@ filtered_metrics = evaluator.evaluate(iou_thresholds=[0.5], filter_mask=filter_m
 
 @dataclass
 class Filter:
-    ranked_indices: NDArray[np.
-    detailed_indices: NDArray[np.
+    ranked_indices: NDArray[np.intp]
+    detailed_indices: NDArray[np.intp]
     label_metadata: NDArray[np.int32]
 
 
@@ -569,7 +570,8 @@ class DataLoader:
                     [gt.extrema, pd.extrema]
                     for pd in detection.predictions
                     for gt in detection.groundtruths
-                ]
+                ],
+                dtype=np.float64,
             )
         ).reshape(len(detection.predictions), len(detection.groundtruths))
         for detection in detections
```
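The `DataLoader` change adds an explicit dtype to the flattened extrema buffer. One plausible motivation (an assumption, the diff does not say) is that a datum with no predictions or no ground truths yields an empty list, and pinning `float64` keeps the buffer well typed and reshapeable for the IoU computation:

```python
import numpy as np

# With no predictions (or no ground truths) the comprehension yields an empty
# list; an explicit dtype keeps the buffer a well-typed float64 array that
# still reshapes cleanly to a zero-row matrix.
pairs = []  # e.g. a datum with no predictions
buffer = np.array(pairs, dtype=np.float64)
print(buffer.dtype, buffer.reshape(0, 4).shape)  # float64 (0, 4)
```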
{valor_lite-0.33.18 → valor_lite-0.34.0}/valor_lite/object_detection/metric.py

```diff
@@ -619,7 +619,7 @@ class Metric(BaseMetric):
                 ],
             ],
         ],
-
+        unmatched_predictions: dict[
             str,  # prediction label value
             dict[
                 str,  # either `count` or `examples`
@@ -636,7 +636,7 @@ class Metric(BaseMetric):
                 ],
             ],
         ],
-
+        unmatched_ground_truths: dict[
             str,  # ground truth label value
             dict[
                 str,  # either `count` or `examples`
@@ -660,8 +660,8 @@ class Metric(BaseMetric):
         Confusion matrix for object detection tasks.
 
         This class encapsulates detailed information about the model's performance, including correct
-        predictions, misclassifications,
-        (false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
+        predictions, misclassifications, unmatched_predictions (subset of false positives), and unmatched ground truths
+        (subset of false negatives). It provides counts and examples for each category to facilitate in-depth analysis.
 
         Confusion Matrix Format:
         {
@@ -683,7 +683,7 @@ class Metric(BaseMetric):
             ...
         }
 
-
+        Unmatched Predictions Format:
         {
             <prediction label>: {
                 'count': int,
@@ -699,7 +699,7 @@ class Metric(BaseMetric):
             ...
         }
 
-
+        Unmatched Ground Truths Format:
        {
            <ground truth label>: {
                'count': int,
@@ -721,13 +721,13 @@ class Metric(BaseMetric):
            is the prediction label value, and the innermost dictionary contains either a `count`
            or a list of `examples`. Each example includes the datum UID, ground truth bounding box,
            predicted bounding box, and prediction scores.
-
+        unmatched_predictions : dict
            A dictionary where each key is a prediction label value with no corresponding ground truth
-           (false positives). The value is a dictionary containing either a `count` or a list of
+           (subset of false positives). The value is a dictionary containing either a `count` or a list of
            `examples`. Each example includes the datum UID, predicted bounding box, and prediction score.
-
+        unmatched_ground_truths : dict
            A dictionary where each key is a ground truth label value for which the model failed to predict
-           (false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
+           (subset of false negatives). The value is a dictionary containing either a `count` or a list of `examples`.
            Each example includes the datum UID and ground truth bounding box.
         score_threshold : float
             The confidence score threshold used to filter predictions.
@@ -744,8 +744,8 @@ class Metric(BaseMetric):
             type=MetricType.ConfusionMatrix.value,
             value={
                 "confusion_matrix": confusion_matrix,
-                "
-                "
+                "unmatched_predictions": unmatched_predictions,
+                "unmatched_ground_truths": unmatched_ground_truths,
             },
             parameters={
                 "score_threshold": score_threshold,
```
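As on the classification side, the downstream effect is renamed keys in the detection `ConfusionMatrix` payload, now two of them. A standalone sketch of the 0.34.0 value structure, following the formats documented in the docstring above (labels and counts invented; the old key names are truncated in this diff):

```python
# Sketch: a detection ConfusionMatrix metric in 0.34.0 with the two renamed
# payload keys; both are subsets of FP/FN respectively, per the docstring.
metric = {
    "type": "ConfusionMatrix",
    "value": {
        "confusion_matrix": {"car": {"car": {"count": 10}}},
        "unmatched_predictions": {"car": {"count": 2}},
        "unmatched_ground_truths": {"truck": {"count": 1}},
    },
    "parameters": {"score_threshold": 0.5},
}
print(metric["value"]["unmatched_predictions"]["car"]["count"])      # 2
print(metric["value"]["unmatched_ground_truths"]["truck"]["count"])  # 1
```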