PyPI - valor-lite - Versions diffs - 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl - Mend

valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

valor_lite/cache/__init__.py +11 -0
valor_lite/cache/compute.py +211 -0
valor_lite/cache/ephemeral.py +302 -0
valor_lite/cache/persistent.py +536 -0
valor_lite/classification/__init__.py +5 -10
valor_lite/classification/annotation.py +4 -0
valor_lite/classification/computation.py +233 -251
valor_lite/classification/evaluator.py +882 -0
valor_lite/classification/loader.py +97 -0
valor_lite/classification/metric.py +141 -4
valor_lite/classification/shared.py +184 -0
valor_lite/classification/utilities.py +221 -118
valor_lite/exceptions.py +5 -0
valor_lite/object_detection/__init__.py +5 -4
valor_lite/object_detection/annotation.py +13 -1
valor_lite/object_detection/computation.py +368 -299
valor_lite/object_detection/evaluator.py +804 -0
valor_lite/object_detection/loader.py +292 -0
valor_lite/object_detection/metric.py +152 -3
valor_lite/object_detection/shared.py +206 -0
valor_lite/object_detection/utilities.py +182 -100
valor_lite/semantic_segmentation/__init__.py +5 -4
valor_lite/semantic_segmentation/annotation.py +7 -0
valor_lite/semantic_segmentation/computation.py +20 -110
valor_lite/semantic_segmentation/evaluator.py +414 -0
valor_lite/semantic_segmentation/loader.py +205 -0
valor_lite/semantic_segmentation/shared.py +149 -0
valor_lite/semantic_segmentation/utilities.py +6 -23
{valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/METADATA +3 -1
valor_lite-0.37.5.dist-info/RECORD +49 -0
{valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/WHEEL +1 -1
valor_lite/classification/manager.py +0 -545
valor_lite/object_detection/manager.py +0 -864
valor_lite/profiling.py +0 -374
valor_lite/semantic_segmentation/benchmark.py +0 -237
valor_lite/semantic_segmentation/manager.py +0 -446
valor_lite-0.36.6.dist-info/RECORD +0 -41
{valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/top_level.txt +0 -0

valor_lite/semantic_segmentation/shared.py ADDED Viewed

@@ -0,0 +1,149 @@
+from dataclasses import dataclass
+from pathlib import Path
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+from valor_lite.cache import FileCacheReader, MemoryCacheReader
+@dataclass
+class EvaluatorInfo:
+    """Plain record of summary counters plus the optional metadata schema.
+
+    Every counter defaults to 0 and ``metadata_fields`` defaults to ``None``;
+    the class itself performs no computation or validation.
+    """
+    # NOTE(review): the four datum/pixel counters presumably mirror the tuple
+    # returned by ``extract_counts`` in this module - confirm against the caller.
+    number_of_rows: int = 0
+    number_of_datums: int = 0
+    number_of_labels: int = 0
+    number_of_pixels: int = 0
+    number_of_groundtruth_pixels: int = 0
+    number_of_prediction_pixels: int = 0
+    # Same (name, type) pair layout that ``generate_schema`` accepts.
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None = None
+def generate_cache_path(path: str | Path) -> Path:
+    """Return the ``counts`` entry located directly under ``path``."""
+    parent = Path(path)
+    return parent.joinpath("counts")
+def generate_metadata_path(path: str | Path) -> Path:
+    """Return the ``metadata.json`` file located directly under ``path``."""
+    parent = Path(path)
+    return parent.joinpath("metadata.json")
+def generate_schema(
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None
+) -> pa.Schema:
+    """Build the table schema: reserved columns first, then metadata columns.
+
+    Parameters
+    ----------
+    metadata_fields : list[tuple[str, str | pa.DataType]] | None
+        Extra ``(name, type)`` columns appended after the reserved ones.
+        ``None`` or an empty list adds nothing.
+
+    Returns
+    -------
+    pa.Schema
+        Schema holding the reserved fields followed by the metadata fields.
+
+    Raises
+    ------
+    ValueError
+        If a metadata field name is identical to a reserved field name.
+    """
+    extra_fields = metadata_fields or []
+    reserved_fields = [
+        ("datum_uid", pa.string()),
+        ("datum_id", pa.int64()),
+        # groundtruth
+        ("gt_label", pa.string()),
+        ("gt_label_id", pa.int64()),
+        # prediction
+        ("pd_label", pa.string()),
+        ("pd_label_id", pa.int64()),
+        # pair
+        ("count", pa.uint64()),
+    ]
+    # A metadata column must never shadow one of the reserved columns.
+    reserved_field_names = {entry[0] for entry in reserved_fields}
+    metadata_field_names = {entry[0] for entry in extra_fields}
+    conflicting = reserved_field_names & metadata_field_names
+    if conflicting:
+        raise ValueError(
+            f"metadata fields {conflicting} conflict with reserved fields"
+        )
+    return pa.schema(reserved_fields + list(extra_fields))
+def encode_metadata_fields(
+    metadata_fields: list[tuple[str, str | pa.DataType]] | None
+) -> dict[str, str]:
+    """Map each metadata field name to the ``str()`` of its type.
+
+    ``None`` or an empty list yields an empty dict.
+    """
+    encoded = {}
+    for name, dtype in metadata_fields or []:
+        encoded[name] = str(dtype)
+    return encoded
+def decode_metadata_fields(
+    encoded_metadata_fields: dict[str, str]
+) -> list[tuple[str, str | pa.DataType]]:
+    """Turn an encoded ``{name: type}`` dict back into ``(name, type)`` pairs.
+
+    Pairs follow the dict's iteration order and the type values are returned
+    exactly as stored (no conversion is applied here).
+    """
+    return list(encoded_metadata_fields.items())
+def extract_labels(
+    reader: MemoryCacheReader | FileCacheReader,
+    index_to_label_override: dict[int, str] | None = None,
+) -> dict[int, str]:
+    """Collect the label-id to label-name mapping found in the cached tables.
+
+    Parameters
+    ----------
+    reader : MemoryCacheReader | FileCacheReader
+        Source of tables; only ``iterate_tables`` is called on it.
+    index_to_label_override : dict[int, str] | None
+        When not ``None`` it is returned as-is and the reader is never read.
+
+    Returns
+    -------
+    dict[int, str]
+        Mapping built from the ground truth and prediction label columns of
+        every table, with the id ``-1`` left out.
+    """
+    if index_to_label_override is not None:
+        return index_to_label_override
+
+    def _labels_by_id(table, id_column, label_column):
+        # One name per distinct id, read from the first row where the id occurs.
+        ids, first_rows = np.unique(
+            table[id_column].to_numpy(), return_index=True
+        )
+        names = table[label_column].take(first_rows).to_pylist()
+        mapping = dict(zip(ids.astype(int).tolist(), names))
+        # NOTE(review): -1 is presumably the "no label" sentinel - confirm
+        # against the loader that writes these columns.
+        mapping.pop(-1, None)
+        return mapping
+
+    column_pairs = (
+        ("gt_label_id", "gt_label"),
+        ("pd_label_id", "pd_label"),
+    )
+    index_to_label = {}
+    for tbl in reader.iterate_tables(
+        columns=[
+            "gt_label_id",
+            "gt_label",
+            "pd_label_id",
+            "pd_label",
+        ]
+    ):
+        # Ground truth entries are merged before prediction entries.
+        for id_column, label_column in column_pairs:
+            index_to_label.update(_labels_by_id(tbl, id_column, label_column))
+    return index_to_label
+def extract_counts(
+    reader: MemoryCacheReader | FileCacheReader,
+    datums: pc.Expression | None = None,
+    groundtruths: pc.Expression | None = None,
+    predictions: pc.Expression | None = None,
+) -> tuple[int, int, int, int]:
+    """Tally datum and pixel counts over the cached tables.
+
+    Parameters
+    ----------
+    reader : MemoryCacheReader | FileCacheReader
+        Source of tables; only ``iterate_tables`` is called on it.
+    datums : pc.Expression | None
+        Forwarded as ``filter`` to ``reader.iterate_tables``.
+    groundtruths : pc.Expression | None
+        Extra condition AND-ed with ``gt_label_id >= 0`` before summing the
+        ground truth pixel count.
+    predictions : pc.Expression | None
+        Extra condition AND-ed with ``pd_label_id >= 0`` before summing the
+        prediction pixel count.
+
+    Returns
+    -------
+    tuple[int, int, int, int]
+        ``(datums, pixels, groundtruth pixels, prediction pixels)``. The
+        pixel totals are sums of the ``count`` column.
+    """
+    # Datum ids are gathered across all tables so that a datum whose rows are
+    # spread over more than one table is still counted exactly once.
+    seen_datum_ids = set()
+    n_total, n_gts, n_pds = 0, 0, 0
+    for tbl in reader.iterate_tables(filter=datums):
+        # count datums
+        seen_datum_ids.update(np.unique(tbl["datum_id"].to_numpy()).tolist())
+        # count pixels
+        n_total += int(tbl["count"].to_numpy().sum())
+        # count groundtruth pixels (rows with a non-negative gt label id)
+        gt_expr = pc.field("gt_label_id") >= 0
+        if groundtruths is not None:
+            gt_expr &= groundtruths
+        n_gts += int(tbl.filter(gt_expr)["count"].to_numpy().sum())
+        # count prediction pixels (rows with a non-negative pd label id)
+        pd_expr = pc.field("pd_label_id") >= 0
+        if predictions is not None:
+            pd_expr &= predictions
+        n_pds += int(tbl.filter(pd_expr)["count"].to_numpy().sum())
+    return len(seen_datum_ids), n_total, n_gts, n_pds

valor_lite/semantic_segmentation/utilities.py CHANGED Viewed

@@ -1,15 +1,11 @@
 from collections import defaultdict
-import numpy as np
-from numpy.typing import NDArray
 from valor_lite.semantic_segmentation.metric import Metric, MetricType
 def unpack_precision_recall_iou_into_metric_lists(
     results: tuple,
-    label_metadata: NDArray[np.int64],
-    index_to_label: list[str],
+    index_to_label: dict[int, str],
 ) -> dict[MetricType, list[Metric]]:
     n_labels = len(index_to_label)
@@ -39,24 +35,20 @@ def unpack_precision_recall_iou_into_metric_lists(
                         "iou": float(ious[gt_label_idx, pd_label_idx])
                     }
                     for pd_label_idx in range(n_labels)
-                    if label_metadata[pd_label_idx, 0] > 0
                 }
                 for gt_label_idx in range(n_labels)
-                if label_metadata[gt_label_idx, 0] > 0
             },
             unmatched_predictions={
                 index_to_label[pd_label_idx]: {
                     "ratio": float(unmatched_prediction_ratios[pd_label_idx])
                 }
                 for pd_label_idx in range(n_labels)
-                if label_metadata[pd_label_idx, 0] > 0
             },
             unmatched_ground_truths={
                 index_to_label[gt_label_idx]: {
                     "ratio": float(unmatched_ground_truth_ratios[gt_label_idx])
                 }
                 for gt_label_idx in range(n_labels)
-                if label_metadata[gt_label_idx, 0] > 0
             },
         )
     ]
@@ -67,38 +59,29 @@ def unpack_precision_recall_iou_into_metric_lists(
         )
     ]
-    for label_idx, label in enumerate(index_to_label):
-        kwargs = {
-            "label": label,
-        }
-        # if no groundtruths exists for a label, skip it.
-        if label_metadata[label_idx, 0] == 0:
-            continue
+    for label_idx, label in index_to_label.items():
         metrics[MetricType.Precision].append(
             Metric.precision(
                 value=float(precision[label_idx]),
-                **kwargs,
+                label=label,
             )
         )
         metrics[MetricType.Recall].append(
             Metric.recall(
                 value=float(recall[label_idx]),
-                **kwargs,
+                label=label,
             )
         )
         metrics[MetricType.F1].append(
             Metric.f1_score(
                 value=float(f1_score[label_idx]),
-                **kwargs,
+                label=label,
             )
         )
         metrics[MetricType.IOU].append(
             Metric.iou(
                 value=float(ious[label_idx, label_idx]),
-                **kwargs,
+                label=label,
             )
         )

{valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: valor-lite
-Version: 0.36.6
+Version: 0.37.5
 Summary: Evaluate machine learning models.
 Project-URL: homepage, https://www.striveworks.com
 Requires-Python: >=3.10
@@ -8,6 +8,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: numpy
 Requires-Dist: tqdm
 Requires-Dist: shapely
+Requires-Dist: pyarrow
 Provides-Extra: nlp
 Requires-Dist: evaluate; extra == "nlp"
 Requires-Dist: nltk; extra == "nlp"
@@ -31,6 +32,7 @@ Provides-Extra: benchmark
 Requires-Dist: requests; extra == "benchmark"
 Provides-Extra: dev
 Requires-Dist: valor-lite[benchmark,docs,mistral,nlp,openai,test]; extra == "dev"
+Requires-Dist: pyarrow-stubs; extra == "dev"
 # valor-lite: Fast, local machine learning evaluation.

valor_lite-0.37.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,49 @@
+valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
+valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+valor_lite/exceptions.py,sha256=Odg4m7VfTYtGunzA-JpNWGoDwvXFvCamkQu8WLncE-A,496
+valor_lite/schemas.py,sha256=pB0MrPx5qFLbwBWDiOUUm-vmXdWvbJLFCBmKgbcbI5g,198
+valor_lite/cache/__init__.py,sha256=OzVJq6WMXOmhJPQLOVu9y0xEui41PDpwGnkrU6A3-dA,266
+valor_lite/cache/compute.py,sha256=1OP7h_00asTydA3ZSPZeMgqiwlvxu6RfzsOyvqDAwEg,6906
+valor_lite/cache/ephemeral.py,sha256=BPUtVVE3RuU0_gB87CPZUBTbnUbcdz4MMsUhs_YLzDE,8164
+valor_lite/cache/persistent.py,sha256=3XkMvGtUdU5ZK1Q4p2ipU97p-MYd3KS9ZTDfV46SxG0,15319
+valor_lite/classification/__init__.py,sha256=5cAK1exDJGGhZWFcswj977-qVtEafUTsNIg7stJV2oc,293
+valor_lite/classification/annotation.py,sha256=93NzpfpBhkyLq33gLtAqkCl0TTmraDGy03cSjAxMWec,1304
+valor_lite/classification/computation.py,sha256=x2IINIDHP_52GTCdkTgu8ujx3o3HGosdWE0_oQvNZuU,11498
+valor_lite/classification/evaluator.py,sha256=h0B3NTR746mVIraICPLDw_xls1RBAeFyc39cBm0vcpw,29150
+valor_lite/classification/loader.py,sha256=t976u0XGBuQGrG6VgxPViu-Ipbj5sUm_ShW8FV0YGxg,3355
+valor_lite/classification/metric.py,sha256=4ZoP9f36DqKnDOif40kjwznJc0fw93F1yx3gvX_lnz8,16104
+valor_lite/classification/numpy_compatibility.py,sha256=roqtTetsm1_HxuaejrthQdydjsRIy-FpXpGb86cLh_E,365
+valor_lite/classification/shared.py,sha256=J3lrcJQrSSEXc1qv7s9CIU3UF_XEZxd6flki1I0nhfA,5653
+valor_lite/classification/utilities.py,sha256=NZP830x0noKSZCC6-Hc_K34JzxLsHgU8Zcq0YjTlH90,10218
+valor_lite/object_detection/__init__.py,sha256=GARztCi3rYlq3gltJ0lDBQK0bVYr5Upwc_M3_Cl_RMg,363
+valor_lite/object_detection/annotation.py,sha256=--uvHOS7vPIz3cqArp8dZM4Ax9_lzW0HCaz626ZULDM,7684
+valor_lite/object_detection/computation.py,sha256=8A9p4uG2lf0S7SIR3paF8jBHJJ6f7z_CQ0ZmOioT1nM,26528
+valor_lite/object_detection/evaluator.py,sha256=LIL2a5jyXvCx7BuPmBEz1B63PPanIJeUcSp3QJTmaCE,27821
+valor_lite/object_detection/loader.py,sha256=kPGG8UXCBDZl10qfDv3YqailgdHk3O4DyZQ8pqkVhJc,11428
+valor_lite/object_detection/metric.py,sha256=-bcupJvS_BSWNPN2trkO8EWD-z7TDFGGim7fIHpzPzw,27924
+valor_lite/object_detection/shared.py,sha256=X1lE3GxOzmS_3PZt2L9Su1Z-jSvz7cQ0IInGFSxu_fY,6292
+valor_lite/object_detection/utilities.py,sha256=zoez6MaBx0IwxJf-zDQMkCh80lQf0zB4Fl1xluFTtnY,14526
+valor_lite/semantic_segmentation/__init__.py,sha256=OeAKuANM2mvw3JX4pi-eudc82YMqsXJwK1DIjgl2oeI,318
+valor_lite/semantic_segmentation/annotation.py,sha256=XB54BcBu_soQvbP3DrbXCruw-sypJBC6KhLqRWX1Vmw,4384
+valor_lite/semantic_segmentation/computation.py,sha256=dhbwybDe5kQGXzUSOjm15UGjDoGGt5zPtvPyvp8kFc4,4690
+valor_lite/semantic_segmentation/evaluator.py,sha256=jXFBPv_pmIOj85wJFFikbNNWDq8wPG2rcLiaWrgDcUs,13569
+valor_lite/semantic_segmentation/loader.py,sha256=rDtbeGhQEDhGjcjx9RHiSATQM25uftF13c1pm09axUM,7582
+valor_lite/semantic_segmentation/metric.py,sha256=T9RfPJf4WgqGQTXYvSy08vJG5bjXXJnyYZeW0mlxMa8,7132
+valor_lite/semantic_segmentation/shared.py,sha256=i9F7nAoH9Yhabqj-SVtnQfm2ST0xkVFUcQVBVLabPeE,4539
+valor_lite/semantic_segmentation/utilities.py,sha256=MjqSlS1wy0RwJGiuUpZrT3AUMXEahO7-lh2oeNeqGV8,2470
+valor_lite/text_generation/__init__.py,sha256=pGhpWCSZjLM0pPHCtPykAfos55B8ie3mi9EzbNxfj-U,356
+valor_lite/text_generation/annotation.py,sha256=O5aXiwCS4WjA-fqn4ly-O0MsTHoIOmqxqCaAp9IeI3M,1270
+valor_lite/text_generation/computation.py,sha256=hGDkPfzWY9SDTdozd-nArexJ3ZSNlCIWqHGoD8vO2Cc,18652
+valor_lite/text_generation/manager.py,sha256=C4QwvronGHXmYSkaRmUGy7TN0C0aeyDx9Hb-ClNYXK4,24810
+valor_lite/text_generation/metric.py,sha256=C9gbWejjOJ23JVLecuUhYW5rkx30NUCfRtgsM46uMds,10409
+valor_lite/text_generation/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+valor_lite/text_generation/llm/exceptions.py,sha256=w4eYSJIJQ_jWuCmquCB6ETr_st_LzbDRlhnlPeqwmfo,349
+valor_lite/text_generation/llm/generation.py,sha256=XKPjCxPUZHiWInQSO7wLOb0YtMFLu50s8rHZe1Yz0s0,28954
+valor_lite/text_generation/llm/instructions.py,sha256=fz2onBZZWcl5W8iy7zEWkPGU9N07ez6O7SxZA5M2xe4,34056
+valor_lite/text_generation/llm/integrations.py,sha256=-rTfdAjq1zH-4ixwYuMQEOQ80pIFzMTe0BYfroVx3Pg,6974
+valor_lite/text_generation/llm/utilities.py,sha256=bjqatGgtVTcl1PrMwiDKTYPGJXKrBrx7PDtzIblGSys,1178
+valor_lite/text_generation/llm/validators.py,sha256=Wzr5RlfF58_2wOU-uTw7C8skan_fYdhy4Gfn0jSJ8HM,2700
+valor_lite-0.37.5.dist-info/METADATA,sha256=nHuvYmvGYZAWh9Wc5LQqI6vRWlSd9gGhW3PUk5FgBoM,5139
+valor_lite-0.37.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+valor_lite-0.37.5.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
+valor_lite-0.37.5.dist-info/RECORD,,

{valor_lite-0.36.6.dist-info → valor_lite-0.37.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl

valor-lite 0.36.6__py3-none-any.whl → 0.37.5__py3-none-any.whl