valor-lite 0.33.5__tar.gz → 0.33.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {valor_lite-0.33.5 → valor_lite-0.33.7}/PKG-INFO +1 -1
- valor_lite-0.33.7/benchmarks/benchmark_classification.py +254 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/benchmarks/benchmark_objdet.py +2 -2
- valor_lite-0.33.5/examples/coco-yolo.ipynb → valor_lite-0.33.7/examples/object-detection.ipynb +1 -16
- valor_lite-0.33.7/examples/tabular_classification.ipynb +618 -0
- valor_lite-0.33.7/tests/classification/conftest.py +378 -0
- valor_lite-0.33.7/tests/classification/test_accuracy.py +316 -0
- valor_lite-0.33.7/tests/classification/test_confusion_matrix.py +651 -0
- valor_lite-0.33.7/tests/classification/test_counts.py +870 -0
- valor_lite-0.33.7/tests/classification/test_dataloader.py +49 -0
- valor_lite-0.33.7/tests/classification/test_evaluator.py +25 -0
- valor_lite-0.33.7/tests/classification/test_f1.py +322 -0
- valor_lite-0.33.7/tests/classification/test_filtering.py +560 -0
- valor_lite-0.33.7/tests/classification/test_precision.py +321 -0
- valor_lite-0.33.7/tests/classification/test_recall.py +322 -0
- valor_lite-0.33.7/tests/classification/test_rocauc.py +348 -0
- valor_lite-0.33.7/tests/classification/test_schemas.py +21 -0
- valor_lite-0.33.7/tests/classification/test_stability.py +75 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_average_precision.py +48 -32
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_average_recall.py +17 -20
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_confusion_matrix.py +15 -8
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_counts.py +40 -15
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_pr_curve.py +2 -3
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_precision.py +36 -13
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_recall.py +36 -13
- valor_lite-0.33.7/valor_lite/classification/__init__.py +30 -0
- valor_lite-0.33.7/valor_lite/classification/annotation.py +13 -0
- valor_lite-0.33.7/valor_lite/classification/computation.py +411 -0
- valor_lite-0.33.7/valor_lite/classification/manager.py +844 -0
- valor_lite-0.33.7/valor_lite/classification/metric.py +191 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/manager.py +19 -8
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/PKG-INFO +1 -1
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/SOURCES.txt +21 -1
- {valor_lite-0.33.5 → valor_lite-0.33.7}/LICENSE +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/README.md +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/benchmarks/.gitignore +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/examples/.gitignore +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/pyproject.toml +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/setup.cfg +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/__init__.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/conftest.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_dataloader.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_evaluator.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_filtering.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_iou.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_schemas.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/tests/detection/test_stability.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/__init__.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/__init__.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/annotation.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/computation.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/detection/metric.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite/schemas.py +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/dependency_links.txt +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/requires.txt +0 -0
- {valor_lite-0.33.5 → valor_lite-0.33.7}/valor_lite.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,254 @@
|
|
|
import functools
import json
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from time import time

import requests
from tqdm import tqdm

from valor_lite.classification import DataLoader, MetricType
|
+
def time_it(fn):
    """Decorator that times a call.

    The wrapped function returns a tuple ``(elapsed_seconds, result)`` where
    ``elapsed_seconds`` is the wall-clock duration of the call and ``result``
    is whatever ``fn`` returned.
    """

    # preserve fn's __name__/__doc__ so timed callables remain introspectable
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        start = time()
        results = fn(*args, **kwargs)
        return (time() - start, results)

    return wrapper
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def download_data_if_not_exists(
    file_name: str,
    file_path: Path,
    url: str,
):
    """Download the data from a public bucket if it doesn't exist locally.

    Parameters
    ----------
    file_name : str
        Name shown in the progress bar and log message.
    file_path : Path
        Local destination for the downloaded file.
    url : str
        Source URL to fetch.

    Raises
    ------
    RuntimeError
        If the server responds with a non-200 status code.
    """
    if os.path.exists(file_path):
        print(f"{file_name} already exists locally.")
        return

    # stream so large files are never held fully in memory; timeout prevents
    # the benchmark from hanging indefinitely on a dead connection
    response = requests.get(url, stream=True, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(response)

    total_size = int(response.headers.get("content-length", 0))
    with open(file_path, "wb") as f:
        with tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=file_name,
        ) as pbar:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # report actual bytes written; the final chunk may be
                    # shorter than 1024, and a fixed update overshoots total
                    pbar.update(len(chunk))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def write_results_to_file(write_path: Path, results: list[dict]):
    """Append ``results`` to the JSON file at ``write_path``.

    Results are stored under a key formatted from the current datetime
    ("%d/%m/%Y %H:%M:%S"); runs recorded earlier in the same file are
    preserved.
    """
    current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    if os.path.isfile(write_path):
        # load previously recorded runs so they are not overwritten
        with open(write_path, "r") as file:
            data = json.load(file)
    else:
        data = {}

    data[current_datetime] = results

    # plain "w" truncates and rewrites; the file is never read back here
    with open(write_path, "w") as file:
        json.dump(data, file, indent=4)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@time_it
def ingest(
    loader: DataLoader,
    gt_path: Path,
    pd_path: Path,
    limit: int,
    chunk_size: int,
):
    """Stream paired ground-truth/prediction JSONL files into ``loader``.

    Lines are read in lockstep from the two files (one datum per line —
    presumably both files are aligned by datum; verify against the data
    generator) and handed to ``loader.add_data_from_valor_dict`` in chunks
    of ``chunk_size`` (-1 means a single chunk). Reading stops after
    ``limit`` datums when ``limit`` > 0.

    Returns the seconds spent inside ``add_data_from_valor_dict`` only; the
    ``@time_it`` wrapper adds total wall time, so the caller receives
    ``(total_time, preprocessing_time)``.
    """
    accumulated_time = 0.0
    with open(gt_path, "r") as gf:
        with open(pd_path, "r") as pf:
            count = 0
            groundtruths = []
            predictions = []
            for gline, pline in zip(gf, pf):

                # groundtruth
                gt_dict = json.loads(gline)
                groundtruths.append(gt_dict)

                # prediction
                pd_dict = json.loads(pline)
                predictions.append(pd_dict)

                count += 1
                # stop once the datum limit is reached; the partial chunk
                # collected so far is flushed after the loop
                if count >= limit and limit > 0:
                    break
                elif len(groundtruths) < chunk_size or chunk_size == -1:
                    continue

                # chunk is full: time only the loader call, not file reading
                timer, _ = time_it(loader.add_data_from_valor_dict)(
                    zip(groundtruths, predictions), True
                )
                accumulated_time += timer
                groundtruths = []
                predictions = []

            # flush any remainder (partial final chunk, or everything when
            # chunk_size == -1)
            if groundtruths:
                timer, _ = time_it(loader.add_data_from_valor_dict)(
                    zip(groundtruths, predictions), True
                )
                accumulated_time += timer

    return accumulated_time
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class Benchmark:
    """Timing record for a single benchmark run, with a serializable view."""

    limit: int
    n_datums: int
    n_groundtruths: int
    n_predictions: int
    n_labels: int
    chunk_size: int
    ingestion: float
    preprocessing: float
    precomputation: float
    evaluation: float
    detailed_evaluation: list[tuple[int, float]]

    def result(self) -> dict:
        """Return a JSON-serializable summary of this benchmark run."""
        # break ingestion down into its phases before formatting
        loading = round(self.ingestion - self.preprocessing, 2)
        conversion = round(self.preprocessing, 2)
        finalization = round(self.precomputation, 2)
        total = round(self.ingestion + self.precomputation, 2)

        detailed = [
            {
                "n_points": 10,
                "n_examples": n_examples,
                "computation": f"{round(seconds, 2)} seconds",
            }
            for n_examples, seconds in self.detailed_evaluation
        ]

        return {
            "limit": self.limit,
            "n_datums": self.n_datums,
            "n_groundtruths": self.n_groundtruths,
            "n_predictions": self.n_predictions,
            "n_labels": self.n_labels,
            "chunk_size": self.chunk_size,
            "ingestion": {
                "loading_from_file": f"{loading} seconds",
                "numpy_conversion": f"{conversion} seconds",
                "finalization": f"{finalization} seconds",
                "total": f"{total} seconds",
            },
            "base_evaluation": f"{round(self.evaluation, 2)} seconds",
            "detailed_evaluation": detailed,
        }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def run_benchmarking_analysis(
    limits_to_test: list[int],
    results_file: str = "clf_results.json",
    chunk_size: int = -1,
    ingestion_timeout=30,
    evaluation_timeout=30,
):
    """Time various function calls and export the results.

    Parameters
    ----------
    limits_to_test : list[int]
        Datum limits to benchmark; one run is performed per entry.
    results_file : str
        Name of the JSON file (in this directory) to append results to.
    chunk_size : int
        Number of datums ingested per chunk; -1 ingests everything at once.
    ingestion_timeout : float
        Max seconds allowed for ingestion; -1 disables the check.
    evaluation_timeout : float
        Max seconds allowed per evaluation; -1 disables the check.

    Raises
    ------
    TimeoutError
        If any timed stage exceeds its timeout.
    """
    current_directory = Path(__file__).parent
    write_path = current_directory / Path(results_file)

    gt_filename = "gt_classification.jsonl"
    pd_filename = "pd_classification.jsonl"

    # cache data locally
    for filename in [gt_filename, pd_filename]:
        file_path = current_directory / Path(filename)
        # the filename must appear in the URL, otherwise both files would be
        # requested from the same (invalid) endpoint
        url = f"https://pub-fae71003f78140bdaedf32a7c8d331d2.r2.dev/{filename}"
        download_data_if_not_exists(
            file_name=filename, file_path=file_path, url=url
        )

    # iterate through datum limits
    results = list()
    for limit in limits_to_test:

        # === Base Evaluation ===
        loader = DataLoader()

        # ingest + preprocess
        (ingest_time, preprocessing_time,) = ingest(
            loader=loader,
            gt_path=current_directory / Path(gt_filename),
            pd_path=current_directory / Path(pd_filename),
            limit=limit,
            chunk_size=chunk_size,
        )  # type: ignore - time_it wrapper

        finalization_time, evaluator = time_it(loader.finalize)()

        if ingest_time > ingestion_timeout and ingestion_timeout != -1:
            raise TimeoutError(
                f"Ingestion timed out with limit of {limit}."
            )

        # evaluate
        eval_time, _ = time_it(evaluator.evaluate)()
        if eval_time > evaluation_timeout and evaluation_timeout != -1:
            raise TimeoutError(
                f"Base evaluation timed out with {evaluator.n_datums} datums."
            )

        # detailed evaluation (confusion matrix, no examples)
        detail_no_examples_time, _ = time_it(evaluator.evaluate)(
            metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
        )
        if (
            detail_no_examples_time > evaluation_timeout
            and evaluation_timeout != -1
        ):
            raise TimeoutError(
                f"Detailed evaluation (no examples) timed out with "
                f"{evaluator.n_datums} datums."
            )

        # detailed evaluation (confusion matrix, three examples per entry)
        detail_three_examples_time, _ = time_it(evaluator.evaluate)(
            metrics_to_return=[*MetricType.base(), MetricType.ConfusionMatrix],
            number_of_examples=3,
        )
        if (
            detail_three_examples_time > evaluation_timeout
            and evaluation_timeout != -1
        ):
            raise TimeoutError(
                f"Detailed evaluation (3 examples) timed out with "
                f"{evaluator.n_datums} datums."
            )

        results.append(
            Benchmark(
                limit=limit,
                n_datums=evaluator.n_datums,
                n_groundtruths=evaluator.n_groundtruths,
                n_predictions=evaluator.n_predictions,
                n_labels=evaluator.n_labels,
                chunk_size=chunk_size,
                ingestion=ingest_time,
                preprocessing=preprocessing_time,
                precomputation=finalization_time,
                evaluation=eval_time,
                detailed_evaluation=[
                    (0, detail_no_examples_time),
                    (3, detail_three_examples_time),
                ],
            ).result()
        )

    write_results_to_file(write_path=write_path, results=results)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
if __name__ == "__main__":

    # The same 5000-datum limit is benchmarked three times — presumably to
    # expose run-to-run timing variance; confirm intent before changing.
    run_benchmarking_analysis(
        limits_to_test=[5000, 5000, 5000],
    )
|
|
@@ -67,7 +67,7 @@ def download_data_if_not_exists(
|
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
def write_results_to_file(write_path: Path, results: list[dict]):
|
|
70
|
-
"""Write results to
|
|
70
|
+
"""Write results to json"""
|
|
71
71
|
current_datetime = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
|
|
72
72
|
if os.path.isfile(write_path):
|
|
73
73
|
with open(write_path, "r") as file:
|
|
@@ -178,7 +178,7 @@ class Benchmark:
|
|
|
178
178
|
def run_benchmarking_analysis(
|
|
179
179
|
limits_to_test: list[int],
|
|
180
180
|
combinations: list[tuple[AnnotationType, AnnotationType]] | None = None,
|
|
181
|
-
results_file: str = "
|
|
181
|
+
results_file: str = "objdet_results.json",
|
|
182
182
|
chunk_size: int = -1,
|
|
183
183
|
compute_pr: bool = True,
|
|
184
184
|
compute_detailed: bool = True,
|
valor_lite-0.33.5/examples/coco-yolo.ipynb → valor_lite-0.33.7/examples/object-detection.ipynb
RENAMED
|
@@ -12,9 +12,7 @@
|
|
|
12
12
|
"\n",
|
|
13
13
|
"In this notebook, we'll walk through a detailed example of how you can use Valor to evaluate object detections made on [the COCO Panoptic dataset](https://cocodataset.org/#home). We'll use Ultralytics' `YOLOv8` model to predict what objects exist in various COCO photographs and compare performance between bounding box and image segmentation results.\n",
|
|
14
14
|
"\n",
|
|
15
|
-
"For a conceptual introduction to Valor, [check out our project overview](https://striveworks.github.io/valor/). For a higher-level example notebook, [check out our \"Getting Started\" notebook](https://github.com/Striveworks/valor/blob/main/examples/getting_started.ipynb)
|
|
16
|
-
"\n",
|
|
17
|
-
"Before using this notebook, please ensure that the Valor service is running on your machine (for start-up instructions, [click here](https://striveworks.github.io/valor/getting_started/)). To connect to a non-local instance of Valor, update `client = Client(\"http://0.0.0.0:8000\")` in the first code block to point to the correct URL."
|
|
15
|
+
"For a conceptual introduction to Valor, [check out our project overview](https://striveworks.github.io/valor/). For a higher-level example notebook, [check out our \"Getting Started\" notebook](https://github.com/Striveworks/valor/blob/main/examples/getting_started.ipynb)."
|
|
18
16
|
]
|
|
19
17
|
},
|
|
20
18
|
{
|
|
@@ -49,19 +47,6 @@
|
|
|
49
47
|
"from valor_lite.detection import DataLoader, MetricType"
|
|
50
48
|
]
|
|
51
49
|
},
|
|
52
|
-
{
|
|
53
|
-
"attachments": {},
|
|
54
|
-
"cell_type": "markdown",
|
|
55
|
-
"id": "a28f5e66",
|
|
56
|
-
"metadata": {},
|
|
57
|
-
"source": [
|
|
58
|
-
"The modules included in `./integrations` are helper modules that demonstrate how to ingest datasets and model inferences into Valor. The depth of each integration varies depending on the use case. \n",
|
|
59
|
-
"\n",
|
|
60
|
-
"The `coco_integration` is designed to download, extract, and upload all in one command as you are starting off with all the the data. \n",
|
|
61
|
-
"\n",
|
|
62
|
-
"The `yolo_integration` is much simpler; it is a collection of parser functions that convert YOLO model results into Valor types."
|
|
63
|
-
]
|
|
64
|
-
},
|
|
65
50
|
{
|
|
66
51
|
"cell_type": "code",
|
|
67
52
|
"execution_count": 2,
|