valor-lite 0.33.17__py3-none-any.whl → 0.33.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -0,0 +1,374 @@
+ import json
+ import math
+ import multiprocessing as mp
+ import resource
+ import time
+ from collections import deque
+ from multiprocessing import Queue
+ from typing import Any
+
+ from tqdm import tqdm
+
+
+ class BenchmarkError(Exception):
+     def __init__(
+         self, benchmark: str, error_type: str, error_message: str
+     ) -> None:
+         super().__init__(
+             f"'{benchmark}' raised '{error_type}' with the following message: {error_message}"
+         )
+
+
+ def _timeit_subprocess(*args, __fn, __queue: Queue, **kwargs):
+     """
+     Multiprocessing subprocess that reports either runtime or errors.
+
+     This is handled within a subprocess to protect the benchmark against OOM errors.
+     """
+     try:
+         timer_start = time.perf_counter()
+         __fn(*args, **kwargs)
+         timer_end = time.perf_counter()
+         __queue.put(timer_end - timer_start)
+     except Exception as e:
+         __queue.put(e)
+
+
+ def create_runtime_profiler(
+     time_limit: float | None,
+     repeat: int = 1,
+ ):
+     """
+     Creates a runtime profiler as a decorating function.
+
+     The profiler reports runtime of the wrapped function from a subprocess to protect against OOM errors.
+
+     Parameters
+     ----------
+     time_limit : float, optional
+         An optional time limit to constrain the benchmark.
+     repeat : int, default=1
+         The number of times to repeat the benchmark to produce an average runtime.
+     """
+     ctx = mp.get_context("spawn")
+
+     def decorator(fn):
+         def wrapper(*args, **kwargs):
+             # Record average runtime over repeated runs.
+             elapsed = 0
+             for _ in range(repeat):
+                 q = ctx.Queue()
+                 p = ctx.Process(
+                     target=_timeit_subprocess,
+                     args=args,
+                     kwargs={"__fn": fn, "__queue": q, **kwargs},
+                 )
+                 p.start()
+                 p.join(timeout=time_limit)
+
+                 # Check if computation finishes within the timeout
+                 if p.is_alive():
+                     p.terminate()
+                     p.join()
+                     q.close()
+                     q.join_thread()
+                     raise TimeoutError(
+                         f"Function '{fn.__name__}' did not complete within {time_limit} seconds."
+                     )
+
+                 # Retrieve the result
+                 result = q.get(timeout=1)
+                 if isinstance(result, Exception):
+                     raise result
+                 elif isinstance(result, float):
+                     elapsed += result
+                 else:
+                     raise TypeError(type(result).__name__)
+
+             return elapsed / repeat
+
+         return wrapper
+
+     return decorator
+
+
+ def pretty_print_results(results: tuple):
+     valid, invalid, permutations = results
+
+     print(
+         "====================================================================="
+     )
+     print("Details")
+     print(json.dumps(permutations, indent=4))
+
+     if len(valid) > 0:
+         print()
+         print("Passed")
+         keys = ["complexity", "runtime", *valid[0]["details"].keys()]
+         header = " | ".join(f"{header:^15}" for header in keys)
+         print(header)
+         print("-" * len(header))
+         for entry in valid:
+             values = [
+                 entry["complexity"],
+                 round(entry["runtime"], 4),
+                 *entry["details"].values(),
+             ]
+             row = " | ".join(f"{str(value):^15}" for value in values)
+             print(row)
+
+     if len(invalid) > 0:
+         print()
+         print("Failed")
+         keys = ["complexity", "error", *invalid[0]["details"].keys(), "msg"]
+         header = " | ".join(f"{header:^15}" for header in keys)
+         print(header)
+         print("-" * len(header))
+         for entry in invalid:
+             values = [
+                 entry["complexity"],
+                 entry["error"],
+                 *entry["details"].values(),
+                 entry["msg"],
+             ]
+             row = " | ".join(f"{str(value):^15}" for value in values)
+             print(row)
+
+
+ def _calculate_complexity(params: list[int | tuple[int]]) -> int:
+     """
+     Basic metric of benchmark complexity.
+     """
+     flattened_params = [
+         math.prod(p) if isinstance(p, tuple) else p for p in params
+     ]
+     return math.prod(flattened_params)
+
+
+ class Benchmark:
+     def __init__(
+         self,
+         time_limit: float | None,
+         memory_limit: int | None,
+         *_,
+         repeat: int | None = 1,
+         verbose: bool = False,
+     ):
+         self.time_limit = time_limit
+         self.memory_limit = memory_limit
+         self.repeat = repeat
+         self.verbose = verbose
+
+     def get_limits(
+         self,
+         *_,
+         readable: bool = True,
+         memory_unit: str = "GB",
+         time_unit: str = "seconds",
+     ) -> dict[str, str | int | float | None]:
+         """
+         Returns a dictionary of benchmark limits.
+
+         Parameters
+         ----------
+         readable : bool, default=True
+             Toggles whether the output should be human readable.
+         memory_unit : str, default="GB"
+             Toggles what unit to display the memory limit with when 'readable=True'.
+         time_unit : str, default="seconds"
+             Toggles what unit to display the time limit with when 'readable=True'.
+
+         Returns
+         -------
+         dict[str, str | int | float | None]
+             The benchmark limits.
+         """
+
+         memory_value = self.memory_limit
+         if readable and memory_value is not None:
+             match memory_unit:
+                 case "TB":
+                     memory_value /= 1024**4
+                 case "GB":
+                     memory_value /= 1024**3
+                 case "MB":
+                     memory_value /= 1024**2
+                 case "KB":
+                     memory_value /= 1024
+                 case "B":
+                     pass
+                 case _:
+                     valid_set = {"TB", "GB", "MB", "KB", "B"}
+                     raise ValueError(
+                         f"Expected memory unit to be in the set {valid_set}, received '{memory_unit}'."
+                     )
+             memory_value = f"{memory_value} {memory_unit}"
+
+         time_value = self.time_limit
+         if readable and time_value is not None:
+             match time_unit:
+                 case "minutes":
+                     time_value /= 60
+                 case "seconds":
+                     pass
+                 case "milliseconds":
+                     time_value *= 1000
+                 case _:
+                     valid_set = {"minutes", "seconds", "milliseconds"}
+                     raise ValueError(
+                         f"Expected time unit to be in the set {valid_set}, received '{time_unit}'."
+                     )
+             time_value = f"{time_value} {time_unit}"
+
+         return {
+             "memory_limit": memory_value,
+             "time_limit": time_value,
+             "repeat": self.repeat,
+         }
+
+     @property
+     def memory_limit(self) -> int | None:
+         """
+         The memory limit in bytes (B).
+         """
+         return self._memory_limit
+
+     @memory_limit.setter
+     def memory_limit(self, limit: int | None):
+         """
+         Stores the memory limit and restricts resources.
+         """
+         self._memory_limit = limit
+         if limit is not None:
+             _, hard = resource.getrlimit(resource.RLIMIT_AS)
+             resource.setrlimit(resource.RLIMIT_AS, (limit, hard))
+
+     def run(
+         self,
+         benchmark,
+         **kwargs: list[Any],
+     ):
+         """
+         Runs a benchmark with ranges of parameters.
+
+         Parameters
+         ----------
+         benchmark : Callable
+             The benchmark function.
+         **kwargs : list[Any]
+             Keyword arguments passing lists of parameters to benchmark. The values should be sorted in
+             decreasing complexity. For example, if the number of labels is a parameter then a higher
+             number of unique labels would be considered "more" complex.
+
+         Example
+         -------
+         >>> b = Benchmark(
+         ...     time_limit=10.0,
+         ...     memory_limit=8 * (1024**3),
+         ...     repeat=1,
+         ...     verbose=False,
+         ... )
+         >>> results = b.run(
+         ...     benchmark=semseg_add_data,
+         ...     n_labels=[
+         ...         100,
+         ...         10,
+         ...     ],
+         ...     shape=[
+         ...         (1000, 1000),
+         ...         (100, 100),
+         ...     ],
+         ... )
+         """
+
+         nvars = len(kwargs)
+         keys = tuple(kwargs.keys())
+         vars = tuple(kwargs[key] for key in keys)
+
+         initial_indices = tuple(0 for _ in range(nvars))
+         max_indices = tuple(len(v) for v in vars)
+         permutations = math.prod(max_indices)
+
+         # Initialize queue with the starting index (0, ...)
+         queue = deque()
+         queue.append(initial_indices)
+
+         # Keep track of explored combinations to avoid duplicates
+         explored = set()
+         explored.add(initial_indices)
+
+         # Store valid combinations that finish within the time limit
+         valid_combinations = []
+         invalid_combinations = []
+
+         pbar = tqdm(total=math.prod(max_indices), disable=(not self.verbose))
+         prev_count = 0
+         while queue:
+
+             current_indices = queue.popleft()
+             parameters = {
+                 k: v[current_indices[idx]]
+                 for idx, (k, v) in enumerate(zip(keys, vars))
+             }
+             complexity = _calculate_complexity(list(parameters.values()))
+
+             details: dict = {k: str(v) for k, v in parameters.items()}
+
+             # update terminal with status
+             count = len(valid_combinations) + len(invalid_combinations)
+             pbar.update(count - prev_count)
+             prev_count = count
+
+             try:
+                 runtime = benchmark(
+                     time_limit=self.time_limit,
+                     repeat=self.repeat,
+                     **parameters,
+                 )
+                 valid_combinations.append(
+                     {
+                         "complexity": complexity,
+                         "runtime": runtime,
+                         "details": details,
+                     }
+                 )
+                 continue
+             except Exception as e:
+                 invalid_combinations.append(
+                     {
+                         "complexity": complexity,
+                         "error": type(e).__name__,
+                         "msg": str(e),
+                         "details": details,
+                     }
+                 )
+
+             for idx in range(nvars):
+                 new_indices = list(current_indices)
+                 if new_indices[idx] + 1 < max_indices[idx]:
+                     new_indices[idx] += 1
+                     new_indices_tuple = tuple(new_indices)
+                     if new_indices_tuple not in explored:
+                         queue.append(new_indices_tuple)
+                         explored.add(new_indices_tuple)
+
+         valid_combinations.sort(key=lambda x: -x["complexity"])
+         invalid_combinations.sort(key=lambda x: -x["complexity"])
+
+         # clear terminal and display results
+         results = (
+             valid_combinations,
+             invalid_combinations,
+             {
+                 "benchmark": benchmark.__name__,
+                 "limits": self.get_limits(readable=True),
+                 "passed": permutations - len(invalid_combinations),
+                 "failed": len(invalid_combinations),
+                 "total": permutations,
+             },
+         )
+         pbar.close()
+         if self.verbose:
+             pretty_print_results(results)
+
+         return results
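The hunk above adds a profiling module (imported elsewhere in this diff as valor_lite.profiling) that times a callable in a spawned subprocess and sweeps parameter combinations via Benchmark.run. The following is a minimal usage sketch, not taken from the package: it assumes Benchmark and create_runtime_profiler are both importable from valor_lite.profiling, and the workload function is hypothetical. The __main__ guard matters because the profiler uses the "spawn" start method.

    # Minimal sketch; `count_to` is a hypothetical workload, not part of this diff.
    from valor_lite.profiling import Benchmark, create_runtime_profiler


    def count_to(n: int, time_limit: float | None, repeat: int = 1) -> float:
        # Wrap the built-in `sum`, run it in a spawned subprocess, and
        # return the average runtime in seconds over `repeat` runs.
        profile = create_runtime_profiler(time_limit=time_limit, repeat=repeat)
        return profile(sum)(range(n))


    if __name__ == "__main__":
        b = Benchmark(
            time_limit=5.0,
            memory_limit=4 * (1024**3),  # 4 GB expressed in bytes
            repeat=1,
            verbose=True,
        )
        # Parameter lists are ordered from most to least complex, as Benchmark.run expects.
        valid, invalid, summary = b.run(benchmark=count_to, n=[50_000_000, 1_000])
        print(summary)

Benchmark.run forwards time_limit and repeat to the benchmark callable along with one value per keyword, which is why the sketch's signature mirrors the benchmark functions added later in this diff.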
@@ -1,4 +1,4 @@
- from .annotation import Bitmask, Segmentation
+ from .annotation import Bitmask, Segmentation, generate_segmentation
  from .manager import DataLoader, Evaluator
  from .metric import Metric, MetricType
 
@@ -9,4 +9,5 @@ __all__ = [
      "Bitmask",
      "Metric",
      "MetricType",
+     "generate_segmentation",
  ]
@@ -29,7 +29,7 @@ class Bitmask:
      def __post_init__(self):
          if self.mask.dtype != np.bool_:
              raise ValueError(
-                 f"Bitmask recieved mask with dtype `{self.mask.dtype}`."
+                 f"Bitmask received mask with dtype '{self.mask.dtype}'."
              )
 
 
@@ -94,3 +94,86 @@ class Segmentation:
 
          self.shape = groundtruth_shape.pop()
          self.size = int(np.prod(np.array(self.shape)))
+
+
+ def generate_segmentation(
+     datum_uid: str,
+     number_of_unique_labels: int,
+     mask_height: int,
+     mask_width: int,
+ ) -> Segmentation:
+     """
+     Generates a semantic segmentation annotation.
+
+     Parameters
+     ----------
+     datum_uid : str
+         The datum UID for the generated segmentation.
+     number_of_unique_labels : int
+         The number of unique labels.
+     mask_height : int
+         The height of the mask in pixels.
+     mask_width : int
+         The width of the mask in pixels.
+
+     Returns
+     -------
+     Segmentation
+         A generated semantic segmentation annotation.
+     """
+
+     if number_of_unique_labels > 1:
+         common_proba = 0.4 / (number_of_unique_labels - 1)
+         min_proba = min(common_proba, 0.1)
+         labels = [str(i) for i in range(number_of_unique_labels)] + [None]
+         proba = (
+             [0.5]
+             + [common_proba for _ in range(number_of_unique_labels - 1)]
+             + [0.1]
+         )
+     elif number_of_unique_labels == 1:
+         labels = ["0", None]
+         proba = [0.9, 0.1]
+         min_proba = 0.1
+     else:
+         raise ValueError(
+             "The number of unique labels should be greater than zero."
+         )
+
+     probabilities = np.array(proba, dtype=np.float64)
+     weights = (probabilities / min_proba).astype(np.int32)
+
+     indices = np.random.choice(
+         np.arange(len(weights)),
+         size=(mask_height * 2, mask_width),
+         p=probabilities,
+     )
+
+     N = len(labels)
+
+     masks = np.arange(N)[:, None, None] == indices
+
+     gts = []
+     pds = []
+     for lidx in range(N):
+         label = labels[lidx]
+         if label is None:
+             continue
+         gts.append(
+             Bitmask(
+                 mask=masks[lidx, :mask_height, :],
+                 label=label,
+             )
+         )
+         pds.append(
+             Bitmask(
+                 mask=masks[lidx, mask_height:, :],
+                 label=label,
+             )
+         )
+
+     return Segmentation(
+         uid=datum_uid,
+         groundtruths=gts,
+         predictions=pds,
+     )
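The new generate_segmentation helper samples one label index per pixel (biased 50% toward label "0", with a 10% chance of an unlabeled pixel), stacks the samples into per-label boolean masks, and splits a double-height canvas into ground-truth and prediction Bitmasks. A short sketch of calling it, assuming it is re-exported from valor_lite.semantic_segmentation as the __init__ hunk above suggests:

    # Sketch only; attribute names follow the Segmentation/Bitmask fields shown in this diff.
    from valor_lite.semantic_segmentation import generate_segmentation

    seg = generate_segmentation(
        datum_uid="datum0",
        number_of_unique_labels=3,
        mask_height=64,
        mask_width=64,
    )
    print(seg.uid)                # "datum0"
    print(len(seg.groundtruths))  # 3 Bitmasks, one per label (the None/background slot is skipped)
    print(len(seg.predictions))   # 3 Bitmasks sampled from the same distribution
    print(seg.shape, seg.size)    # (64, 64) 4096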
@@ -0,0 +1,151 @@
+ from valor_lite.profiling import create_runtime_profiler
+ from valor_lite.semantic_segmentation import DataLoader, generate_segmentation
+
+
+ def benchmark_add_data(
+     n_labels: int,
+     shape: tuple[int, int],
+     time_limit: float | None,
+     repeat: int = 1,
+ ) -> float:
+     """
+     Benchmarks 'DataLoader.add_data' for semantic segmentation.
+
+     Parameters
+     ----------
+     n_labels : int
+         The number of unique labels to generate.
+     shape : tuple[int, int]
+         The size (h,w) of the mask to generate.
+     time_limit : float, optional
+         An optional time limit to constrain the benchmark.
+     repeat : int
+         The number of times to run the benchmark to produce a runtime average.
+
+     Returns
+     -------
+     float
+         The average runtime.
+     """
+
+     profile = create_runtime_profiler(
+         time_limit=time_limit,
+         repeat=repeat,
+     )
+
+     elapsed = 0
+     for _ in range(repeat):
+         data = generate_segmentation(
+             datum_uid="uid",
+             number_of_unique_labels=n_labels,
+             mask_height=shape[0],
+             mask_width=shape[1],
+         )
+         loader = DataLoader()
+         elapsed += profile(loader.add_data)([data])
+     return elapsed / repeat
+
+
+ def benchmark_finalize(
+     n_datums: int,
+     n_labels: int,
+     time_limit: float | None,
+     repeat: int = 1,
+ ):
+     """
+     Benchmarks 'DataLoader.finalize' for semantic segmentation.
+
+     Parameters
+     ----------
+     n_datums : int
+         The number of datums to generate.
+     n_labels : int
+         The number of unique labels to generate.
+     time_limit : float, optional
+         An optional time limit to constrain the benchmark.
+     repeat : int
+         The number of times to run the benchmark to produce a runtime average.
+
+     Returns
+     -------
+     float
+         The average runtime.
+     """
+
+     profile = create_runtime_profiler(
+         time_limit=time_limit,
+         repeat=repeat,
+     )
+
+     elapsed = 0
+     for _ in range(repeat):
+
+         data = [
+             generate_segmentation(
+                 datum_uid=str(i),
+                 number_of_unique_labels=n_labels,
+                 mask_height=5,
+                 mask_width=5,
+             )
+             for i in range(10)
+         ]
+         loader = DataLoader()
+         for datum_idx in range(n_datums):
+             segmentation = data[datum_idx % 10]
+             segmentation.uid = str(datum_idx)
+             loader.add_data([segmentation])
+         elapsed += profile(loader.finalize)()
+     return elapsed / repeat
+
+
+ def benchmark_evaluate(
+     n_datums: int,
+     n_labels: int,
+     time_limit: float | None,
+     repeat: int = 1,
+ ):
+     """
+     Benchmarks 'Evaluator.evaluate' for semantic segmentation.
+
+     Parameters
+     ----------
+     n_datums : int
+         The number of datums to generate.
+     n_labels : int
+         The number of unique labels to generate.
+     time_limit : float, optional
+         An optional time limit to constrain the benchmark.
+     repeat : int
+         The number of times to run the benchmark to produce a runtime average.
+
+     Returns
+     -------
+     float
+         The average runtime.
+     """
+
+     profile = create_runtime_profiler(
+         time_limit=time_limit,
+         repeat=repeat,
+     )
+
+     elapsed = 0
+     for _ in range(repeat):
+
+         data = [
+             generate_segmentation(
+                 datum_uid=str(i),
+                 number_of_unique_labels=n_labels,
+                 mask_height=5,
+                 mask_width=5,
+             )
+             for i in range(10)
+         ]
+         loader = DataLoader()
+         for datum_idx in range(n_datums):
+             segmentation = data[datum_idx % 10]
+             segmentation.uid = str(datum_idx)
+             loader.add_data([segmentation])
+         evaluator = loader.finalize()
+         elapsed += profile(evaluator.evaluate)()
+     return elapsed / repeat
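These benchmark entry points are what a driver script would hand to Benchmark.run from the profiling module added at the top of this diff. A sketch of such a driver follows; the module name for this file is assumed, since it is not shown in the diff.

    # Hypothetical driver; `benchmark_semantic_segmentation` is an assumed module name.
    from valor_lite.profiling import Benchmark

    from benchmark_semantic_segmentation import benchmark_add_data, benchmark_finalize

    if __name__ == "__main__":
        b = Benchmark(
            time_limit=10.0,
            memory_limit=8 * (1024**3),  # 8 GB expressed in bytes
            repeat=1,
            verbose=True,
        )
        # Each keyword is a list of parameter values, ordered from most to least complex.
        b.run(
            benchmark=benchmark_add_data,
            n_labels=[100, 10],
            shape=[(1000, 1000), (100, 100)],
        )
        b.run(
            benchmark=benchmark_finalize,
            n_datums=[1000, 100],
            n_labels=[100, 10],
        )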