speedy-utils 1.1.33__py3-none-any.whl → 1.1.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/lm/__init__.py +17 -15
- llm_utils/lm/llm.py +2 -0
- llm_utils/lm/mixins.py +74 -0
- llm_utils/lm/utils.py +5 -1
- speedy_utils/multi_worker/process.py +121 -25
- {speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/RECORD +12 -12
- vision_utils/__init__.py +9 -2
- vision_utils/io_utils.py +15 -14
- vision_utils/plot.py +2 -2
- {speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/entry_points.txt +0 -0
llm_utils/lm/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from .lm_base import LMBase, get_model_name
 from .mixins import (
     ModelUtilsMixin,
     TemperatureRangeMixin,
+    TokenizationMixin,
     TwoStepPydanticMixin,
     VLLMMixin,
 )
@@ -14,19 +15,20 @@ from .signature import Input, InputField, Output, OutputField, Signature


 __all__ = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    'LMBase',
+    'LLM',
+    'AsyncLM',
+    'AsyncLLMTask',
+    'BasePromptBuilder',
+    'LLMSignature',
+    'Signature',
+    'InputField',
+    'OutputField',
+    'Input',
+    'Output',
+    'TemperatureRangeMixin',
+    'TwoStepPydanticMixin',
+    'VLLMMixin',
+    'ModelUtilsMixin',
+    'TokenizationMixin',
 ]
llm_utils/lm/llm.py
CHANGED
@@ -20,6 +20,7 @@ from .base_prompt_builder import BasePromptBuilder
 from .mixins import (
     ModelUtilsMixin,
     TemperatureRangeMixin,
+    TokenizationMixin,
     TwoStepPydanticMixin,
     VLLMMixin,
 )
@@ -47,6 +48,7 @@ class LLM(
     TwoStepPydanticMixin,
     VLLMMixin,
     ModelUtilsMixin,
+    TokenizationMixin,
 ):
     """LLM task with structured input/output handling."""

llm_utils/lm/mixins.py
CHANGED
@@ -396,6 +396,80 @@ class VLLMMixin:
         return _kill_vllm_on_port(port)


+class TokenizationMixin:
+    """Mixin for tokenization operations (encode/decode)."""
+
+    def encode(
+        self,
+        text: str,
+        *,
+        add_special_tokens: bool = True,
+        return_token_strs: bool = False,
+    ) -> list[int] | tuple[list[int], list[str]]:
+        """
+        Encode text to token IDs using the model's tokenizer.
+
+        Args:
+            text: Text to tokenize
+            add_special_tokens: Whether to add special tokens (e.g., BOS)
+            return_token_strs: If True, also return token strings
+
+        Returns:
+            List of token IDs, or tuple of (token IDs, token strings)
+        """
+        import requests
+
+        # Get base_url from client and remove /v1 suffix if present
+        # (tokenize endpoint is at root level, not under /v1)
+        base_url = str(self.client.base_url).rstrip('/')
+        if base_url.endswith('/v1'):
+            base_url = base_url[:-3]  # Remove '/v1'
+
+        response = requests.post(
+            f'{base_url}/tokenize',
+            json={
+                'prompt': text,
+                'add_special_tokens': add_special_tokens,
+                'return_token_strs': return_token_strs,
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        if return_token_strs:
+            return data['tokens'], data.get('token_strs', [])
+        return data['tokens']
+
+    def decode(
+        self,
+        token_ids: list[int],
+    ) -> str:
+        """
+        Decode token IDs to text using the model's tokenizer.
+
+        Args:
+            token_ids: List of token IDs to decode
+
+        Returns:
+            Decoded text string
+        """
+        import requests
+
+        # Get base_url from client and remove /v1 suffix if present
+        # (detokenize endpoint is at root level, not under /v1)
+        base_url = str(self.client.base_url).rstrip('/')
+        if base_url.endswith('/v1'):
+            base_url = base_url[:-3]  # Remove '/v1'
+
+        response = requests.post(
+            f'{base_url}/detokenize',
+            json={'tokens': token_ids},
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data['prompt']
+
+
 class ModelUtilsMixin:
     """Mixin for model utility methods."""

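For reference, TokenizationMixin does not load a tokenizer locally: it proxies the vLLM server's root-level /tokenize and /detokenize endpoints through the client's base_url. A minimal sketch of the same round trip, assuming a vLLM OpenAI-compatible server on localhost:8000 (the port is an assumption; the request and response shapes mirror the mixin above):

import requests

base_url = 'http://localhost:8000'  # assumed server; the mixin derives this from self.client.base_url

# Text -> token IDs, as in TokenizationMixin.encode
resp = requests.post(
    f'{base_url}/tokenize',
    json={'prompt': 'Hello, world!', 'add_special_tokens': True},
)
resp.raise_for_status()
token_ids = resp.json()['tokens']

# Token IDs -> text, as in TokenizationMixin.decode
resp = requests.post(f'{base_url}/detokenize', json={'tokens': token_ids})
resp.raise_for_status()
print(resp.json()['prompt'])  # round-trips back to the original text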
llm_utils/lm/utils.py
CHANGED
@@ -282,7 +282,11 @@ def get_base_client(
             return MOpenAI(
                 base_url=f"http://localhost:{port}/v1", api_key=api_key, cache=cache
             )
-
+        # Use default port 8000 when client is None
+        logger.info("No client specified, using default port 8000 at http://localhost:8000/v1")
+        return MOpenAI(
+            base_url="http://localhost:8000/v1", api_key=api_key, cache=cache
+        )
     if isinstance(client, int):
         return MOpenAI(
             base_url=f"http://localhost:{client}/v1", api_key=api_key, cache=cache
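The practical effect: when called without a client and no running vLLM port is detected, get_base_client now falls back to the conventional vLLM port instead of falling through to the client-type checks below. A rough sketch of the resolution order after this patch (the branch structure around the hunk and the parameter name are inferred, not shown):

# client=None -> an auto-detected vLLM port if one is found,
#                else the new fallback http://localhost:8000/v1
# client=8001 -> http://localhost:8001/v1 (an int is treated as a port)
lm_client = get_base_client(None)  # hypothetical call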
speedy_utils/multi_worker/process.py
CHANGED
@@ -1,6 +1,3 @@
-# ray_multi_process.py
-
-
 from ..__imports import *


@@ -8,6 +5,12 @@ SPEEDY_RUNNING_PROCESSES: list[psutil.Process] = []
 _SPEEDY_PROCESSES_LOCK = threading.Lock()


+# /mnt/data/anhvth8/venvs/Megatron-Bridge-Host/lib/python3.12/site-packages/ray/_private/worker.py:2046: FutureWarning: Tip: In future versions of Ray, Ray will no longer override accelerator visible devices env var if num_gpus=0 or num_gpus=None (default). To enable this behavior and turn off this error message, set RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO=0
+# turn off future warning and verbose task logs
+os.environ["RAY_ACCEL_ENV_VAR_OVERRIDE_ON_ZERO"] = "0"
+os.environ["RAY_DEDUP_LOGS"] = "0"
+os.environ["RAY_LOG_TO_STDERR"] = "0"
+
 def _prune_dead_processes() -> None:
     """Remove dead processes from tracking list."""
     with _SPEEDY_PROCESSES_LOCK:
@@ -78,6 +81,7 @@ def _track_multiprocessing_processes() -> None:

 def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
     """Build cache dir with function name + timestamp."""
+    import datetime
     func_name = getattr(func, '__name__', 'func')
     now = datetime.datetime.now()
     stamp = now.strftime('%m%d_%Hh%Mm%Ss')
@@ -85,9 +89,8 @@ def _build_cache_dir(func: Callable, items: list[Any]) -> Path:
     path = Path('.cache') / run_id
     path.mkdir(parents=True, exist_ok=True)
     return path
-
-
-def wrap_dump(func: Callable, cache_dir: Path | None):
+_DUMP_THREADS = []
+def wrap_dump(func: Callable, cache_dir: Path | None, dump_in_thread: bool = True):
     """Wrap a function so results are dumped to .pkl when cache_dir is set."""
     if cache_dir is None:
         return func
@@ -95,8 +98,24 @@
     def wrapped(x, *args, **kwargs):
         res = func(x, *args, **kwargs)
         p = cache_dir / f'{uuid.uuid4().hex}.pkl'
-
-
+
+        def save():
+            with open(p, 'wb') as fh:
+                pickle.dump(res, fh)
+            # Clean trash to avoid bloating memory
+            # print(f'Thread count: {threading.active_count()}')
+            # print(f'Saved result to {p}')
+
+        if dump_in_thread:
+            thread = threading.Thread(target=save)
+            _DUMP_THREADS.append(thread)
+            # count thread
+            # print(f'Thread count: {threading.active_count()}')
+            while threading.active_count() > 16:
+                time.sleep(0.1)
+            thread.start()
+        else:
+            save()
         return str(p)

     return wrapped
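With lazy_output=True, multi_process writes each result to a .pkl under .cache/ and returns file paths instead of values; the new dump_in_thread flag moves each pickle.dump onto a background thread, throttled so that no more than ~16 threads are alive at once. A small sketch of consuming the returned paths (my_func and items are placeholders; the path-returning behavior is documented in the multi_process docstring further down):

import pickle

paths = multi_process(my_func, items, lazy_output=True, dump_in_thread=True)

results = []
for p in paths:
    with open(p, 'rb') as fh:  # each entry is the str(p) returned by wrapped()
        results.append(pickle.load(fh))

One caveat: the dump threads are appended to _DUMP_THREADS but never joined here, so with dump_in_thread=True a path can be returned before its write completes; dump_in_thread=False gives the synchronous guarantee the docstring describes.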
@@ -109,20 +128,28 @@ RAY_WORKER = None

 def ensure_ray(workers: int, pbar: tqdm | None = None):
     """Initialize or reinitialize Ray with a given worker count, log to bar postfix."""
+    import ray as _ray_module
+    import logging
+
     global RAY_WORKER
-
-
+    # shutdown when worker count changes or if Ray not initialized
+    if not _ray_module.is_initialized() or workers != RAY_WORKER:
+        if _ray_module.is_initialized() and pbar:
             pbar.set_postfix_str(f'Restarting Ray {workers} workers')
-
+            _ray_module.shutdown()
         t0 = time.time()
-
+        _ray_module.init(
+            num_cpus=workers,
+            ignore_reinit_error=True,
+            logging_level=logging.ERROR,
+            log_to_driver=False,
+        )
         took = time.time() - t0
         _track_ray_processes()  # Track Ray worker processes
         if pbar:
             pbar.set_postfix_str(f'ray.init {workers} took {took:.2f}s')
         RAY_WORKER = workers

-
 def multi_process(
     func: Callable[[Any], Any],
     items: Iterable[Any] | None = None,
@@ -134,6 +161,8 @@
     # backend: str = "ray", # "seq", "ray", or "fastcore"
     backend: Literal['seq', 'ray', 'mp', 'threadpool', 'safe'] = 'mp',
     desc: str | None = None,
+    shared_kwargs: list[str] | None = None,
+    dump_in_thread: bool = True,
     **func_kwargs: Any,
 ) -> list[Any]:
     """
@@ -146,13 +175,55 @@
     - "threadpool": run in parallel with thread pool
     - "safe": run in parallel with thread pool (explicitly safe for tests)

+    shared_kwargs:
+        - Optional list of kwarg names that should be shared via Ray's zero-copy object store
+        - Only works with Ray backend
+        - Useful for large objects (e.g., models, datasets) that should be shared across workers
+        - Example: shared_kwargs=['model', 'tokenizer'] for sharing large ML models
+
+    dump_in_thread:
+        - Whether to dump results to disk in a separate thread (default: True)
+        - If False, dumping is done synchronously, which may block but ensures data is saved before returning
+
     If lazy_output=True, every result is saved to .pkl and
     the returned list contains file paths.
     """

     # default backend selection
     if backend is None:
-
+        try:
+            import ray as _ray_module
+            backend = 'ray'
+        except ImportError:
+            backend = 'mp'
+
+    # Validate shared_kwargs
+    if shared_kwargs:
+        # Validate that all shared_kwargs are valid kwargs for the function
+        sig = inspect.signature(func)
+        valid_params = set(sig.parameters.keys())
+
+        for kw in shared_kwargs:
+            if kw not in func_kwargs:
+                raise ValueError(
+                    f"shared_kwargs key '{kw}' not found in provided func_kwargs"
+                )
+            # Check if parameter exists in function signature or if function accepts **kwargs
+            has_var_keyword = any(
+                p.kind == inspect.Parameter.VAR_KEYWORD
+                for p in sig.parameters.values()
+            )
+            if kw not in valid_params and not has_var_keyword:
+                raise ValueError(
+                    f"shared_kwargs key '{kw}' is not a valid parameter for function '{func.__name__}'. "
+                    f"Valid parameters: {valid_params}"
+                )
+
+        # Only allow shared_kwargs with Ray backend
+        if backend != 'ray':
+            raise ValueError(
+                f"shared_kwargs only supported with 'ray' backend, got '{backend}'"
+            )

     # unify items
     # unify items and coerce to concrete list so we can use len() and
@@ -169,7 +240,7 @@

     # build cache dir + wrap func
     cache_dir = _build_cache_dir(func, items) if lazy_output else None
-    f_wrapped = wrap_dump(func, cache_dir)
+    f_wrapped = wrap_dump(func, cache_dir, dump_in_thread)

     total = len(items)
     if desc:
@@ -181,8 +252,6 @@
     ) as pbar:
         # ---- sequential backend ----
         if backend == 'seq':
-            pbar.set_postfix_str('backend=seq')
-            results = []
             for x in items:
                 results.append(f_wrapped(x, **func_kwargs))
                 pbar.update(1)
@@ -190,19 +259,46 @@

         # ---- ray backend ----
         if backend == 'ray':
-
-            ensure_ray(workers, pbar)
+            import ray as _ray_module

-
-
-
-
-
+            ensure_ray(workers, pbar)
+            shared_refs = {}
+            regular_kwargs = {}
+
+            if shared_kwargs:
+                for kw in shared_kwargs:
+                    # Put large objects in Ray's object store (zero-copy)
+                    shared_refs[kw] = _ray_module.put(func_kwargs[kw])
+                    pbar.set_postfix_str(f'ray: shared `{kw}` via object store')
+
+                # Remaining kwargs are regular
+                regular_kwargs = {
+                    k: v for k, v in func_kwargs.items()
+                    if k not in shared_kwargs
+                }
+            else:
+                regular_kwargs = func_kwargs
+
+            @_ray_module.remote
+            def _task(x, shared_refs_dict, regular_kwargs_dict):
+                # Dereference shared objects (zero-copy for numpy arrays)
+                import ray as _ray_in_task
+                dereferenced = {k: _ray_in_task.get(v) for k, v in shared_refs_dict.items()}
+                # Merge with regular kwargs
+                all_kwargs = {**dereferenced, **regular_kwargs_dict}
+                return f_wrapped(x, **all_kwargs)
+
+            refs = [
+                _task.remote(x, shared_refs, regular_kwargs) for x in items
+            ]

             results = []
+            t_start = time.time()
             for r in refs:
-                results.append(
+                results.append(_ray_module.get(r))
                 pbar.update(1)
+            t_end = time.time()
+            print(f"Ray processing took {t_end - t_start:.2f}s for {total} items")
             return results

         # ---- fastcore backend ----
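Spelling out the docstring's shared_kwargs=['model', 'tokenizer'] example: every name listed in shared_kwargs is pushed through ray.put once and dereferenced inside each task with ray.get (zero-copy for numpy arrays), rather than being pickled into every task invocation. A usage sketch with invented names:

import numpy as np

def score(x, embeddings):
    # embeddings arrives via Ray's object store, not per-task pickling
    return float(embeddings[x].sum())

big = np.random.rand(10_000, 512)
results = multi_process(
    score,
    items=range(100),
    backend='ray',                 # shared_kwargs is rejected on other backends
    shared_kwargs=['embeddings'],  # must also be passed as a real kwarg below
    embeddings=big,
)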
{speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/RECORD
CHANGED
@@ -4,15 +4,15 @@ llm_utils/chat_format/__init__.py,sha256=a7BKtBVktgLMq2Do4iNu3YfdDdTG1v9M_BkmaEo
 llm_utils/chat_format/display.py,sha256=Lffjzna9_vV3QgfiXZM2_tuVb3wqA-WxwrmoAjsJigw,17356
 llm_utils/chat_format/transform.py,sha256=PJ2g9KT1GSbWuAs7giEbTpTAffpU9QsIXyRlbfpTZUQ,5351
 llm_utils/chat_format/utils.py,sha256=M2EctZ6NeHXqFYufh26Y3CpSphN0bdZm5xoNaEJj5vg,1251
-llm_utils/lm/__init__.py,sha256=
+llm_utils/lm/__init__.py,sha256=4jYMy3wPH3tg-tHFyWEWOqrnmX4Tu32VZCdzRGMGQsI,778
 llm_utils/lm/base_prompt_builder.py,sha256=_TzYMsWr-SsbA_JNXptUVN56lV5RfgWWTrFi-E8LMy4,12337
-llm_utils/lm/llm.py,sha256=
+llm_utils/lm/llm.py,sha256=yas7Khd0Djc8-GD8jL--B2oPteV9FC3PpfPbr9XCLOQ,16515
 llm_utils/lm/llm_signature.py,sha256=vV8uZgLLd6ZKqWbq0OPywWvXAfl7hrJQnbtBF-VnZRU,1244
 llm_utils/lm/lm_base.py,sha256=Bk3q34KrcCK_bC4Ryxbc3KqkiPL39zuVZaBQ1i6wJqs,9437
-llm_utils/lm/mixins.py,sha256=
+llm_utils/lm/mixins.py,sha256=o0tZiaKW4u1BxBVlT_0yTwnO8h7KnY02HX5TuWipvr0,16735
 llm_utils/lm/openai_memoize.py,sha256=rYrSFPpgO7adsjK1lVdkJlhqqIw_13TCW7zU8eNwm3o,5185
 llm_utils/lm/signature.py,sha256=K1hvCAqoC5CmsQ0Y_ywnYy2fRb5JzmIK8OS-hjH-5To,9971
-llm_utils/lm/utils.py,sha256=
+llm_utils/lm/utils.py,sha256=dEKFta8S6Mm4LjIctcpFlEGL9RnmLm5DHd2TA70UWuA,12649
 llm_utils/lm/async_lm/__init__.py,sha256=j0xK49ooZ0Dm5GstGGHbmPMrPjd3mOXoJ1H7eAL_Z4g,122
 llm_utils/lm/async_lm/_utils.py,sha256=mB-AueWJJatTx0PXqd_oWc6Kz36cfgDmDTKgiXafCJI,6106
 llm_utils/lm/async_lm/async_llm_task.py,sha256=2PWW4vPW2jYUiGmYFo4-DHrmX5Jm8Iw_1qo6EPL-ytE,18611
@@ -41,16 +41,16 @@ speedy_utils/common/utils_io.py,sha256=w9AxMD_8V3Wyo_0o9OtXjVQS8Z3KhxQiOkrl2p8Np
 speedy_utils/common/utils_misc.py,sha256=ZRJCS7OJxybpVm1sasoeCYRW2TaaGCXj4DySYlQeVR8,2227
 speedy_utils/common/utils_print.py,sha256=AGDB7mgJnO00QkJBH6kJb46738q3GzMUZPwtQ248vQw,4763
 speedy_utils/multi_worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-speedy_utils/multi_worker/process.py,sha256=
+speedy_utils/multi_worker/process.py,sha256=jk2K3oNnul1jop4g2U7-6GAekJ4fCyXCbj39WWAwXWQ,14925
 speedy_utils/multi_worker/thread.py,sha256=k4Ff4R2W0Ehet1zJ5nHQOfcsvOjnJzU6A2I18qw7_6M,21320
 speedy_utils/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 speedy_utils/scripts/mpython.py,sha256=aZvusJLKa3APVhabbFUAEo873VBm8Fym7HKGmVW4LyE,3843
 speedy_utils/scripts/openapi_client_codegen.py,sha256=GModmmhkvGnxljK4KczyixKDrk-VEcLaW5I0XT6tzWo,9657
 vision_utils/README.md,sha256=AIDZZj8jo_QNrEjFyHwd00iOO431s-js-M2dLtVTn3I,5740
-vision_utils/__init__.py,sha256=
-vision_utils/io_utils.py,sha256=
-vision_utils/plot.py,sha256=
-speedy_utils-1.1.
-speedy_utils-1.1.
-speedy_utils-1.1.
-speedy_utils-1.1.
+vision_utils/__init__.py,sha256=hF54sT6FAxby8kDVhOvruy4yot8O-Ateey5n96O1pQM,284
+vision_utils/io_utils.py,sha256=pI0Va6miesBysJcllK6NXCay8HpGZsaMWwlsKB2DMgA,26510
+vision_utils/plot.py,sha256=HkNj3osA3moPuupP1VguXfPPOW614dZO5tvC-EFKpKM,12028
+speedy_utils-1.1.35.dist-info/METADATA,sha256=wsz89syaYNXEeGjJXV8zb0W2ZrTjpN2Lj47tE7LQeEI,8048
+speedy_utils-1.1.35.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+speedy_utils-1.1.35.dist-info/entry_points.txt,sha256=1rrFMfqvaMUE9hvwGiD6vnVh98kmgy0TARBj-v0Lfhs,244
+speedy_utils-1.1.35.dist-info/RECORD,,
vision_utils/__init__.py
CHANGED
@@ -1,4 +1,11 @@
-from .io_utils import
+from .io_utils import (
+    ImageMmap,
+    ImageMmapDynamic,
+    read_images,
+    read_images_cpu,
+    read_images_gpu,
+)
 from .plot import plot_images_notebook

-
+
+__all__ = ['plot_images_notebook', 'read_images_cpu', 'read_images_gpu', 'read_images', 'ImageMmap', 'ImageMmapDynamic']
vision_utils/io_utils.py
CHANGED
@@ -3,14 +3,16 @@ from __future__ import annotations
 # type: ignore
 import os
 import time
-from pathlib import Path
-from typing import Sequence, Tuple, TYPE_CHECKING
 from multiprocessing import cpu_count
+from pathlib import Path
+from typing import TYPE_CHECKING, Sequence, Tuple

 import numpy as np
 from PIL import Image
+
 from speedy_utils import identify

+
 try:
     from torch.utils.data import Dataset
 except ImportError:
@@ -438,12 +440,11 @@ class ImageMmap(Dataset):
             if img is None:
                 if self.safe:
                     raise ValueError(f"Failed to load image: {path}")
-
-
-
-
-
-                )
+                # Failed to load, write zeros
+                print(f"Warning: Failed to load {path}, using zeros")
+                mm[global_idx] = np.zeros(
+                    (self.H, self.W, self.C), dtype=self.dtype
+                )
             else:
                 # Clip to valid range and ensure correct dtype
                 if self.dtype == np.uint8:
@@ -625,9 +626,10 @@ class ImageMmapDynamic(Dataset):
     - data file: concatenated flattened images in path order
     - meta: JSON with offsets, shapes, dtype, total_elems, paths, n
     """
-    from tqdm import tqdm
     import json

+    from tqdm import tqdm
+
     print(f"Building dynamic mmap cache for {self.n} images...")
     # We don't know total size up front -> write sequentially
     offsets = np.zeros(self.n, dtype=np.int64)
@@ -660,11 +662,10 @@ class ImageMmapDynamic(Dataset):
             if img is None:
                 if self.safe:
                     raise ValueError(f"Failed to load image: {path}")
-
-
-
-
-                img = np.zeros((1, 1, 3), dtype=self.dtype)
+                print(
+                    f"Warning: Failed to load {path}, storing 1x1x3 zeros"
+                )
+                img = np.zeros((1, 1, 3), dtype=self.dtype)

             # Clip to valid range for uint8
             if self.dtype == np.uint8:
vision_utils/plot.py
CHANGED
@@ -1,8 +1,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union

-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np


 if TYPE_CHECKING:
@@ -311,7 +311,7 @@ def visualize_tensor(img_tensor, mode='hwc', normalize=True, max_cols=8):
     mpl_available, plt = _check_matplotlib_available()
     if not mpl_available:
         raise ImportError("matplotlib is required for plotting. Install it with: pip install matplotlib")
-
+
     if mode == 'chw':
         img_tensor = img_tensor.permute(1, 2, 0)
     imgs = [img_tensor]
{speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/WHEEL
File without changes

{speedy_utils-1.1.33.dist-info → speedy_utils-1.1.35.dist-info}/entry_points.txt
File without changes