speedy-utils 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +29 -0
- llm_utils/chat_format.py +427 -0
- llm_utils/group_messages.py +120 -0
- llm_utils/lm/__init__.py +8 -0
- llm_utils/lm/base_lm.py +304 -0
- llm_utils/lm/utils.py +130 -0
- llm_utils/scripts/vllm_load_balancer.py +353 -0
- llm_utils/scripts/vllm_serve.py +416 -0
- speedy_utils/__init__.py +85 -0
- speedy_utils/all.py +159 -0
- {speedy → speedy_utils}/common/__init__.py +0 -0
- speedy_utils/common/clock.py +215 -0
- speedy_utils/common/function_decorator.py +66 -0
- speedy_utils/common/logger.py +207 -0
- speedy_utils/common/report_manager.py +112 -0
- speedy_utils/common/utils_cache.py +264 -0
- {speedy → speedy_utils}/common/utils_io.py +66 -19
- {speedy → speedy_utils}/common/utils_misc.py +25 -11
- speedy_utils/common/utils_print.py +216 -0
- speedy_utils/multi_worker/__init__.py +0 -0
- speedy_utils/multi_worker/process.py +198 -0
- speedy_utils/multi_worker/thread.py +327 -0
- speedy_utils/scripts/mpython.py +108 -0
- speedy_utils-1.0.5.dist-info/METADATA +279 -0
- speedy_utils-1.0.5.dist-info/RECORD +27 -0
- {speedy_utils-1.0.4.dist-info → speedy_utils-1.0.5.dist-info}/WHEEL +1 -2
- speedy_utils-1.0.5.dist-info/entry_points.txt +3 -0
- speedy/__init__.py +0 -53
- speedy/common/clock.py +0 -68
- speedy/common/utils_cache.py +0 -170
- speedy/common/utils_print.py +0 -138
- speedy/multi_worker.py +0 -121
- speedy_utils-1.0.4.dist-info/METADATA +0 -22
- speedy_utils-1.0.4.dist-info/RECORD +0 -12
- speedy_utils-1.0.4.dist-info/top_level.txt +0 -1
speedy_utils/multi_worker/process.py
@@ -0,0 +1,198 @@
import inspect
import multiprocessing
import os
import time
import traceback
from collections.abc import Callable, Iterable, Iterator, Sequence
from concurrent.futures import ProcessPoolExecutor, as_completed
from itertools import islice
from typing import Any, List, TypeVar, cast

T = TypeVar("T")

if hasattr(multiprocessing, "set_start_method"):
    try:
        multiprocessing.set_start_method("spawn", force=True)
    except RuntimeError:
        pass

try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    tqdm = None  # type: ignore[assignment]


# ──── internal helpers ────────────────────────────────────────────────────


def _group_iter(src: Iterable[Any], size: int) -> Iterable[list[Any]]:
    "Yield *size*-sized chunks from *src*."
    it = iter(src)
    while chunk := list(islice(it, size)):
        yield chunk


def _short_tb() -> str:
    tb = "".join(traceback.format_exc())
    return "\n".join(ln for ln in tb.splitlines() if "multi_process" not in ln)


def _safe_call(func: Callable, obj, fixed):
    try:
        return func(obj, **fixed)
    except Exception as exc:
        func_name = getattr(func, "__name__", str(func))
        raise RuntimeError(
            f"{func_name}({obj!r}) failed: {exc}\n{_short_tb()}"
        ) from exc


def _worker_process(
    func: Callable, item_batch: Any, fixed_kwargs: dict, batch_size: int
):
    """Worker function executed in each process."""
    if batch_size > 1:
        results = []
        for itm in item_batch:
            try:
                results.append(_safe_call(func, itm, fixed_kwargs))
            except Exception:
                results.append(None)
        return results
    return _safe_call(func, item_batch, fixed_kwargs)


# ──── public API ──────────────────────────────────────────────────────────
def multi_process(
    func: Callable[[Any], Any],
    inputs: Iterable[Any],
    *,
    workers: int | None = None,
    batch: int = 1,
    ordered: bool = True,
    progress: bool = False,
    inflight: int | None = None,
    timeout: float | None = None,
    stop_on_error: bool = True,
    process_update_interval=10,
    **fixed_kwargs,
) -> list[Any]:
    """
    Simple multi-processing parallel map that returns a *list*.

    Parameters
    ----------
    func – target callable executed in separate processes, must be of the form f(obj, ...).
    inputs – iterable with the objects.
    workers – process pool size (defaults to :pyfunc:`os.cpu_count()`).
    batch – package *batch* inputs into one call to reduce IPC overhead.
    ordered – keep original order; if ``False`` results stream as finished.
    progress – show a tqdm bar (requires *tqdm*).
    inflight – max logical items concurrently submitted
               *(default: ``workers × 4``)*.
    timeout – overall timeout for the mapping (seconds).
    stop_on_error – raise immediately on first exception (default) or
                    substitute failing result with ``None``.
    **fixed_kwargs – static keyword args forwarded to every ``func()`` call.
    """
    if workers is None:
        workers = os.cpu_count() or 1
    if inflight is None:
        inflight = workers * 4
    if batch < 1:
        raise ValueError("batch must be ≥ 1")

    try:
        n_inputs = len(inputs)  # type: ignore[arg-type]
    except Exception:
        n_inputs = None

    src_iter: Iterator[Any] = iter(inputs)
    if batch > 1:
        src_iter = cast(Iterator[Any], _group_iter(src_iter, batch))

    logical_total = n_inputs
    bar = None
    last_bar = 0
    if progress and tqdm is not None and logical_total is not None:
        bar = tqdm(
            total=logical_total,
            ncols=80,
            colour="green",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"
            " [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
        )

    if ordered and logical_total is not None:
        results: list[Any] = [None] * logical_total
    else:
        results = []

    completed = 0
    next_idx = 0

    with ProcessPoolExecutor(max_workers=workers) as pool:
        futures = set()

        for _ in range(min(inflight, workers)):
            try:
                arg = next(src_iter)
            except StopIteration:
                break
            fut = pool.submit(_worker_process, func, arg, fixed_kwargs, batch)
            fut.idx = next_idx  # type: ignore[attr-defined]
            futures.add(fut)
            next_idx += len(arg) if batch > 1 else 1

        while futures:
            for fut in as_completed(futures, timeout=timeout):
                futures.remove(fut)
                idx = fut.idx  # type: ignore[attr-defined]
                try:
                    res = fut.result()
                except Exception:
                    if stop_on_error:
                        raise
                    num_items = batch if batch > 1 else 1
                    res = [None] * num_items if batch > 1 else None

                out_items = res if batch > 1 else [res]
                if out_items is None:
                    out_items = []

                if ordered and logical_total is not None:
                    if isinstance(out_items, list) and len(out_items) > 0:
                        for i, item in enumerate(out_items):
                            if idx + i < len(results):
                                results[idx + i] = item
                else:
                    if isinstance(out_items, list):
                        results.extend(out_items)

                completed += len(out_items)

                if bar and completed - last_bar >= process_update_interval:
                    bar.update(completed - last_bar)
                    last_bar = completed

                try:
                    while next_idx - completed < inflight:
                        arg = next(src_iter)
                        fut2 = pool.submit(
                            _worker_process, func, arg, fixed_kwargs, batch
                        )
                        fut2.idx = next_idx  # type: ignore[attr-defined]
                        futures.add(fut2)
                        next_idx += len(arg) if batch > 1 else 1
                except StopIteration:
                    pass
                break

    if bar:
        bar.update(completed - last_bar)
        bar.close()

    return results


__all__ = ["multi_process"]
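For reference, a minimal usage sketch of the new multi_process API; the square worker and the input range are illustrative, not part of the package:

from speedy_utils.multi_worker.process import multi_process

def square(x: int) -> int:
    # the worker must take the item as its first positional argument
    return x * x

if __name__ == "__main__":
    # the guard matters: the module forces the "spawn" start method,
    # so worker processes re-import this file in fresh interpreters
    out = multi_process(square, range(100), workers=4, batch=8, progress=True)
    assert out == [x * x for x in range(100)]

With stop_on_error=False, a failing item yields None in its slot instead of aborting the whole map.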
speedy_utils/multi_worker/thread.py
@@ -0,0 +1,327 @@
"""Provides thread-based parallel execution utilities."""

import os
import time
import traceback
from collections.abc import Callable, Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from typing import Any, TypeVar

from loguru import logger

try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    tqdm = None  # type: ignore[assignment]

# Sensible defaults
DEFAULT_WORKERS = (os.cpu_count() or 4) * 2

T = TypeVar("T")
R = TypeVar("R")


def _group_iter(src: Iterable[T], size: int) -> Iterable[list[T]]:
    """Yield successive chunks from iterable of specified size."""
    it = iter(src)
    while chunk := list(islice(it, size)):
        yield chunk


def _short_tb() -> str:
    """Return a shortened traceback, excluding internal frames."""
    tb = "".join(traceback.format_exc())
    # hide frames inside this helper to keep logs short
    return "\n".join(ln for ln in tb.splitlines() if "multi_thread.py" not in ln)


def _worker(item: T, func: Callable[[T], R], fixed_kwargs: dict[str, Any]) -> R:
    """Execute the function with an item and fixed kwargs."""
    return func(item, **fixed_kwargs)


# ────────────────────────────────────────────────────────────
# main API
# ────────────────────────────────────────────────────────────
def multi_thread(
    func: Callable,
    inputs: Iterable[Any],
    *,
    workers: int | None = DEFAULT_WORKERS,
    batch: int = 1,
    ordered: bool = True,
    progress: bool = True,
    progress_update: int = 10,
    prefetch_factor: int = 4,
    timeout: float | None = None,
    stop_on_error: bool = True,
    n_proc=0,
    store_output_pkl_file: str | None = None,
    **fixed_kwargs,
) -> list[Any]:
    """
    ThreadPool **map** that returns a *list*.

    Parameters
    ----------
    func – target callable.
    inputs – iterable with the arguments.
    workers – defaults to ``os.cpu_count()*2``.
    batch – package *batch* inputs into one call for low overhead.
    ordered – keep original order (costs memory); if ``False`` results
              are yielded as soon as they finish.
    progress – show a tqdm bar (requires *tqdm* installed).
    progress_update – bar redraw frequency (logical items, *not* batches).
    prefetch_factor – in-flight tasks ≈ ``workers * prefetch_factor``.
    timeout – overall timeout (seconds) for the mapping.
    stop_on_error – raise immediately on first exception (default). If
                    ``False`` the failing task’s result becomes ``None``.
    **fixed_kwargs – static keyword args forwarded to every ``func()`` call.
    """
    from speedy_utils import dump_json_or_pickle, load_by_ext

    if n_proc > 1:
        import tempfile

        from fastcore.all import threaded

        # split the inputs by n_proc
        inputs = list(inputs)
        n_per_proc = max(len(inputs) // n_proc, 1)
        proc_inputs_list = []
        for i in range(0, len(inputs), n_per_proc):
            proc_inputs_list.append(inputs[i : i + n_per_proc])
        procs = []
        in_process_multi_thread = threaded(process=True)(multi_thread)

        for proc_id, proc_inputs in enumerate(proc_inputs_list):
            with tempfile.NamedTemporaryFile(
                delete=False, suffix="multi_thread.pkl"
            ) as tmp_file:
                file_pkl = tmp_file.name
            assert isinstance(in_process_multi_thread, Callable)
            proc = in_process_multi_thread(
                func,
                proc_inputs,
                workers=workers,
                batch=batch,
                ordered=ordered,
                progress=proc_id == 0,
                progress_update=progress_update,
                prefetch_factor=prefetch_factor,
                timeout=timeout,
                stop_on_error=stop_on_error,
                n_proc=0,  # prevent recursion
                store_output_pkl_file=file_pkl,
                **fixed_kwargs,
            )
            procs.append([proc, file_pkl])
        # join
        results = []

        for proc, file_pkl in procs:
            proc.join()
            logger.info(f"Done proc {proc=}")
            results.extend(load_by_ext(file_pkl))
        return results

    try:
        import pandas as pd

        if isinstance(inputs, pd.DataFrame):
            inputs = inputs.to_dict(orient="records")
    except ImportError:
        pass

    try:
        n_inputs = len(inputs)  # type: ignore[arg-type]
    except Exception:
        n_inputs = None
    workers_val = workers if workers is not None else DEFAULT_WORKERS

    if batch == 1 and n_inputs and n_inputs / max(workers_val, 1) > 20_000:
        batch = 32  # empirically good for sub-ms tasks

    # ── build (maybe-batched) source iterator ────────────────────────────
    src_iter: Iterable[Any] = iter(inputs)
    if batch > 1:
        src_iter = _group_iter(src_iter, batch)
    # Ensure src_iter is always an iterator
    src_iter = iter(src_iter)

    # total logical items (for bar & ordered pre-allocation)
    logical_total = n_inputs
    if logical_total is not None and batch > 1:
        logical_total = n_inputs  # still number of *items*, not batches

    # ── progress bar ─────────────────────────────────────────────────────
    bar = None
    last_bar_update = 0
    if progress and tqdm is not None and logical_total is not None:
        bar = tqdm(
            total=logical_total,
            ncols=128,
            colour="green",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}"
            " [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
        )

    # ── prepare result container ─────────────────────────────────────────
    if ordered and logical_total is not None:
        results: list[Any] = [None] * logical_total
    else:
        results = []

    # ── main execution loop ──────────────────────────────────────────────
    workers_val = workers if workers is not None else DEFAULT_WORKERS
    max_inflight = workers_val * max(prefetch_factor, 1)
    completed_items = 0
    next_logical_idx = 0  # index assigned to the next submission

    with ThreadPoolExecutor(max_workers=workers) as pool:
        inflight = set()

        # prime the pool
        for _ in range(max_inflight):
            try:
                arg = next(src_iter)
            except StopIteration:
                break
            if batch > 1:
                fut = pool.submit(
                    lambda items: [_worker(item, func, fixed_kwargs) for item in items],
                    arg,
                )
                fut.idx = next_logical_idx  # type: ignore[attr-defined]
                inflight.add(fut)
                next_logical_idx += len(arg)
            else:
                fut = pool.submit(_worker, arg, func, fixed_kwargs)
                fut.idx = next_logical_idx  # type: ignore[attr-defined]
                inflight.add(fut)
                next_logical_idx += 1

        try:
            # Process futures as they complete and add new ones to keep the pool busy
            while inflight:  # Continue until all in-flight tasks are processed
                for fut in as_completed(inflight, timeout=timeout):
                    inflight.remove(fut)
                    idx = fut.idx  # type: ignore[attr-defined]
                    try:
                        res = fut.result()
                    except Exception:
                        if stop_on_error:
                            raise
                        res = None

                    # flatten res to list of logical outputs
                    out_items = res if batch > 1 else [res]

                    # Ensure out_items is a list (and thus Sized)
                    if out_items is None:
                        out_items = [None]
                    elif not isinstance(out_items, list):
                        out_items = (
                            list(out_items)
                            if isinstance(out_items, Iterable)
                            else [out_items]
                        )

                    # store outputs
                    if ordered and logical_total is not None:
                        results[idx : idx + len(out_items)] = out_items
                    else:
                        results.extend(out_items)

                    completed_items += len(out_items)

                    # progress bar update
                    if bar and completed_items - last_bar_update >= progress_update:
                        bar.update(completed_items - last_bar_update)
                        last_bar_update = completed_items
                        # Show pending, submitted, processing in the bar postfix
                        submitted = next_logical_idx
                        processing = min(len(inflight), workers_val)
                        pending = (
                            (logical_total - submitted)
                            if logical_total is not None
                            else None
                        )
                        postfix = {
                            "pending": pending if pending is not None else "-",
                            # 'submitted': submitted,
                            "processing": processing,
                        }
                        bar.set_postfix(postfix)

                    # keep queue full
                    try:
                        while next_logical_idx - completed_items < max_inflight:
                            arg = next(src_iter)
                            if batch > 1:
                                fut2 = pool.submit(
                                    lambda items: [
                                        _worker(item, func, fixed_kwargs)
                                        for item in items
                                    ],
                                    arg,
                                )
                                fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                                inflight.add(fut2)
                                next_logical_idx += len(arg)
                            else:
                                fut2 = pool.submit(_worker, arg, func, fixed_kwargs)
                                fut2.idx = next_logical_idx  # type: ignore[attr-defined]
                                inflight.add(fut2)
                                next_logical_idx += 1
                    except StopIteration:
                        pass

                    # Break the inner loop as we've processed one future
                    break

                # If we've exhausted the inner loop without processing anything,
                # and there are still in-flight tasks, we need to wait for them
                if inflight and timeout is not None:
                    # Use a small timeout to avoid hanging indefinitely
                    time.sleep(min(0.01, timeout / 10))

        finally:
            if bar:
                bar.update(completed_items - last_bar_update)
                bar.close()
            if store_output_pkl_file:
                dump_json_or_pickle(results, store_output_pkl_file)
    return results


def multi_thread_standard(
    fn: Callable[[Any], Any], items: Iterable[Any], workers: int = 4
) -> list[Any]:
    """Execute a function using standard ThreadPoolExecutor.

    A standard implementation of multi-threading using ThreadPoolExecutor.
    Ensures the order of results matches the input order.

    Parameters
    ----------
    fn : Callable
        The function to execute for each item.
    items : Iterable
        The items to process.
    workers : int, optional
        Number of worker threads, by default 4.

    Returns
    -------
    list
        Results in same order as input items.
    """
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(fn, item) for item in items]
        results = [fut.result() for fut in futures]
    return results


__all__ = ["multi_thread", "multi_thread_standard"]
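A similar sketch for multi_thread; slow_double and its delay keyword are hypothetical stand-ins for an I/O-bound task:

import time

from speedy_utils.multi_worker.thread import multi_thread, multi_thread_standard

def slow_double(x: int, delay: float = 0.01) -> int:
    time.sleep(delay)  # stands in for network/disk latency
    return x * 2

# extra keyword args (here: delay) reach every call via **fixed_kwargs
out = multi_thread(slow_double, range(50), workers=16, progress=False, delay=0.005)
assert out == [x * 2 for x in range(50)]

# the plain ordered variant, with no batching or progress machinery
out2 = multi_thread_standard(slow_double, list(range(10)), workers=4)

Note that multi_thread imports dump_json_or_pickle and load_by_ext from speedy_utils at call time, so it assumes the full package is installed even when n_proc and store_output_pkl_file are unused.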
speedy_utils/scripts/mpython.py
@@ -0,0 +1,108 @@
#!/usr/bin/env python3
import argparse
import itertools
import multiprocessing  # Import multiprocessing module
import os
import shlex  # To properly escape command line arguments
import shutil
import subprocess

taskset_path = shutil.which("taskset")


def assert_script(python_path):
    code_str = open(python_path).read()
    if "MP_ID" not in code_str or "MP_TOTAL" not in code_str:
        example_code = (
            'import os; MP_TOTAL = int(os.environ.get("MP_TOTAL"));MP_ID = int(os.environ.get("MP_ID"))\n'
            "inputs = list(inputs[MP_ID::MP_TOTAL])"
        )
        # ANSI escape codes for coloring
        YELLOW = "\033[93m"
        RESET = "\033[0m"
        raise_msg = (
            f"MP_ID and MP_TOTAL not found in {python_path}, please add them.\n\n"
            f"Example:\n{YELLOW}{example_code}{RESET}"
        )
        raise Exception(raise_msg)


def run_in_tmux(commands_to_run, tmux_name, num_windows):
    with open("/tmp/start_multirun_tmux.sh", "w") as script_file:
        # first cmd is to kill the session if it exists

        script_file.write("#!/bin/bash\n\n")
        script_file.write(f"tmux kill-session -t {tmux_name}\nsleep .1\n")
        script_file.write(f"tmux new-session -d -s {tmux_name}\n")
        for i, cmd in enumerate(itertools.cycle(commands_to_run)):
            if i >= num_windows:
                break
            window_name = f"{tmux_name}:{i}"
            if i == 0:
                script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")
            else:
                script_file.write(f"tmux new-window -t {tmux_name}\n")
                script_file.write(f"tmux send-keys -t {window_name} '{cmd}' C-m\n")

        # Make the script executable
        script_file.write("chmod +x /tmp/start_multirun_tmux.sh\n")
    print("Run /tmp/start_multirun_tmux.sh")


def main():
    # Assert that MP_ID and MP_TOTAL are not already set

    parser = argparse.ArgumentParser(description="Process fold arguments")
    parser.add_argument(
        "--total_fold", "-t", default=16, type=int, help="total number of folds"
    )
    parser.add_argument("--gpus", type=str, default="0,1,2,3,4,5,6,7")
    parser.add_argument("--ignore_gpus", "-ig", type=str, default="")
    parser.add_argument(
        "--total_cpu",
        type=int,
        default=multiprocessing.cpu_count(),
        help="total number of cpu cores available",
    )
    parser.add_argument(
        "cmd", nargs=argparse.REMAINDER
    )  # This will gather the remaining unparsed arguments

    args = parser.parse_args()
    if not args.cmd or (args.cmd[0] == "--" and len(args.cmd) == 1):
        parser.error("Invalid command provided")
    assert_script(args.cmd[0])

    cmd_str = None
    if args.cmd[0] == "--":
        cmd_str = shlex.join(args.cmd[1:])
    else:
        cmd_str = shlex.join(args.cmd)

    gpus = args.gpus.split(",")
    gpus = [gpu for gpu in gpus if gpu not in args.ignore_gpus.split(",")]
    num_gpus = len(gpus)

    cpu_per_process = max(args.total_cpu // args.total_fold, 1)
    cmds = []
    for i in range(args.total_fold):
        gpu = gpus[i % num_gpus]
        cpu_start = (i * cpu_per_process) % args.total_cpu
        cpu_end = ((i + 1) * cpu_per_process - 1) % args.total_cpu
        ENV = f"CUDA_VISIBLE_DEVICES={gpu} MP_ID={i} MP_TOTAL={args.total_fold}"
        if taskset_path:
            fold_cmd = (
                f"{ENV} {taskset_path} -c {cpu_start}-{cpu_end} python {cmd_str}"
            )
        else:
            fold_cmd = f"{ENV} python {cmd_str}"

        cmds.append(fold_cmd)

    run_in_tmux(cmds, "mpython", args.total_fold)
    os.chmod("/tmp/start_multirun_tmux.sh", 0o755)  # Make the script executable
    os.system("/tmp/start_multirun_tmux.sh")


if __name__ == "__main__":
    main()
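The launcher refuses to run unless the target script reads MP_ID and MP_TOTAL. A hypothetical target (say work.py) that satisfies the check, assuming the console entry point installed via entry_points.txt is named mpython:

# work.py — launched e.g. as: mpython -t 8 --gpus 0,1 work.py
import os

MP_TOTAL = int(os.environ.get("MP_TOTAL", 1))  # number of folds launched
MP_ID = int(os.environ.get("MP_ID", 0))        # this fold's index

inputs = list(range(1_000))
inputs = list(inputs[MP_ID::MP_TOTAL])  # each fold takes a disjoint stride

for item in inputs:
    pass  # process this fold's share of the work

Each fold is pinned to one GPU via CUDA_VISIBLE_DEVICES and, when taskset is available, to a disjoint CPU core range.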