speedy-utils 1.1.40__py3-none-any.whl → 1.1.43__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to the public registry, and is provided for informational purposes only.
- llm_utils/__init__.py +2 -0
- llm_utils/llm_ray.py +370 -0
- llm_utils/lm/llm.py +36 -29
- speedy_utils/__init__.py +10 -0
- speedy_utils/common/utils_io.py +3 -1
- speedy_utils/multi_worker/__init__.py +12 -0
- speedy_utils/multi_worker/dataset_ray.py +303 -0
- speedy_utils/multi_worker/parallel_gpu_pool.py +178 -0
- speedy_utils/multi_worker/process.py +989 -86
- speedy_utils/multi_worker/progress.py +140 -0
- speedy_utils/multi_worker/thread.py +202 -42
- speedy_utils/scripts/mpython.py +49 -4
- {speedy_utils-1.1.40.dist-info → speedy_utils-1.1.43.dist-info}/METADATA +5 -3
- {speedy_utils-1.1.40.dist-info → speedy_utils-1.1.43.dist-info}/RECORD +16 -12
- {speedy_utils-1.1.40.dist-info → speedy_utils-1.1.43.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.40.dist-info → speedy_utils-1.1.43.dist-info}/entry_points.txt +0 -0
speedy_utils/multi_worker/progress.py ADDED

```diff
@@ -0,0 +1,140 @@
+"""
+Real-time progress tracking for distributed Ray tasks.
+
+This module provides a ProgressActor that allows workers to report item-level
+progress in real-time, giving users visibility into actual items processed
+rather than just task completion.
+"""
+import time
+import threading
+from typing import Optional, Callable
+
+__all__ = ['ProgressActor', 'create_progress_tracker', 'get_ray_progress_actor']
+
+
+def get_ray_progress_actor():
+    """Get the Ray-decorated ProgressActor class (lazy import to avoid Ray at module load)."""
+    import ray
+
+    @ray.remote
+    class ProgressActor:
+        """
+        A Ray actor for tracking real-time progress across distributed workers.
+
+        Workers call `update(n)` to report items processed, and the main process
+        can poll `get_progress()` to update a tqdm bar in real-time.
+        """
+        def __init__(self, total: int, desc: str = "Items"):
+            self.total = total
+            self.processed = 0
+            self.desc = desc
+            self.start_time = time.time()
+            self._lock = threading.Lock()
+
+        def update(self, n: int = 1) -> int:
+            """Increment processed count by n. Returns new total."""
+            with self._lock:
+                self.processed += n
+                return self.processed
+
+        def get_progress(self) -> dict:
+            """Get current progress stats."""
+            with self._lock:
+                elapsed = time.time() - self.start_time
+                rate = self.processed / elapsed if elapsed > 0 else 0
+                return {
+                    "processed": self.processed,
+                    "total": self.total,
+                    "elapsed": elapsed,
+                    "rate": rate,
+                    "desc": self.desc,
+                }
+
+        def set_total(self, total: int):
+            """Update total (useful if exact count unknown at start)."""
+            with self._lock:
+                self.total = total
+
+        def reset(self):
+            """Reset progress counter."""
+            with self._lock:
+                self.processed = 0
+                self.start_time = time.time()
+
+    return ProgressActor
+
+
+def create_progress_tracker(total: int, desc: str = "Items"):
+    """
+    Create a progress tracker actor for use with Ray distributed tasks.
+
+    Args:
+        total: Total number of items to process
+        desc: Description for the progress bar
+
+    Returns:
+        A Ray actor handle that workers can use to report progress
+
+    Example:
+        progress_actor = create_progress_tracker(1000000, "Processing items")
+
+        @ray.remote
+        def worker(items, progress_actor):
+            for item in items:
+                process(item)
+                ray.get(progress_actor.update.remote(1))
+
+        # In main process, poll progress:
+        while not done:
+            stats = ray.get(progress_actor.get_progress.remote())
+            pbar.n = stats["processed"]
+            pbar.refresh()
+    """
+    import ray
+    ProgressActor = get_ray_progress_actor()
+    return ProgressActor.remote(total, desc)
+
+
+class ProgressPoller:
+    """
+    Background thread that polls a Ray progress actor and updates a tqdm bar.
+    """
+    def __init__(self, progress_actor, pbar, poll_interval: float = 0.5):
+        import ray
+        self._ray = ray
+        self.progress_actor = progress_actor
+        self.pbar = pbar
+        self.poll_interval = poll_interval
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+
+    def start(self):
+        """Start the polling thread."""
+        self._thread = threading.Thread(target=self._poll_loop, daemon=True)
+        self._thread.start()
+
+    def stop(self):
+        """Stop the polling thread."""
+        self._stop_event.set()
+        if self._thread:
+            self._thread.join(timeout=2.0)
+
+    def _poll_loop(self):
+        """Poll the progress actor and update tqdm."""
+        while not self._stop_event.is_set():
+            try:
+                stats = self._ray.get(self.progress_actor.get_progress.remote())
+                self.pbar.n = stats["processed"]
+                self.pbar.set_postfix_str(f'{stats["rate"]:.1f} items/s')
+                self.pbar.refresh()
+            except Exception:
+                pass  # Ignore errors during polling
+            self._stop_event.wait(self.poll_interval)
+
+        # Final update
+        try:
+            stats = self._ray.get(self.progress_actor.get_progress.remote())
+            self.pbar.n = stats["processed"]
+            self.pbar.refresh()
+        except Exception:
+            pass
```
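The new module above has three cooperating pieces: a lazily created `ProgressActor`, the `create_progress_tracker()` factory, and a `ProgressPoller` that mirrors actor state into a tqdm bar. Below is a minimal usage sketch, not part of the package: it assumes the module is importable as `speedy_utils.multi_worker.progress` and that Ray and tqdm are installed; the worker and chunking logic are purely illustrative.

```python
# Hypothetical usage sketch for the new progress module; import path assumed
# from the file location speedy_utils/multi_worker/progress.py.
import ray
from tqdm import tqdm

from speedy_utils.multi_worker.progress import ProgressPoller, create_progress_tracker


@ray.remote
def process_chunk(chunk, progress_actor):
    results = []
    for item in chunk:
        results.append(item * 2)          # placeholder work
        progress_actor.update.remote(1)   # fire-and-forget progress report
    return results


def run(items, n_chunks=8):
    ray.init(ignore_reinit_error=True)
    actor = create_progress_tracker(total=len(items), desc="Items")
    pbar = tqdm(total=len(items), desc="Items")
    poller = ProgressPoller(actor, pbar, poll_interval=0.5)
    poller.start()
    try:
        chunks = [items[i::n_chunks] for i in range(n_chunks)]
        futures = [process_chunk.remote(c, actor) for c in chunks]
        results = ray.get(futures)
    finally:
        poller.stop()
        pbar.close()
    return [x for chunk in results for x in chunk]
```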
speedy_utils/multi_worker/thread.py CHANGED

```diff
@@ -1,4 +1,7 @@
 from ..__imports import *
+import linecache
+
+from .process import ErrorStats, ErrorHandlerType
 
 
 try:
@@ -6,6 +9,17 @@ try:
 except ImportError:  # pragma: no cover
     tqdm = None  # type: ignore[assignment]
 
+try:
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.syntax import Syntax
+    from rich.text import Text
+except ImportError:  # pragma: no cover
+    Console = None  # type: ignore[assignment, misc]
+    Panel = None  # type: ignore[assignment, misc]
+    Syntax = None  # type: ignore[assignment, misc]
+    Text = None  # type: ignore[assignment, misc]
+
 # Sensible defaults
 DEFAULT_WORKERS = (os.cpu_count() or 4) * 2
 
@@ -25,11 +39,13 @@ class UserFunctionError(Exception):
         func_name: str,
         input_value: Any,
         user_traceback: list[traceback.FrameSummary],
+        caller_frame: traceback.FrameSummary | None = None,
     ) -> None:
         self.original_exception = original_exception
         self.func_name = func_name
         self.input_value = input_value
         self.user_traceback = user_traceback
+        self.caller_frame = caller_frame
 
         # Create a focused error message
         tb_str = ''.join(traceback.format_list(user_traceback))
@@ -44,6 +60,95 @@ class UserFunctionError(Exception):
         # Return focused error without infrastructure frames
         return super().__str__()
 
+    def format_rich(self) -> None:
+        """Format and print error with rich panels and code context."""
+        if Console is None or Panel is None or Text is None:
+            # Fallback to plain text
+            print(str(self), file=sys.stderr)
+            return
+
+        console = Console(stderr=True, force_terminal=True)
+
+        # Build traceback display with code context
+        tb_parts: list[str] = []
+
+        # Show caller frame first if available
+        if self.caller_frame and self.caller_frame.lineno is not None:
+            tb_parts.append(
+                f'[cyan]{self.caller_frame.filename}[/cyan]:[yellow]{self.caller_frame.lineno}[/yellow] '
+                f'in [green]{self.caller_frame.name}[/green]'
+            )
+            tb_parts.append('')
+            context = _get_code_context_rich(self.caller_frame.filename, self.caller_frame.lineno, 3)
+            tb_parts.extend(context)
+            tb_parts.append('')
+
+        # Show user code frames with context
+        for frame in self.user_traceback:
+            if frame.lineno is not None:
+                tb_parts.append(
+                    f'[cyan]{frame.filename}[/cyan]:[yellow]{frame.lineno}[/yellow] '
+                    f'in [green]{frame.name}[/green]'
+                )
+                tb_parts.append('')
+                context = _get_code_context_rich(frame.filename, frame.lineno, 3)
+                tb_parts.extend(context)
+                tb_parts.append('')
+
+        # Print with rich Panel
+        console.print()
+        console.print(
+            Panel(
+                '\n'.join(tb_parts),
+                title='[bold red]Traceback (most recent call last)[/bold red]',
+                border_style='red',
+                expand=False,
+            )
+        )
+        console.print(
+            f'[bold red]{type(self.original_exception).__name__}[/bold red]: '
+            f'{self.original_exception}'
+        )
+        console.print()
+
+
+def _get_code_context(filename: str, lineno: int, context_lines: int = 3) -> list[str]:
+    """Get code context around a line with line numbers and highlighting."""
+    lines: list[str] = []
+    start = max(1, lineno - context_lines)
+    end = lineno + context_lines
+
+    for i in range(start, end + 1):
+        line = linecache.getline(filename, i)
+        if not line:
+            continue
+        line = line.rstrip()
+        marker = '❱' if i == lineno else ' '
+        lines.append(f' {i:4d} {marker} {line}')
+
+    return lines
+
+def _get_code_context_rich(filename: str, lineno: int, context_lines: int = 3) -> list[str]:
+    """Get code context with rich formatting (colors)."""
+    lines: list[str] = []
+    start = max(1, lineno - context_lines)
+    end = lineno + context_lines
+
+    for i in range(start, end + 1):
+        line = linecache.getline(filename, i)
+        if not line:
+            continue
+        line = line.rstrip()
+        num_str = f'{i:4d}'
+
+        if i == lineno:
+            # Highlight error line
+            lines.append(f'[dim]{num_str}[/dim] [red]❱[/red] {line}')
+        else:
+            # Normal context line
+            lines.append(f'[dim]{num_str} │[/dim] {line}')
+
+    return lines
 
 _PY_SET_ASYNC_EXC = ctypes.pythonapi.PyThreadState_SetAsyncExc
 try:
@@ -90,6 +195,7 @@ def _worker(
     item: T,
     func: Callable[[T], R],
     fixed_kwargs: Mapping[str, Any],
+    caller_frame: traceback.FrameSummary | None = None,
 ) -> R:
     """Execute the function with an item and fixed kwargs."""
     # Validate func is callable before attempting to call it
@@ -102,7 +208,7 @@ def _worker(
         )
 
     try:
-        return func(item
+        return func(item)
     except Exception as exc:
         # Extract user code traceback (filter out infrastructure)
         exc_tb = sys.exc_info()[2]
@@ -114,8 +220,11 @@ def _worker(
         user_frames = []
         skip_patterns = [
             'multi_worker/thread.py',
+            'multi_worker/process.py',
             'concurrent/futures/',
             'threading.py',
+            'multiprocessing/',
+            'site-packages/ray/',
         ]
 
         for frame in tb_list:
@@ -130,6 +239,7 @@ def _worker(
                 func_name,
                 item,
                 user_frames,
+                caller_frame,
             ) from exc
 
     # Fallback: re-raise original if we couldn't extract frames
@@ -140,8 +250,9 @@ def _run_batch(
     items: Sequence[T],
     func: Callable[[T], R],
     fixed_kwargs: Mapping[str, Any],
+    caller_frame: traceback.FrameSummary | None = None,
 ) -> list[R]:
-    return [_worker(item, func, fixed_kwargs) for item in items]
+    return [_worker(item, func, fixed_kwargs, caller_frame) for item in items]
 
 
 def _attach_metadata(fut: Future[Any], idx: int, logical_size: int) -> None:
@@ -242,7 +353,9 @@ def multi_thread(
     progress_update: int = 10,
     prefetch_factor: int = 4,
     timeout: float | None = None,
-    stop_on_error: bool =
+    stop_on_error: bool | None = None,
+    error_handler: ErrorHandlerType = 'raise',
+    max_error_files: int = 100,
     n_proc: int = 0,
     store_output_pkl_file: str | None = None,
     **fixed_kwargs: Any,
@@ -272,8 +385,16 @@ def multi_thread(
        Multiplier controlling in-flight items (``workers * prefetch_factor``).
    timeout : float | None, optional
        Overall wall-clock timeout in seconds.
-    stop_on_error : bool, optional
-
+    stop_on_error : bool | None, optional
+        Deprecated. Use error_handler instead.
+        When True -> error_handler='raise', when False -> error_handler='log'.
+    error_handler : 'raise' | 'ignore' | 'log', optional
+        - 'raise': raise exception on first error (default)
+        - 'ignore': continue, return None for failed items
+        - 'log': same as ignore, but logs errors to files
+    max_error_files : int, optional
+        Maximum number of error log files to write (default: 100).
+        Error logs are written to .cache/speedy_utils/error_logs/{idx}.log
    n_proc : int, optional
        Optional process-level fan-out; ``>1`` shards work across processes.
    store_output_pkl_file : str | None, optional
@@ -285,10 +406,20 @@ def multi_thread(
    -------
    list[R | None]
        Collected results, preserving order when requested. Failed tasks yield
-        ``None`` entries if ``
+        ``None`` entries if ``error_handler`` is not 'raise'.
    """
    from speedy_utils import dump_json_or_pickle, load_by_ext
 
+    # Handle deprecated stop_on_error parameter
+    if stop_on_error is not None:
+        import warnings
+        warnings.warn(
+            "stop_on_error is deprecated, use error_handler instead",
+            DeprecationWarning,
+            stacklevel=2
+        )
+        error_handler = 'raise' if stop_on_error else 'log'
+
    if n_proc > 1:
        import tempfile
 
@@ -319,7 +450,8 @@ def multi_thread(
            progress_update=progress_update,
            prefetch_factor=prefetch_factor,
            timeout=timeout,
-
+            error_handler=error_handler,
+            max_error_files=max_error_files,
            n_proc=0,
            store_output_pkl_file=file_pkl,
            **fixed_kwargs,
@@ -363,12 +495,30 @@ def multi_thread(
    if batch == 1 and logical_total and logical_total / max(workers_val, 1) > 20_000:
        batch = 32
 
-    src_iter:
+    src_iter: Iterator[Any] = iter(inputs)
    if batch > 1:
-        src_iter = _group_iter(src_iter, batch)
-        src_iter = iter(src_iter)
+        src_iter = iter(_group_iter(src_iter, batch))
    collector: _ResultCollector[Any] = _ResultCollector(ordered, logical_total)
 
+    # Initialize error stats for error handling
+    func_name = getattr(func, '__name__', repr(func))
+    error_stats = ErrorStats(
+        func_name=func_name,
+        max_error_files=max_error_files,
+        write_logs=error_handler == 'log'
+    )
+
+    # Convert inputs to list for index access in error logging
+    items_list: list[Any] | None = None
+    if error_handler != 'raise':
+        try:
+            items_list = list(inputs)
+            src_iter = iter(items_list)
+            if batch > 1:
+                src_iter = iter(_group_iter(src_iter, batch))
+        except Exception:
+            items_list = None
+
    bar = None
    last_bar_update = 0
    if (
@@ -382,10 +532,22 @@ def multi_thread(
            ncols=128,
            colour='green',
            bar_format=(
-                '{l_bar}{bar}| {n_fmt}/{total_fmt}
+                '{l_bar}{bar}| {n_fmt}/{total_fmt} '
+                '[{elapsed}<{remaining}, {rate_fmt}{postfix}]'
            ),
        )
 
+    # Capture caller context for error reporting
+    caller_frame_obj = inspect.currentframe()
+    caller_context: traceback.FrameSummary | None = None
+    if caller_frame_obj and caller_frame_obj.f_back:
+        caller_info = inspect.getframeinfo(caller_frame_obj.f_back)
+        caller_context = traceback.FrameSummary(
+            caller_info.filename,
+            caller_info.lineno,
+            caller_info.function,
+        )
+
    deadline = time.monotonic() + timeout if timeout is not None else None
    max_inflight = max(workers_val * prefetch_factor, 1)
    completed_items = 0
@@ -409,10 +571,10 @@ def multi_thread(
            batch_items = list(arg)
            if not batch_items:
                return
-            fut = pool.submit(_run_batch, batch_items, func, fixed_kwargs_map)
+            fut = pool.submit(_run_batch, batch_items, func, fixed_kwargs_map, caller_context)
            logical_size = len(batch_items)
        else:
-            fut = pool.submit(_worker, arg, func, fixed_kwargs_map)
+            fut = pool.submit(_worker, arg, func, fixed_kwargs_map, caller_context)
            logical_size = 1
        _attach_metadata(fut, next_logical_idx, logical_size)
        next_logical_idx += logical_size
@@ -453,37 +615,37 @@ def multi_thread(
        idx, logical_size = _future_meta(fut)
        try:
            result = fut.result()
+            # Record success for each item in the batch
+            for _ in range(logical_size):
+                error_stats.record_success()
        except UserFunctionError as exc:
-            # User function error
-
-
-
+            # User function error
+            if error_handler == 'raise':
+                sys.stderr.flush()
+                sys.stdout.flush()
+                exc.format_rich()
+                sys.stderr.flush()
                _cancel_futures(inflight)
-
-
-
-
-
-
-
-
-
-                    f'{type(orig_exc).__name__}: {orig_exc}'
-                )
-
-                # Raise a new instance of the original exception type
-                # with our clean message
-                new_exc = type(orig_exc)(clean_msg)
-                # Suppress the "from" chain to avoid showing infrastructure
-                raise new_exc from None
-
+                sys.exit(1)
+
+            # Log error with ErrorStats
+            input_val = None
+            if items_list is not None and idx < len(items_list):
+                input_val = items_list[idx]
+            error_stats.record_error(
+                idx, exc.original_exception, input_val, func_name
+            )
            out_items = [None] * logical_size
        except Exception as exc:
            # Other errors (infrastructure, batching, etc.)
-            if
+            if error_handler == 'raise':
                _cancel_futures(inflight)
                raise
-
+
+            input_val = None
+            if items_list is not None and idx < len(items_list):
+                input_val = items_list[idx]
+            error_stats.record_error(idx, exc, input_val, func_name)
            out_items = [None] * logical_size
        else:
            try:
@@ -503,15 +665,13 @@ def multi_thread(
                bar.update(delta)
                last_bar_update = completed_items
            submitted = next_logical_idx
-            pending = (
+            pending: int | str = (
                max(logical_total - submitted, 0)
                if logical_total is not None
                else '-'
            )
-            postfix =
-
-                'pending': pending,
-            }
+            postfix: dict[str, Any] = error_stats.get_postfix_dict()
+            postfix['pending'] = pending
            bar.set_postfix(postfix)
 
        try:
```
speedy_utils/scripts/mpython.py CHANGED

```diff
@@ -3,13 +3,59 @@ import argparse
 import itertools
 import multiprocessing  # Import multiprocessing module
 import os
+import re
 import shlex  # To properly escape command line arguments
 import shutil
+import subprocess
 
 
 taskset_path = shutil.which('taskset')
 
 
+def get_existing_tmux_sessions():
+    """Get list of existing tmux session names."""
+    try:
+        result = subprocess.run(
+            ['tmux', 'list-sessions', '-F', '#{session_name}'],
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode == 0:
+            return result.stdout.strip().split('\n')
+        return []
+    except FileNotFoundError:
+        # tmux not installed
+        return []
+
+
+def get_next_session_name(base_name='mpython'):
+    """Get next available session name.
+
+    If 'mpython' doesn't exist, return 'mpython'.
+    If 'mpython' exists, return 'mpython-1', 'mpython-2', etc.
+    """
+    existing_sessions = get_existing_tmux_sessions()
+
+    if base_name not in existing_sessions:
+        return base_name
+
+    # Find all existing mpython-N sessions
+    pattern = re.compile(rf'^{re.escape(base_name)}-(\d+)$')
+    existing_numbers = []
+
+    for session in existing_sessions:
+        match = pattern.match(session)
+        if match:
+            existing_numbers.append(int(match.group(1)))
+
+    # Find the next available number
+    next_num = 1
+    if existing_numbers:
+        next_num = max(existing_numbers) + 1
+
+    return f'{base_name}-{next_num}'
+
+
 def assert_script(python_path):
     with open(python_path) as f:
         code_str = f.read()
@@ -30,10 +76,7 @@ def assert_script(python_path):
 
 def run_in_tmux(commands_to_run, tmux_name, num_windows):
     with open('/tmp/start_multirun_tmux.sh', 'w') as script_file:
-        # first cmd is to kill the session if it exists
-
         script_file.write('#!/bin/bash\n\n')
-        script_file.write(f'tmux kill-session -t {tmux_name}\nsleep .1\n')
         script_file.write(f'tmux new-session -d -s {tmux_name}\n')
         for i, cmd in enumerate(itertools.cycle(commands_to_run)):
             if i >= num_windows:
@@ -99,9 +142,11 @@ def main():
 
     cmds.append(fold_cmd)
 
-
+    session_name = get_next_session_name('mpython')
+    run_in_tmux(cmds, session_name, args.total_fold)
     os.chmod('/tmp/start_multirun_tmux.sh', 0o755)  # Make the script executable
     os.system('/tmp/start_multirun_tmux.sh')
+    print(f'Started tmux session: {session_name}')
 
 
 if __name__ == '__main__':
```
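The mpython changes stop killing an existing `mpython` tmux session and instead pick the next free name (`mpython`, `mpython-1`, `mpython-2`, ...). A small sketch of that naming logic with tmux stubbed out; the `speedy_utils.scripts.mpython` import path is inferred from the file list above.

```python
# Minimal sketch: exercise get_next_session_name() without touching tmux by
# patching the session-listing helper it calls.
from unittest.mock import patch

from speedy_utils.scripts import mpython  # import path inferred from the file list

with patch.object(mpython, 'get_existing_tmux_sessions',
                  return_value=['mpython', 'mpython-1', 'mpython-3']):
    print(mpython.get_next_session_name())  # 'mpython-4' (max existing number + 1)

with patch.object(mpython, 'get_existing_tmux_sessions', return_value=[]):
    print(mpython.get_next_session_name())  # 'mpython' when no session exists yet
```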
{speedy_utils-1.1.40.dist-info → speedy_utils-1.1.43.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: speedy-utils
-Version: 1.1.
+Version: 1.1.43
 Summary: Fast and easy-to-use package for data science
 Project-URL: Homepage, https://github.com/anhvth/speedy
 Project-URL: Repository, https://github.com/anhvth/speedy
@@ -17,7 +17,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
-Requires-Python: >=3.
+Requires-Python: >=3.9
 Requires-Dist: aiohttp
 Requires-Dist: bump2version
 Requires-Dist: cachetools
@@ -39,13 +39,15 @@ Requires-Dist: pydantic
 Requires-Dist: pytest
 Requires-Dist: ray
 Requires-Dist: requests
+Requires-Dist: rich>=14.3.1
 Requires-Dist: ruff
 Requires-Dist: scikit-learn
 Requires-Dist: tabulate
 Requires-Dist: tqdm
 Requires-Dist: xxhash
 Provides-Extra: ray
-Requires-Dist: ray>=2.
+Requires-Dist: ray[data,llm]>=2.40.0; extra == 'ray'
+Requires-Dist: vllm>=0.6.3; extra == 'ray'
 Description-Content-Type: text/markdown
 
 # Speedy Utils
```