PyPI - speedy-utils - Versions diffs - 1.1.23__py3-none-any.whl → 1.1.24__py3-none-any.whl - Mend

speedy-utils 1.1.23__py3-none-any.whl → 1.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

llm_utils/__init__.py +12 -8
llm_utils/chat_format/__init__.py +2 -0
llm_utils/chat_format/display.py +115 -44
llm_utils/lm/__init__.py +14 -6
llm_utils/lm/llm.py +413 -0
llm_utils/lm/llm_signature.py +35 -0
llm_utils/lm/mixins.py +379 -0
llm_utils/lm/openai_memoize.py +18 -7
llm_utils/lm/signature.py +26 -37
llm_utils/lm/utils.py +61 -76
speedy_utils/__init__.py +28 -1
speedy_utils/all.py +30 -1
speedy_utils/common/utils_io.py +36 -26
speedy_utils/common/utils_misc.py +25 -1
speedy_utils/multi_worker/thread.py +145 -58
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.24.dist-info}/METADATA +1 -1
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.24.dist-info}/RECORD +19 -18
llm_utils/lm/llm_as_a_judge.py +0 -390
llm_utils/lm/llm_task.py +0 -614
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.24.dist-info}/WHEEL +0 -0
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.24.dist-info}/entry_points.txt +0 -0

llm_utils/lm/utils.py CHANGED Viewed

@@ -14,6 +14,7 @@ from openai import OpenAI
 try:
     import psutil
     HAS_PSUTIL = True
 except ImportError:
     HAS_PSUTIL = False
@@ -26,7 +27,7 @@ _VLLM_PROCESSES: List[subprocess.Popen] = []
 def _extract_port_from_vllm_cmd(vllm_cmd: str) -> int:
     """Extract port from VLLM command string."""
-    port_match = re.search(r'--port\s+(\d+)', vllm_cmd)
+    port_match = re.search(r"--port\s+(\d+)", vllm_cmd)
     if port_match:
         return int(port_match.group(1))
     return 8000
@@ -34,39 +35,39 @@ def _extract_port_from_vllm_cmd(vllm_cmd: str) -> int:
 def _parse_env_vars_from_cmd(cmd: str) -> tuple[dict[str, str], str]:
     """Parse environment variables from command string.
     Args:
         cmd: Command string that may contain environment variables like 'VAR=value command...'
     Returns:
         Tuple of (env_dict, cleaned_cmd) where env_dict contains parsed env vars
         and cleaned_cmd is the command without the env vars.
     """
     import shlex
     # Split the command while preserving quoted strings
     parts = shlex.split(cmd)
     env_vars = {}
     cmd_parts = []
     for part in parts:
-        if '=' in part and not part.startswith('-'):
+        if "=" in part and not part.startswith("-"):
             # Check if this looks like an environment variable
             # Should be KEY=VALUE format, not contain spaces (unless quoted), and KEY should be uppercase
-            key_value = part.split('=', 1)
+            key_value = part.split("=", 1)
             if len(key_value) == 2:
                 key, value = key_value
-                if key.isupper() and key.replace('_', '').isalnum():
+                if key.isupper() and key.replace("_", "").isalnum():
                     env_vars[key] = value
                     continue
         # Not an env var, add to command parts
         cmd_parts.append(part)
     # Reconstruct the cleaned command
-    cleaned_cmd = ' '.join(cmd_parts)
+    cleaned_cmd = " ".join(cmd_parts)
     return env_vars, cleaned_cmd
@@ -74,38 +75,33 @@ def _start_vllm_server(vllm_cmd: str, timeout: int = 120) -> subprocess.Popen:
     """Start VLLM server and wait for ready."""
     # Parse environment variables from command
     env_vars, cleaned_cmd = _parse_env_vars_from_cmd(vllm_cmd)
     port = _extract_port_from_vllm_cmd(cleaned_cmd)
     logger.info(f"Starting VLLM server: {cleaned_cmd}")
     if env_vars:
         logger.info(f"Environment variables: {env_vars}")
     logger.info(f"VLLM output logged to: /tmp/vllm_{port}.txt")
-    with open(f'/tmp/vllm_{port}.txt', 'w') as log_file:
+    with open(f"/tmp/vllm_{port}.txt", "w") as log_file:
         log_file.write(f"VLLM Server started at {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
         log_file.write(f"Command: {cleaned_cmd}\n")
         if env_vars:
             log_file.write(f"Environment: {env_vars}\n")
         log_file.write(f"Port: {port}\n")
         log_file.write("-" * 50 + "\n")
     # Prepare environment for subprocess
     env = os.environ.copy()
     env.update(env_vars)
-    with open(f'/tmp/vllm_{port}.txt', 'a') as log_file:
+    with open(f"/tmp/vllm_{port}.txt", "a") as log_file:
         process = subprocess.Popen(
-            cleaned_cmd.split(),
-            stdout=log_file,
-            stderr=subprocess.STDOUT,
-            text=True,
-            preexec_fn=os.setsid,
-            env=env
+            cleaned_cmd.split(), stdout=log_file, stderr=subprocess.STDOUT, text=True, preexec_fn=os.setsid, env=env
         )
     _VLLM_PROCESSES.append(process)
     start_time = time.time()
     while time.time() - start_time < timeout:
         try:
@@ -115,26 +111,24 @@ def _start_vllm_server(vllm_cmd: str, timeout: int = 120) -> subprocess.Popen:
                 return process
         except requests.RequestException:
             pass
         if process.poll() is not None:
             stdout, stderr = process.communicate()
             raise RuntimeError(
-                f"VLLM server terminated unexpectedly. "
-                f"Return code: {process.returncode}, "
-                f"stderr: {stderr[:200]}..."
+                f"VLLM server terminated unexpectedly. Return code: {process.returncode}, stderr: {stderr[:200]}..."
             )
         time.sleep(2)
     process.terminate()
     try:
         process.wait(timeout=5)
     except subprocess.TimeoutExpired:
         process.kill()
     if process in _VLLM_PROCESSES:
         _VLLM_PROCESSES.remove(process)
     raise RuntimeError(f"VLLM server failed to start within {timeout}s on port {port}")
@@ -142,7 +136,7 @@ def _kill_vllm_on_port(port: int) -> bool:
     """Kill VLLM server on port."""
     killed = False
     logger.info(f"Checking VLLM server on port {port}")
     processes_to_remove = []
     for process in _VLLM_PROCESSES:
         try:
@@ -151,8 +145,8 @@ def _kill_vllm_on_port(port: int) -> bool:
                 if HAS_PSUTIL:
                     try:
                         proc = psutil.Process(process.pid)
-                        cmdline = ' '.join(proc.cmdline())
-                        if f'--port {port}' in cmdline or f'--port={port}' in cmdline:
+                        cmdline = " ".join(proc.cmdline())
+                        if f"--port {port}" in cmdline or f"--port={port}" in cmdline:
                             logger.info(f"Killing tracked VLLM process {process.pid} on port {port}")
                             os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                             try:
@@ -164,7 +158,7 @@ def _kill_vllm_on_port(port: int) -> bool:
                             killed_process = True
                     except (psutil.NoSuchProcess, psutil.AccessDenied):
                         pass
                 if not HAS_PSUTIL or not killed_process:
                     logger.info(f"Killing tracked VLLM process {process.pid}")
                     try:
@@ -177,24 +171,23 @@ def _kill_vllm_on_port(port: int) -> bool:
                         killed = True
                     except (ProcessLookupError, OSError):
                         pass
                 processes_to_remove.append(process)
             else:
                 processes_to_remove.append(process)
         except (ProcessLookupError, OSError):
             processes_to_remove.append(process)
     for process in processes_to_remove:
         if process in _VLLM_PROCESSES:
             _VLLM_PROCESSES.remove(process)
     if not killed and HAS_PSUTIL:
         try:
-            for proc in psutil.process_iter(['pid', 'cmdline']):
+            for proc in psutil.process_iter(["pid", "cmdline"]):
                 try:
-                    cmdline = ' '.join(proc.info['cmdline'] or [])
-                    if ('vllm' in cmdline.lower() and
-                        (f'--port {port}' in cmdline or f'--port={port}' in cmdline)):
+                    cmdline = " ".join(proc.info["cmdline"] or [])
+                    if "vllm" in cmdline.lower() and (f"--port {port}" in cmdline or f"--port={port}" in cmdline):
                         logger.info(f"Killing untracked VLLM process {proc.info['pid']} on port {port}")
                         proc.terminate()
                         try:
@@ -207,13 +200,13 @@ def _kill_vllm_on_port(port: int) -> bool:
                     continue
         except Exception as e:
             logger.warning(f"Error searching processes on port {port}: {e}")
     if killed:
         logger.info(f"Killed VLLM server on port {port}")
         time.sleep(2)
     else:
         logger.info(f"No VLLM server on port {port}")
     return killed
@@ -262,32 +255,24 @@ def _is_server_running(port: int) -> bool:
         return False
-def get_base_client(
-    client=None,
-    cache: bool = True,
-    api_key="abc",
-    vllm_cmd=None,
-    vllm_process=None
-) -> OpenAI:
+def get_base_client(client=None, cache: bool = True, api_key="abc", vllm_cmd=None, vllm_process=None) -> OpenAI:
     """Get OpenAI client from various inputs."""
     from llm_utils import MOpenAI
-    open_ai_class = OpenAI if not cache else MOpenAI
     if client is None:
         if vllm_cmd is not None:
             # Parse environment variables from command to get clean command for port extraction
             _, cleaned_cmd = _parse_env_vars_from_cmd(vllm_cmd)
             port = _extract_port_from_vllm_cmd(cleaned_cmd)
-            return open_ai_class(base_url=f"http://localhost:{port}/v1", api_key=api_key)
+            return MOpenAI(base_url=f"http://localhost:{port}/v1", api_key=api_key, cache=cache)
         else:
-            return open_ai_class()
+            raise ValueError("Either client or vllm_cmd must be provided.")
     elif isinstance(client, int):
-        return open_ai_class(base_url=f"http://localhost:{client}/v1", api_key=api_key)
+        return MOpenAI(base_url=f"http://localhost:{client}/v1", api_key=api_key, cache=cache)
     elif isinstance(client, str):
-        return open_ai_class(base_url=client, api_key=api_key)
+        return MOpenAI(base_url=client, api_key=api_key, cache=cache)
     elif isinstance(client, OpenAI):
-        return client
+        return MOpenAI(base_url=client.base_url, api_key=api_key, cache=cache)
     else:
         raise ValueError("Invalid client type. Must be OpenAI, port (int), base_url (str), or None.")
@@ -296,17 +281,17 @@ def _is_lora_path(path: str) -> bool:
     """Check if path is LoRA adapter directory."""
     if not os.path.isdir(path):
         return False
-    adapter_config_path = os.path.join(path, 'adapter_config.json')
+    adapter_config_path = os.path.join(path, "adapter_config.json")
     return os.path.isfile(adapter_config_path)
 def _get_port_from_client(client: OpenAI) -> Optional[int]:
     """Extract port from OpenAI client base_url."""
-    if hasattr(client, 'base_url') and client.base_url:
+    if hasattr(client, "base_url") and client.base_url:
         base_url = str(client.base_url)
-        if 'localhost:' in base_url:
+        if "localhost:" in base_url:
             try:
-                port_part = base_url.split('localhost:')[1].split('/')[0]
+                port_part = base_url.split("localhost:")[1].split("/")[0]
                 return int(port_part)
             except (IndexError, ValueError):
                 pass
@@ -315,14 +300,14 @@ def _get_port_from_client(client: OpenAI) -> Optional[int]:
 def _load_lora_adapter(lora_path: str, port: int) -> str:
     """Load LoRA adapter from path."""
-    lora_name = os.path.basename(lora_path.rstrip('/\\'))
+    lora_name = os.path.basename(lora_path.rstrip("/\\"))
     if not lora_name:
         lora_name = os.path.basename(os.path.dirname(lora_path))
     response = requests.post(
-        f'http://localhost:{port}/v1/load_lora_adapter',
-        headers={'accept': 'application/json', 'Content-Type': 'application/json'},
-        json={"lora_name": lora_name, "lora_path": os.path.abspath(lora_path)}
+        f"http://localhost:{port}/v1/load_lora_adapter",
+        headers={"accept": "application/json", "Content-Type": "application/json"},
+        json={"lora_name": lora_name, "lora_path": os.path.abspath(lora_path)},
     )
     response.raise_for_status()
     return lora_name
@@ -331,14 +316,14 @@ def _load_lora_adapter(lora_path: str, port: int) -> str:
 def _unload_lora_adapter(lora_path: str, port: int) -> None:
     """Unload LoRA adapter."""
     try:
-        lora_name = os.path.basename(lora_path.rstrip('/\\'))
+        lora_name = os.path.basename(lora_path.rstrip("/\\"))
         if not lora_name:
             lora_name = os.path.basename(os.path.dirname(lora_path))
         response = requests.post(
-            f'http://localhost:{port}/v1/unload_lora_adapter',
-            headers={'accept': 'application/json', 'Content-Type': 'application/json'},
-            json={"lora_name": lora_name, "lora_int_id": 0}
+            f"http://localhost:{port}/v1/unload_lora_adapter",
+            headers={"accept": "application/json", "Content-Type": "application/json"},
+            json={"lora_name": lora_name, "lora_int_id": 0},
         )
         response.raise_for_status()
     except requests.RequestException as e:

speedy_utils/__init__.py CHANGED Viewed

@@ -79,7 +79,24 @@ from glob import glob
 from multiprocessing import Pool
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+from typing import (
+    Any,
+    Awaitable,
+    Callable as TypingCallable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 # Third-party imports
 import numpy as np
@@ -124,6 +141,7 @@ from .common.utils_io import (
 # Misc utilities
 from .common.utils_misc import (
     convert_to_builtin_python,
+    dedup,
     flatten_list,
     get_arg_names,
     is_notebook,
@@ -171,12 +189,20 @@ __all__ = [
     "defaultdict",
     # Typing
     "Any",
+    "Awaitable",
     "Callable",
+    "TypingCallable",
     "Dict",
     "Generic",
+    "Iterable",
     "List",
     "Literal",
+    "Mapping",
     "Optional",
+    "Sequence",
+    "Set",
+    "Tuple",
+    "Type",
     "TypeVar",
     "Union",
     # Third-party
@@ -214,6 +240,7 @@ __all__ = [
     "get_arg_names",
     "is_notebook",
     "convert_to_builtin_python",
+    "dedup",
     # Print utilities
     "display_pretty_table_html",
     "flatten_dict",

speedy_utils/all.py CHANGED Viewed

@@ -71,7 +71,24 @@ from glob import glob
 from multiprocessing import Pool
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+from typing import (
+    Any,
+    Awaitable,
+    Callable as TypingCallable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 # Third-party imports
 import numpy as np
@@ -115,6 +132,9 @@ from speedy_utils import (  # Clock module; Function decorators; Cache utilities
     timef,
 )
+choice = random.choice
 # Define __all__ explicitly with all exports
 __all__ = [
     # Standard library
@@ -146,12 +166,20 @@ __all__ = [
     "defaultdict",
     # Typing
     "Any",
+    "Awaitable",
     "Callable",
+    "TypingCallable",
     "Dict",
     "Generic",
+    "Iterable",
     "List",
     "Literal",
+    "Mapping",
     "Optional",
+    "Sequence",
+    "Set",
+    "Tuple",
+    "Type",
     "TypeVar",
     "Union",
     # Third-party
@@ -199,4 +227,5 @@ __all__ = [
     # Multi-worker processing
     "multi_process",
     "multi_thread",
+    "choice",
 ]

speedy_utils/common/utils_io.py CHANGED Viewed

@@ -29,9 +29,7 @@ def dump_jsonl(list_dictionaries: list[dict], file_name: str = "output.jsonl") -
             file.write(json.dumps(dictionary, ensure_ascii=False) + "\n")
-def dump_json_or_pickle(
-    obj: Any, fname: str, ensure_ascii: bool = False, indent: int = 4
-) -> None:
+def dump_json_or_pickle(obj: Any, fname: str, ensure_ascii: bool = False, indent: int = 4) -> None:
     """
     Dump an object to a file, supporting both JSON and pickle formats.
     """
@@ -59,6 +57,7 @@ def dump_json_or_pickle(
             if isinstance(obj, BaseModel):
                 data = obj.model_dump()
                 from fastcore.all import dict2obj, obj2dict
                 obj2 = dict2obj(data)
                 with open(fname, "wb") as f:
                     pickle.dump(obj2, f)
@@ -84,7 +83,8 @@ def load_json_or_pickle(fname: str, counter=0) -> Any:
         except EOFError:
             time.sleep(1)
             if counter > 5:
-                print("Error: Ran out of input", fname)
+                # Keep message concise and actionable
+                print(f"Corrupted cache file {fname} removed; it will be regenerated on next access")
                 os.remove(fname)
                 raise
             return load_json_or_pickle(fname, counter + 1)
@@ -92,8 +92,6 @@ def load_json_or_pickle(fname: str, counter=0) -> Any:
             raise ValueError(f"Error {e} while loading {fname}") from e
 try:
     import orjson  # type: ignore[import-not-found]  # fastest JSON parser when available
 except Exception:
@@ -113,11 +111,11 @@ def fast_load_jsonl(
     use_orjson: bool = True,
     encoding: str = "utf-8",
     errors: str = "strict",
-    on_error: str = "raise",   # 'raise' | 'warn' | 'skip'
+    on_error: str = "raise",  # 'raise' | 'warn' | 'skip'
     skip_empty: bool = True,
     max_lines: Optional[int] = None,
     use_multiworker: bool = True,
-    multiworker_threshold: int = 50000,
+    multiworker_threshold: int = 1000000,
     workers: Optional[int] = None,
 ) -> Iterable[Any]:
     """
@@ -127,7 +125,7 @@ def fast_load_jsonl(
     - Optional tqdm progress over bytes (compressed size if gz/bz2/xz/zst).
     - Auto-detects compression by extension: .gz, .bz2, .xz/.lzma, .zst/.zstd.
     - Uses orjson if available (use_orjson=True), falls back to json.
-    - Automatically uses multi-worker processing for large files (>50k lines).
+    - Automatically uses multi-worker processing for large files (>100k lines).
     Args:
         path_or_file: Path-like or file-like object. File-like can be binary or text.
@@ -140,11 +138,12 @@ def fast_load_jsonl(
         max_lines: Stop after reading this many lines (useful for sampling).
         use_multiworker: Enable multi-worker processing for large files.
         multiworker_threshold: Line count threshold to trigger multi-worker processing.
-        workers: Number of worker threads (defaults to CPU count).
+        workers: Number of worker threads (defaults to 80% of CPU count, max 8).
     Yields:
         Parsed Python objects per line.
     """
     def _open_auto(pth_or_f) -> IO[Any]:
         if hasattr(pth_or_f, "read"):
             # ensure binary buffer for consistent byte-length progress
@@ -206,39 +205,47 @@ def fast_load_jsonl(
     # Check if we should use multi-worker processing
     should_use_multiworker = (
-        use_multiworker
+        use_multiworker
         and not hasattr(path_or_file, "read")  # Only for file paths, not file objects
         and max_lines is None  # Don't use multiworker if we're limiting lines
     )
     if should_use_multiworker:
         line_count = _count_lines_fast(cast(Union[str, os.PathLike], path_or_file))
         if line_count > multiworker_threshold:
             # Use multi-worker processing
             from ..multi_worker.thread import multi_thread
+            # Calculate optimal worker count: 80% of CPU count, capped at 8
+            cpu_count = os.cpu_count() or 4
+            default_workers = min(int(cpu_count * 0.8), 8)
+            num_workers = workers if workers is not None else default_workers
+            num_workers = max(1, num_workers)  # At least 1 worker
             # Read all lines into chunks
             f = _open_auto(path_or_file)
             all_lines = list(f)
             f.close()
-            # Split into chunks for workers
-            num_workers = workers or os.cpu_count() or 4
-            chunk_size = max(len(all_lines) // num_workers, 1000)
+            # Split into chunks - aim for ~10k-20k lines per chunk minimum
+            min_chunk_size = 10000
+            chunk_size = max(len(all_lines) // num_workers, min_chunk_size)
             chunks = []
             for i in range(0, len(all_lines), chunk_size):
-                chunks.append(all_lines[i:i + chunk_size])
+                chunks.append(all_lines[i : i + chunk_size])
             # Process chunks in parallel
             if progress:
-                print(f"Processing {line_count} lines with {num_workers} workers...")
+                print(f"Processing {line_count} lines with {num_workers} workers ({len(chunks)} chunks)...")
             chunk_results = multi_thread(_process_chunk, chunks, workers=num_workers, progress=progress)
             # Flatten results and yield
-            for chunk_result in chunk_results:
-                for obj in chunk_result:
-                    yield obj
+            if chunk_results:
+                for chunk_result in chunk_results:
+                    if chunk_result:
+                        for obj in chunk_result:
+                            yield obj
             return
     # Single-threaded processing (original logic)
@@ -266,7 +273,11 @@ def fast_load_jsonl(
             line_no += 1
             if pbar is not None:
                 # raw_line is bytes here; if not, compute byte length
-                nbytes = len(raw_line) if isinstance(raw_line, (bytes, bytearray)) else len(str(raw_line).encode(encoding, errors))
+                nbytes = (
+                    len(raw_line)
+                    if isinstance(raw_line, (bytes, bytearray))
+                    else len(str(raw_line).encode(encoding, errors))
+                )
                 pbar.update(nbytes)
             # Normalize to bytes -> str only if needed
@@ -322,7 +333,6 @@ def fast_load_jsonl(
                 pass
 def load_by_ext(fname: Union[str, list[str]], do_memoize: bool = False) -> Any:
     """
     Load data based on file extension.

speedy_utils/common/utils_misc.py CHANGED Viewed

@@ -3,10 +3,12 @@
 import inspect
 import os
 from collections.abc import Callable
-from typing import Any
+from typing import Any, TypeVar
 from pydantic import BaseModel
+T = TypeVar("T")
 def mkdir_or_exist(dir_name: str) -> None:
     """Create a directory if it doesn't exist."""
@@ -50,10 +52,32 @@ def convert_to_builtin_python(input_data: Any) -> Any:
         raise ValueError(f"Unsupported type {type(input_data)}")
+def dedup(items: list[T], key: Callable[[T], Any]) -> list[T]:
+    """
+    Deduplicate items in a list based on a key function.
+    Args:
+        items: The list of items.
+        key: A function that takes an item and returns a hashable key.
+    Returns:
+        A list with duplicates removed, preserving the first occurrence.
+    """
+    seen = set()
+    result = []
+    for item in items:
+        k = key(item)
+        if k not in seen:
+            seen.add(k)
+            result.append(item)
+    return result
 __all__ = [
     "mkdir_or_exist",
     "flatten_list",
     "get_arg_names",
     "is_notebook",
     "convert_to_builtin_python",
+    "dedup",
 ]

speedy-utils 1.1.23__py3-none-any.whl → 1.1.24__py3-none-any.whl

speedy-utils 1.1.23__py3-none-any.whl → 1.1.24__py3-none-any.whl