PyPI - speedy-utils - Versions diffs - 1.1.23__py3-none-any.whl → 1.1.25__py3-none-any.whl - Mend

speedy-utils 1.1.23__py3-none-any.whl → 1.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

llm_utils/__init__.py +12 -8
llm_utils/chat_format/__init__.py +2 -0
llm_utils/chat_format/display.py +115 -44
llm_utils/lm/__init__.py +14 -6
llm_utils/lm/llm.py +413 -0
llm_utils/lm/llm_signature.py +35 -0
llm_utils/lm/mixins.py +379 -0
llm_utils/lm/openai_memoize.py +18 -7
llm_utils/lm/signature.py +26 -37
llm_utils/lm/utils.py +61 -76
speedy_utils/__init__.py +31 -2
speedy_utils/all.py +30 -1
speedy_utils/common/utils_cache.py +142 -1
speedy_utils/common/utils_io.py +36 -26
speedy_utils/common/utils_misc.py +25 -1
speedy_utils/multi_worker/thread.py +145 -58
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.25.dist-info}/METADATA +1 -1
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.25.dist-info}/RECORD +20 -19
llm_utils/lm/llm_as_a_judge.py +0 -390
llm_utils/lm/llm_task.py +0 -614
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.25.dist-info}/WHEEL +0 -0
{speedy_utils-1.1.23.dist-info → speedy_utils-1.1.25.dist-info}/entry_points.txt +0 -0

llm_utils/lm/utils.py CHANGED Viewed

@@ -14,6 +14,7 @@ from openai import OpenAI
 try:
     import psutil
     HAS_PSUTIL = True
 except ImportError:
     HAS_PSUTIL = False
@@ -26,7 +27,7 @@ _VLLM_PROCESSES: List[subprocess.Popen] = []
 def _extract_port_from_vllm_cmd(vllm_cmd: str) -> int:
     """Extract port from VLLM command string."""
-    port_match = re.search(r'--port\s+(\d+)', vllm_cmd)
+    port_match = re.search(r"--port\s+(\d+)", vllm_cmd)
     if port_match:
         return int(port_match.group(1))
     return 8000
@@ -34,39 +35,39 @@ def _extract_port_from_vllm_cmd(vllm_cmd: str) -> int:
 def _parse_env_vars_from_cmd(cmd: str) -> tuple[dict[str, str], str]:
     """Parse environment variables from command string.
     Args:
         cmd: Command string that may contain environment variables like 'VAR=value command...'
     Returns:
         Tuple of (env_dict, cleaned_cmd) where env_dict contains parsed env vars
         and cleaned_cmd is the command without the env vars.
     """
     import shlex
     # Split the command while preserving quoted strings
     parts = shlex.split(cmd)
     env_vars = {}
     cmd_parts = []
     for part in parts:
-        if '=' in part and not part.startswith('-'):
+        if "=" in part and not part.startswith("-"):
             # Check if this looks like an environment variable
             # Should be KEY=VALUE format, not contain spaces (unless quoted), and KEY should be uppercase
-            key_value = part.split('=', 1)
+            key_value = part.split("=", 1)
             if len(key_value) == 2:
                 key, value = key_value
-                if key.isupper() and key.replace('_', '').isalnum():
+                if key.isupper() and key.replace("_", "").isalnum():
                     env_vars[key] = value
                     continue
         # Not an env var, add to command parts
         cmd_parts.append(part)
     # Reconstruct the cleaned command
-    cleaned_cmd = ' '.join(cmd_parts)
+    cleaned_cmd = " ".join(cmd_parts)
     return env_vars, cleaned_cmd
@@ -74,38 +75,33 @@ def _start_vllm_server(vllm_cmd: str, timeout: int = 120) -> subprocess.Popen:
     """Start VLLM server and wait for ready."""
     # Parse environment variables from command
     env_vars, cleaned_cmd = _parse_env_vars_from_cmd(vllm_cmd)
     port = _extract_port_from_vllm_cmd(cleaned_cmd)
     logger.info(f"Starting VLLM server: {cleaned_cmd}")
     if env_vars:
         logger.info(f"Environment variables: {env_vars}")
     logger.info(f"VLLM output logged to: /tmp/vllm_{port}.txt")
-    with open(f'/tmp/vllm_{port}.txt', 'w') as log_file:
+    with open(f"/tmp/vllm_{port}.txt", "w") as log_file:
         log_file.write(f"VLLM Server started at {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
         log_file.write(f"Command: {cleaned_cmd}\n")
         if env_vars:
             log_file.write(f"Environment: {env_vars}\n")
         log_file.write(f"Port: {port}\n")
         log_file.write("-" * 50 + "\n")
     # Prepare environment for subprocess
     env = os.environ.copy()
     env.update(env_vars)
-    with open(f'/tmp/vllm_{port}.txt', 'a') as log_file:
+    with open(f"/tmp/vllm_{port}.txt", "a") as log_file:
         process = subprocess.Popen(
-            cleaned_cmd.split(),
-            stdout=log_file,
-            stderr=subprocess.STDOUT,
-            text=True,
-            preexec_fn=os.setsid,
-            env=env
+            cleaned_cmd.split(), stdout=log_file, stderr=subprocess.STDOUT, text=True, preexec_fn=os.setsid, env=env
         )
     _VLLM_PROCESSES.append(process)
     start_time = time.time()
     while time.time() - start_time < timeout:
         try:
@@ -115,26 +111,24 @@ def _start_vllm_server(vllm_cmd: str, timeout: int = 120) -> subprocess.Popen:
                 return process
         except requests.RequestException:
             pass
         if process.poll() is not None:
             stdout, stderr = process.communicate()
             raise RuntimeError(
-                f"VLLM server terminated unexpectedly. "
-                f"Return code: {process.returncode}, "
-                f"stderr: {stderr[:200]}..."
+                f"VLLM server terminated unexpectedly. Return code: {process.returncode}, stderr: {stderr[:200]}..."
             )
         time.sleep(2)
     process.terminate()
     try:
         process.wait(timeout=5)
     except subprocess.TimeoutExpired:
         process.kill()
     if process in _VLLM_PROCESSES:
         _VLLM_PROCESSES.remove(process)
     raise RuntimeError(f"VLLM server failed to start within {timeout}s on port {port}")
@@ -142,7 +136,7 @@ def _kill_vllm_on_port(port: int) -> bool:
     """Kill VLLM server on port."""
     killed = False
     logger.info(f"Checking VLLM server on port {port}")
     processes_to_remove = []
     for process in _VLLM_PROCESSES:
         try:
@@ -151,8 +145,8 @@ def _kill_vllm_on_port(port: int) -> bool:
                 if HAS_PSUTIL:
                     try:
                         proc = psutil.Process(process.pid)
-                        cmdline = ' '.join(proc.cmdline())
-                        if f'--port {port}' in cmdline or f'--port={port}' in cmdline:
+                        cmdline = " ".join(proc.cmdline())
+                        if f"--port {port}" in cmdline or f"--port={port}" in cmdline:
                             logger.info(f"Killing tracked VLLM process {process.pid} on port {port}")
                             os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                             try:
@@ -164,7 +158,7 @@ def _kill_vllm_on_port(port: int) -> bool:
                             killed_process = True
                     except (psutil.NoSuchProcess, psutil.AccessDenied):
                         pass
                 if not HAS_PSUTIL or not killed_process:
                     logger.info(f"Killing tracked VLLM process {process.pid}")
                     try:
@@ -177,24 +171,23 @@ def _kill_vllm_on_port(port: int) -> bool:
                         killed = True
                     except (ProcessLookupError, OSError):
                         pass
                 processes_to_remove.append(process)
             else:
                 processes_to_remove.append(process)
         except (ProcessLookupError, OSError):
             processes_to_remove.append(process)
     for process in processes_to_remove:
         if process in _VLLM_PROCESSES:
             _VLLM_PROCESSES.remove(process)
     if not killed and HAS_PSUTIL:
         try:
-            for proc in psutil.process_iter(['pid', 'cmdline']):
+            for proc in psutil.process_iter(["pid", "cmdline"]):
                 try:
-                    cmdline = ' '.join(proc.info['cmdline'] or [])
-                    if ('vllm' in cmdline.lower() and
-                        (f'--port {port}' in cmdline or f'--port={port}' in cmdline)):
+                    cmdline = " ".join(proc.info["cmdline"] or [])
+                    if "vllm" in cmdline.lower() and (f"--port {port}" in cmdline or f"--port={port}" in cmdline):
                         logger.info(f"Killing untracked VLLM process {proc.info['pid']} on port {port}")
                         proc.terminate()
                         try:
@@ -207,13 +200,13 @@ def _kill_vllm_on_port(port: int) -> bool:
                     continue
         except Exception as e:
             logger.warning(f"Error searching processes on port {port}: {e}")
     if killed:
         logger.info(f"Killed VLLM server on port {port}")
         time.sleep(2)
     else:
         logger.info(f"No VLLM server on port {port}")
     return killed
@@ -262,32 +255,24 @@ def _is_server_running(port: int) -> bool:
         return False
-def get_base_client(
-    client=None,
-    cache: bool = True,
-    api_key="abc",
-    vllm_cmd=None,
-    vllm_process=None
-) -> OpenAI:
+def get_base_client(client=None, cache: bool = True, api_key="abc", vllm_cmd=None, vllm_process=None) -> OpenAI:
     """Get OpenAI client from various inputs."""
     from llm_utils import MOpenAI
-    open_ai_class = OpenAI if not cache else MOpenAI
     if client is None:
         if vllm_cmd is not None:
             # Parse environment variables from command to get clean command for port extraction
             _, cleaned_cmd = _parse_env_vars_from_cmd(vllm_cmd)
             port = _extract_port_from_vllm_cmd(cleaned_cmd)
-            return open_ai_class(base_url=f"http://localhost:{port}/v1", api_key=api_key)
+            return MOpenAI(base_url=f"http://localhost:{port}/v1", api_key=api_key, cache=cache)
         else:
-            return open_ai_class()
+            raise ValueError("Either client or vllm_cmd must be provided.")
     elif isinstance(client, int):
-        return open_ai_class(base_url=f"http://localhost:{client}/v1", api_key=api_key)
+        return MOpenAI(base_url=f"http://localhost:{client}/v1", api_key=api_key, cache=cache)
     elif isinstance(client, str):
-        return open_ai_class(base_url=client, api_key=api_key)
+        return MOpenAI(base_url=client, api_key=api_key, cache=cache)
     elif isinstance(client, OpenAI):
-        return client
+        return MOpenAI(base_url=client.base_url, api_key=api_key, cache=cache)
     else:
         raise ValueError("Invalid client type. Must be OpenAI, port (int), base_url (str), or None.")
@@ -296,17 +281,17 @@ def _is_lora_path(path: str) -> bool:
     """Check if path is LoRA adapter directory."""
     if not os.path.isdir(path):
         return False
-    adapter_config_path = os.path.join(path, 'adapter_config.json')
+    adapter_config_path = os.path.join(path, "adapter_config.json")
     return os.path.isfile(adapter_config_path)
 def _get_port_from_client(client: OpenAI) -> Optional[int]:
     """Extract port from OpenAI client base_url."""
-    if hasattr(client, 'base_url') and client.base_url:
+    if hasattr(client, "base_url") and client.base_url:
         base_url = str(client.base_url)
-        if 'localhost:' in base_url:
+        if "localhost:" in base_url:
             try:
-                port_part = base_url.split('localhost:')[1].split('/')[0]
+                port_part = base_url.split("localhost:")[1].split("/")[0]
                 return int(port_part)
             except (IndexError, ValueError):
                 pass
@@ -315,14 +300,14 @@ def _get_port_from_client(client: OpenAI) -> Optional[int]:
 def _load_lora_adapter(lora_path: str, port: int) -> str:
     """Load LoRA adapter from path."""
-    lora_name = os.path.basename(lora_path.rstrip('/\\'))
+    lora_name = os.path.basename(lora_path.rstrip("/\\"))
     if not lora_name:
         lora_name = os.path.basename(os.path.dirname(lora_path))
     response = requests.post(
-        f'http://localhost:{port}/v1/load_lora_adapter',
-        headers={'accept': 'application/json', 'Content-Type': 'application/json'},
-        json={"lora_name": lora_name, "lora_path": os.path.abspath(lora_path)}
+        f"http://localhost:{port}/v1/load_lora_adapter",
+        headers={"accept": "application/json", "Content-Type": "application/json"},
+        json={"lora_name": lora_name, "lora_path": os.path.abspath(lora_path)},
     )
     response.raise_for_status()
     return lora_name
@@ -331,14 +316,14 @@ def _load_lora_adapter(lora_path: str, port: int) -> str:
 def _unload_lora_adapter(lora_path: str, port: int) -> None:
     """Unload LoRA adapter."""
     try:
-        lora_name = os.path.basename(lora_path.rstrip('/\\'))
+        lora_name = os.path.basename(lora_path.rstrip("/\\"))
         if not lora_name:
             lora_name = os.path.basename(os.path.dirname(lora_path))
         response = requests.post(
-            f'http://localhost:{port}/v1/unload_lora_adapter',
-            headers={'accept': 'application/json', 'Content-Type': 'application/json'},
-            json={"lora_name": lora_name, "lora_int_id": 0}
+            f"http://localhost:{port}/v1/unload_lora_adapter",
+            headers={"accept": "application/json", "Content-Type": "application/json"},
+            json={"lora_name": lora_name, "lora_int_id": 0},
         )
         response.raise_for_status()
     except requests.RequestException as e:

speedy_utils/__init__.py CHANGED Viewed

@@ -16,6 +16,7 @@
 # • timef(func) -> Callable - Function execution time decorator
 # • retry_runtime(sleep_seconds: int, max_retry: int, exceptions) -> Callable
 # • memoize(func) -> Callable - Function result caching decorator
+# • imemoize(func) -> Callable - In-memory caching decorator (global persistent)
 # • identify(obj: Any) -> str - Generate unique object identifier
 # • identify_uuid(obj: Any) -> str - Generate UUID-based object identifier
 # • load_by_ext(fname: Union[str, list[str]]) -> Any - Auto-detect file format loader
@@ -79,7 +80,24 @@ from glob import glob
 from multiprocessing import Pool
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+from typing import (
+    Any,
+    Awaitable,
+    Callable as TypingCallable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 # Third-party imports
 import numpy as np
@@ -108,7 +126,7 @@ from .common.notebook_utils import (
 )
 # Cache utilities
-from .common.utils_cache import identify, identify_uuid, memoize
+from .common.utils_cache import identify, identify_uuid, imemoize, memoize
 # IO utilities
 from .common.utils_io import (
@@ -124,6 +142,7 @@ from .common.utils_io import (
 # Misc utilities
 from .common.utils_misc import (
     convert_to_builtin_python,
+    dedup,
     flatten_list,
     get_arg_names,
     is_notebook,
@@ -171,12 +190,20 @@ __all__ = [
     "defaultdict",
     # Typing
     "Any",
+    "Awaitable",
     "Callable",
+    "TypingCallable",
     "Dict",
     "Generic",
+    "Iterable",
     "List",
     "Literal",
+    "Mapping",
     "Optional",
+    "Sequence",
+    "Set",
+    "Tuple",
+    "Type",
     "TypeVar",
     "Union",
     # Third-party
@@ -198,6 +225,7 @@ __all__ = [
     "retry_runtime",
     # Cache utilities
     "memoize",
+    "imemoize",
     "identify",
     "identify_uuid",
     # IO utilities
@@ -214,6 +242,7 @@ __all__ = [
     "get_arg_names",
     "is_notebook",
     "convert_to_builtin_python",
+    "dedup",
     # Print utilities
     "display_pretty_table_html",
     "flatten_dict",

speedy_utils/all.py CHANGED Viewed

@@ -71,7 +71,24 @@ from glob import glob
 from multiprocessing import Pool
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
+from typing import (
+    Any,
+    Awaitable,
+    Callable as TypingCallable,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 # Third-party imports
 import numpy as np
@@ -115,6 +132,9 @@ from speedy_utils import (  # Clock module; Function decorators; Cache utilities
     timef,
 )
+choice = random.choice
 # Define __all__ explicitly with all exports
 __all__ = [
     # Standard library
@@ -146,12 +166,20 @@ __all__ = [
     "defaultdict",
     # Typing
     "Any",
+    "Awaitable",
     "Callable",
+    "TypingCallable",
     "Dict",
     "Generic",
+    "Iterable",
     "List",
     "Literal",
+    "Mapping",
     "Optional",
+    "Sequence",
+    "Set",
+    "Tuple",
+    "Type",
     "TypeVar",
     "Union",
     # Third-party
@@ -199,4 +227,5 @@ __all__ = [
     # Multi-worker processing
     "multi_process",
     "multi_thread",
+    "choice",
 ]

speedy_utils/common/utils_cache.py CHANGED Viewed

@@ -44,6 +44,9 @@ _MEM_CACHES: "weakref.WeakKeyDictionary[Callable[..., Any], cachetools.LRUCache]
     weakref.WeakKeyDictionary()
 )
+# Global memory cache for imemoize (persists across IPython reloads)
+_GLOBAL_MEMORY_CACHE: dict[str, Any] = {}
 # Backward-compat global symbol (internal only; not exported)
 LRU_MEM_CACHE = cachetools.LRUCache(maxsize=256)
@@ -680,4 +683,142 @@ def memoize(
         return decorator(_func)
-__all__ = ["memoize", "identify"]
+# --------------------------------------------------------------------------------------
+# In-memory memoize with global persistent cache
+# --------------------------------------------------------------------------------------
+@overload
+def imemoize(
+    _func: Callable[P, R],
+    *,
+    keys: Optional[list[str]] = ...,
+    key: Optional[Callable[..., Any]] = ...,
+    ignore_self: bool = ...,
+) -> Callable[P, R]: ...
+@overload
+def imemoize(
+    _func: Callable[P, Awaitable[R]],
+    *,
+    keys: Optional[list[str]] = ...,
+    key: Optional[Callable[..., Any]] = ...,
+    ignore_self: bool = ...,
+) -> Callable[P, Awaitable[R]]: ...
+@overload
+def imemoize(
+    _func: None = ...,
+    *,
+    keys: Optional[list[str]] = ...,
+    key: Optional[Callable[..., Any]] = ...,
+    ignore_self: bool = ...,
+) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
+@overload
+def imemoize(  # type: ignore
+    _func: None = ...,
+    *,
+    keys: Optional[list[str]] = ...,
+    key: Optional[Callable[..., Any]] = ...,
+    ignore_self: bool = ...,
+) -> Callable[[Callable[P, Awaitable[R]]], Callable[P, Awaitable[R]]]: ...
+def imemoize(
+    _func: Optional[Callable[P, Any]] = None,
+    *,
+    keys: Optional[list[str]] = None,
+    key: Optional[Callable[..., Any]] = None,
+    ignore_self: bool = True,
+):
+    """
+    In-memory memoization decorator with global persistent cache.
+    Unlike regular memoize, this uses a global memory cache that persists
+    across IPython %load executions. The cache key is based on the function's
+    source code combined with runtime arguments, making it suitable for
+    notebook environments where functions may be reloaded.
+    Args:
+        keys: list of argument names to include in key (optional).
+        key: custom callable (*args, **kwargs) -> hashable for keying (optional).
+        ignore_self: ignore 'self' when building cache key for bound methods.
+    Example:
+        @imemoize
+        def expensive_computation(x):
+            import time
+            time.sleep(2)
+            return x * x
+        # First call computes and caches
+        result1 = expensive_computation(5)
+        # Second call retrieves from memory cache
+        result2 = expensive_computation(5)
+        # Even after %load file.py in IPython, the cache persists
+    """
+    def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
+        is_async = inspect.iscoroutinefunction(func)
+        if is_async:
+            @functools.wraps(func)
+            async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
+                # Compute cache key based on function source + args
+                func_source, sub_dir, key_id = _compute_cache_components(
+                    func, args, kwargs, ignore_self, keys, key
+                )
+                cache_key = identify((func_source, sub_dir, key_id))
+                # Check global memory cache
+                with mem_lock:
+                    if cache_key in _GLOBAL_MEMORY_CACHE:
+                        return _GLOBAL_MEMORY_CACHE[cache_key]
+                # Compute result and store in cache
+                result = await func(*args, **kwargs)
+                with mem_lock:
+                    _GLOBAL_MEMORY_CACHE[cache_key] = result
+                return result
+            return async_wrapper
+        else:
+            @functools.wraps(func)
+            def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
+                # Compute cache key based on function source + args
+                func_source, sub_dir, key_id = _compute_cache_components(
+                    func, args, kwargs, ignore_self, keys, key
+                )
+                cache_key = identify((func_source, sub_dir, key_id))
+                # Check global memory cache
+                with mem_lock:
+                    if cache_key in _GLOBAL_MEMORY_CACHE:
+                        return _GLOBAL_MEMORY_CACHE[cache_key]
+                # Compute result and store in cache
+                result = func(*args, **kwargs)
+                with mem_lock:
+                    _GLOBAL_MEMORY_CACHE[cache_key] = result
+                return result
+            return sync_wrapper
+    # Support both @imemoize and @imemoize(...)
+    if _func is None:
+        return decorator
+    else:
+        return decorator(_func)
+__all__ = ["memoize", "imemoize", "identify"]

speedy-utils 1.1.23__py3-none-any.whl → 1.1.25__py3-none-any.whl

speedy-utils 1.1.23__py3-none-any.whl → 1.1.25__py3-none-any.whl