speedy-utils 1.1.23__py3-none-any.whl → 1.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_utils/lm/mixins.py ADDED
@@ -0,0 +1,379 @@
+"""Mixin classes for LLM functionality extensions."""
+
+import os
+import subprocess
+from time import sleep
+from typing import Any, Dict, List, Optional, Type, Union
+
+import requests
+from loguru import logger
+from openai import OpenAI
+from pydantic import BaseModel
+
+
+class TemperatureRangeMixin:
+    """Mixin for sampling with different temperature ranges."""
+
+    def temperature_range_sampling(
+        self,
+        input_data: Union[str, BaseModel, List[Dict]],
+        temperature_ranges: tuple[float, float],
+        n: int = 32,
+        response_model: Optional[Type[BaseModel] | Type[str]] = None,
+        **runtime_kwargs,
+    ) -> List[Dict[str, Any]]:
+        """
+        Sample LLM responses with a range of temperatures.
+
+        This method generates multiple responses by systematically varying
+        the temperature parameter, which controls randomness in the output.
+
+        Args:
+            input_data: Input data (string, BaseModel, or message list)
+            temperature_ranges: Tuple of (min_temp, max_temp) to sample
+            n: Number of temperature samples to generate (must be >= 2)
+            response_model: Optional response model override
+            **runtime_kwargs: Additional runtime parameters
+
+        Returns:
+            List of response dictionaries from all temperature samples
+        """
+        from speedy_utils.multi_worker.thread import multi_thread
+
+        min_temp, max_temp = temperature_ranges
+        if n < 2:
+            raise ValueError(f"n must be >= 2, got {n}")
+
+        step = (max_temp - min_temp) / (n - 1)
+        list_kwargs = []
+
+        for i in range(n):
+            kwargs = dict(
+                temperature=min_temp + i * step,
+                i=i,
+                **runtime_kwargs,
+            )
+            list_kwargs.append(kwargs)
+
+        def f(kwargs):
+            i = kwargs.pop("i")
+            sleep(i * 0.05)
+            return self.__inner_call__(
+                input_data,
+                response_model=response_model,
+                **kwargs,
+            )[0]
+
+        choices = multi_thread(f, list_kwargs, progress=False)
+        return [c for c in choices if c is not None]
+
+
+class TwoStepPydanticMixin:
+    """Mixin for two-step Pydantic parsing functionality."""
+
+    def two_step_pydantic_parse(
+        self,
+        input_data: Union[str, BaseModel, List[Dict]],
+        response_model: Type[BaseModel],
+        **runtime_kwargs,
+    ) -> List[Dict[str, Any]]:
+        """
+        Parse responses in two steps: text completion then Pydantic parsing.
+
+        This is useful for models that may include reasoning or extra text
+        before the JSON output.
+
+        Args:
+            input_data: Input data (string, BaseModel, or message list)
+            response_model: Pydantic model to parse into
+            **runtime_kwargs: Additional runtime parameters
+
+        Returns:
+            List of parsed response dictionaries
+        """
+        # Step 1: Get text completions
+        results = self.text_completion(input_data, **runtime_kwargs)
+        parsed_results = []
+
+        for result in results:
+            response_text = result["parsed"]
+            messages = result["messages"]
+
+            # Handle reasoning models that use <think> tags
+            if "</think>" in response_text:
+                response_text = response_text.split("</think>")[1]
+
+            try:
+                # Try direct parsing
+                parsed = response_model.model_validate_json(response_text)
+            except Exception:
+                # Fallback: use LLM to extract JSON
+                logger.warning("Failed to parse JSON directly, using LLM to extract")
+                _parsed_messages = [
+                    {
+                        "role": "system",
+                        "content": ("You are a helpful assistant that extracts JSON from text."),
+                    },
+                    {
+                        "role": "user",
+                        "content": (f"Extract JSON from the following text:\n{response_text}"),
+                    },
+                ]
+                parsed_result = self.pydantic_parse(
+                    _parsed_messages,
+                    response_model=response_model,
+                    **runtime_kwargs,
+                )[0]
+                parsed = parsed_result["parsed"]
+
+            parsed_results.append({"parsed": parsed, "messages": messages})
+
+        return parsed_results
+
+
+class VLLMMixin:
+    """Mixin for VLLM server management and LoRA operations."""
+
+    def _setup_vllm_server(self) -> None:
+        """
+        Setup VLLM server if vllm_cmd is provided.
+
+        This method handles:
+        - Server reuse logic
+        - Starting new servers
+        - Port management
+
+        Should be called from __init__.
+        """
+        from .utils import (
+            _extract_port_from_vllm_cmd,
+            _is_server_running,
+            _kill_vllm_on_port,
+            _start_vllm_server,
+            get_base_client,
+        )
+
+        if not hasattr(self, "vllm_cmd") or not self.vllm_cmd:
+            return
+
+        port = _extract_port_from_vllm_cmd(self.vllm_cmd)
+        reuse_existing = False
+
+        if self.vllm_reuse:
+            try:
+                reuse_client = get_base_client(port, cache=False)
+                models_response = reuse_client.models.list()
+                if getattr(models_response, "data", None):
+                    reuse_existing = True
+                    logger.info(
+                        f"VLLM server already running on port {port}, reusing existing server (vllm_reuse=True)"
+                    )
+                else:
+                    logger.info(f"No models returned from VLLM server on port {port}; starting a new server")
+            except Exception as exc:
+                logger.info(
+                    f"Unable to reach VLLM server on port {port} (list_models failed): {exc}. Starting a new server."
+                )
+
+        if not self.vllm_reuse:
+            if _is_server_running(port):
+                logger.info(f"VLLM server already running on port {port}, killing it first (vllm_reuse=False)")
+                _kill_vllm_on_port(port)
+            logger.info(f"Starting new VLLM server on port {port}")
+            self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)
+        elif not reuse_existing:
+            logger.info(f"Starting VLLM server on port {port}")
+            self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)
+
+    def _load_lora_adapter(self) -> None:
+        """
+        Load LoRA adapter from the specified lora_path.
+
+        This method:
+        1. Validates that lora_path is a valid LoRA directory
+        2. Checks if LoRA is already loaded (unless force_lora_unload)
+        3. Loads the LoRA adapter and updates the model name
+        """
+        from .utils import (
+            _is_lora_path,
+            _get_port_from_client,
+            _load_lora_adapter,
+        )
+
+        if not self.lora_path:
+            return
+
+        if not _is_lora_path(self.lora_path):
+            raise ValueError(f"Invalid LoRA path '{self.lora_path}': Directory must contain 'adapter_config.json'")
+
+        logger.info(f"Loading LoRA adapter from: {self.lora_path}")
+
+        # Get the expected LoRA name (basename of the path)
+        lora_name = os.path.basename(self.lora_path.rstrip("/\\"))
+        if not lora_name: # Handle edge case of empty basename
+            lora_name = os.path.basename(os.path.dirname(self.lora_path))
+
+        # Get list of available models to check if LoRA is already loaded
+        try:
+            available_models = [m.id for m in self.client.models.list().data]
+        except Exception as e:
+            logger.warning(f"Failed to list models, proceeding with LoRA load: {str(e)[:100]}")
+            available_models = []
+
+        # Check if LoRA is already loaded
+        if lora_name in available_models and not self.force_lora_unload:
+            logger.info(f"LoRA adapter '{lora_name}' is already loaded, using existing model")
+            self.model_kwargs["model"] = lora_name
+            return
+
+        # Force unload if requested
+        if self.force_lora_unload and lora_name in available_models:
+            logger.info(f"Force unloading LoRA adapter '{lora_name}' before reloading")
+            port = _get_port_from_client(self.client)
+            if port is not None:
+                try:
+                    VLLMMixin.unload_lora(port, lora_name)
+                    logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to unload LoRA adapter: {str(e)[:100]}")
+
+        # Get port from client for API calls
+        port = _get_port_from_client(self.client)
+        if port is None:
+            raise ValueError(
+                f"Cannot load LoRA adapter '{self.lora_path}': "
+                f"Unable to determine port from client base_url. "
+                f"LoRA loading requires a client initialized with port."
+            )
+
+        try:
+            # Load the LoRA adapter
+            loaded_lora_name = _load_lora_adapter(self.lora_path, port)
+            logger.info(f"Successfully loaded LoRA adapter: {loaded_lora_name}")
+
+            # Update model name to the loaded LoRA name
+            self.model_kwargs["model"] = loaded_lora_name
+
+        except requests.RequestException as e:
+            # Check if error is due to LoRA already being loaded
+            error_msg = str(e)
+            if "400" in error_msg or "Bad Request" in error_msg:
+                logger.info(f"LoRA adapter may already be loaded, attempting to use '{lora_name}'")
+                # Refresh the model list to check if it's now available
+                try:
+                    updated_models = [m.id for m in self.client.models.list().data]
+                    if lora_name in updated_models:
+                        logger.info(f"Found LoRA adapter '{lora_name}' in updated model list")
+                        self.model_kwargs["model"] = lora_name
+                        return
+                except Exception:
+                    pass # Fall through to original error
+
+            raise ValueError(f"Failed to load LoRA adapter from '{self.lora_path}': {error_msg[:100]}")
+
+    def unload_lora_adapter(self, lora_path: str) -> None:
+        """
+        Unload a LoRA adapter.
+
+        Args:
+            lora_path: Path to the LoRA adapter directory to unload
+
+        Raises:
+            ValueError: If unable to determine port from client
+        """
+        from .utils import _get_port_from_client, _unload_lora_adapter
+
+        port = _get_port_from_client(self.client)
+        if port is None:
+            raise ValueError(
+                "Cannot unload LoRA adapter: "
+                "Unable to determine port from client base_url. "
+                "LoRA operations require a client initialized with port."
+            )
+
+        _unload_lora_adapter(lora_path, port)
+        lora_name = os.path.basename(lora_path.rstrip("/\\"))
+        logger.info(f"Unloaded LoRA adapter: {lora_name}")
+
+    @staticmethod
+    def unload_lora(port: int, lora_name: str) -> None:
+        """
+        Static method to unload a LoRA adapter by name.
+
+        Args:
+            port: Port number for the API endpoint
+            lora_name: Name of the LoRA adapter to unload
+
+        Raises:
+            requests.RequestException: If the API call fails
+        """
+        try:
+            response = requests.post(
+                f"http://localhost:{port}/v1/unload_lora_adapter",
+                headers={
+                    "accept": "application/json",
+                    "Content-Type": "application/json",
+                },
+                json={"lora_name": lora_name, "lora_int_id": 0},
+            )
+            response.raise_for_status()
+            logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+        except requests.RequestException as e:
+            logger.error(f"Error unloading LoRA adapter '{lora_name}': {str(e)[:100]}")
+            raise
+
+    def cleanup_vllm_server(self) -> None:
+        """Stop the VLLM server process if started by this instance."""
+        from .utils import stop_vllm_process
+
+        if hasattr(self, "vllm_process") and self.vllm_process is not None:
+            stop_vllm_process(self.vllm_process)
+            self.vllm_process = None
+
+    @staticmethod
+    def kill_all_vllm() -> int:
+        """
+        Kill all tracked VLLM server processes.
+
+        Returns:
+            Number of processes killed
+        """
+        from .utils import kill_all_vllm_processes
+
+        return kill_all_vllm_processes()
+
+    @staticmethod
+    def kill_vllm_on_port(port: int) -> bool:
+        """
+        Kill VLLM server running on a specific port.
+
+        Args:
+            port: Port number to kill server on
+
+        Returns:
+            True if a server was killed, False if no server was running
+        """
+        from .utils import _kill_vllm_on_port
+
+        return _kill_vllm_on_port(port)
+
+
+class ModelUtilsMixin:
+    """Mixin for model utility methods."""
+
+    @staticmethod
+    def list_models(client: Union[OpenAI, int, str, None] = None) -> List[str]:
+        """
+        List available models from the OpenAI client.
+
+        Args:
+            client: OpenAI client, port number, or base_url string
+
+        Returns:
+            List of available model names
+        """
+        from .utils import get_base_client
+
+        client_instance = get_base_client(client, cache=False)
+        models = client_instance.models.list().data
+        return [m.id for m in models]
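A brief usage sketch for the mixins added above. The mixin and method names come from this diff; the composing `LM` class, its import path, its constructor arguments, and the example schema are assumptions for illustration only:

    # Hypothetical sketch -- `LM` stands in for whichever llm_utils class mixes these in.
    from pydantic import BaseModel
    from llm_utils import LM  # assumed import path

    class Answer(BaseModel):
        verdict: str

    lm = LM(model="gpt-4o-mini")  # assumed constructor arguments

    # TemperatureRangeMixin: sweep temperatures evenly between 0.2 and 1.0 over 8 calls
    choices = lm.temperature_range_sampling(
        "Is the sky blue?", temperature_ranges=(0.2, 1.0), n=8
    )

    # TwoStepPydanticMixin: free-form completion first, then parse into Answer,
    # with an LLM-assisted JSON extraction as fallback if direct parsing fails
    results = lm.two_step_pydantic_parse("Is the sky blue?", response_model=Answer)

    # ModelUtilsMixin: list models served by a local endpoint (port 8000 is an assumed example)
    print(LM.list_models(8000))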
@@ -42,13 +42,16 @@ class MOpenAI(OpenAI):
 
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
+        self._orig_post = self.post
         if cache:
-            # Create a memoized wrapper for the instance's post method.
-            # The memoize decorator now preserves exact type information,
-            # so no casting is needed.
-            orig_post = self.post
-            memoized = memoize(orig_post)
-            self.post = memoized
+            self.set_cache(cache)
+
+    def set_cache(self, cache: bool) -> None:
+        """Enable or disable caching of the post method."""
+        if cache and self.post == self._orig_post:
+            self.post = memoize(self._orig_post) # type: ignore
+        elif not cache and self.post != self._orig_post:
+            self.post = self._orig_post
 
 
 class MAsyncOpenAI(AsyncOpenAI):
@@ -76,5 +79,13 @@ class MAsyncOpenAI(AsyncOpenAI):
 
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
+        self._orig_post = self.post
         if cache:
-            self.post = memoize(self.post) # type: ignore
+            self.set_cache(cache)
+
+    def set_cache(self, cache: bool) -> None:
+        """Enable or disable caching of the post method."""
+        if cache and self.post == self._orig_post:
+            self.post = memoize(self._orig_post) # type: ignore
+        elif not cache and self.post != self._orig_post:
+            self.post = self._orig_post
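The `set_cache` additions to `MOpenAI` and `MAsyncOpenAI` make response caching toggleable after construction: the original `post` is kept in `_orig_post`, and `set_cache` swaps the memoized wrapper in or out, doing nothing if the requested state is already active. A minimal sketch of the intended toggle, assuming a locally served endpoint (the base URL and API key below are placeholders, not taken from this diff):

    client = MOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY", cache=True)
    # ... requests go through the memoized post() and can be served from cache ...
    client.set_cache(False)  # restore the original, uncached post()
    client.set_cache(True)   # re-wrap with memoize(); no-op if already wrapped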
llm_utils/lm/signature.py CHANGED
@@ -5,43 +5,38 @@ This module provides a declarative way to define LLM input/output schemas
 with field descriptions and type annotations.
 """
 
-from typing import Any, Dict, List, Type, Union, get_type_hints, Annotated, get_origin, get_args
+from typing import Any, Dict, List, Type, get_type_hints, Annotated, get_origin, get_args, cast
 from pydantic import BaseModel, Field
 import inspect
 
 
-class InputField:
-    """Represents an input field in a signature."""
+class _FieldProxy:
+    """Proxy that stores field information while appearing type-compatible."""
 
-    def __init__(self, desc: str = "", **kwargs):
+    def __init__(self, field_type: str, desc: str = "", **kwargs):
+        self.field_type = field_type # 'input' or 'output'
         self.desc = desc
         self.kwargs = kwargs
-
-    def __class_getitem__(cls, item):
-        """Support for InputField[type] syntax."""
-        return item
 
 
-class OutputField:
-    """Represents an output field in a signature."""
-
-    def __init__(self, desc: str = "", **kwargs):
-        self.desc = desc
-        self.kwargs = kwargs
-
-    def __class_getitem__(cls, item):
-        """Support for OutputField[type] syntax."""
-        return item
+def InputField(desc: str = "", **kwargs) -> Any:
+    """Create an input field descriptor."""
+    return cast(Any, _FieldProxy('input', desc=desc, **kwargs))
+
+
+def OutputField(desc: str = "", **kwargs) -> Any:
+    """Create an output field descriptor."""
+    return cast(Any, _FieldProxy('output', desc=desc, **kwargs))
 
 
 # Type aliases for cleaner syntax
 def Input(desc: str = "", **kwargs) -> Any:
-    """Create an input field descriptor."""
+    """Create an input field descriptor that's compatible with type annotations."""
     return InputField(desc=desc, **kwargs)
 
 
 def Output(desc: str = "", **kwargs) -> Any:
-    """Create an output field descriptor."""
+    """Create an output field descriptor that's compatible with type annotations."""
     return OutputField(desc=desc, **kwargs)
 
 
@@ -67,24 +62,24 @@ class SignatureMeta(type):
                 if args:
                     # First arg is the actual type
                     field_type = args[0]
-                    # Look for InputField or OutputField in the metadata
+                    # Look for _FieldProxy in the metadata
                     for metadata in args[1:]:
-                        if isinstance(metadata, (InputField, OutputField)):
+                        if isinstance(metadata, _FieldProxy):
                             field_desc = metadata
                             break
 
             # Handle old syntax with direct assignment
-            if field_desc is None and isinstance(field_value, (InputField, OutputField)):
+            if field_desc is None and isinstance(field_value, _FieldProxy):
                 field_desc = field_value
 
             # Store field information
-            if isinstance(field_desc, InputField):
+            if field_desc and field_desc.field_type == 'input':
                 input_fields[field_name] = {
                     'type': field_type,
                     'desc': field_desc.desc,
                     **field_desc.kwargs
                 }
-            elif isinstance(field_desc, OutputField):
+            elif field_desc and field_desc.field_type == 'output':
                 output_fields[field_name] = {
                     'type': field_type,
                     'desc': field_desc.desc,
@@ -136,10 +131,10 @@ class Signature(metaclass=SignatureMeta):
         return instruction
 
     @classmethod
-    def get_input_model(cls) -> Union[Type[BaseModel], type[str]]:
+    def get_input_model(cls) -> Type[BaseModel]:
         """Generate Pydantic input model from input fields."""
         if not cls._input_fields:
-            return str
+            raise ValueError(f"Signature {cls.__name__} must have at least one input field")
 
         fields = {}
         annotations = {}
@@ -170,10 +165,10 @@ class Signature(metaclass=SignatureMeta):
         return input_model
 
     @classmethod
-    def get_output_model(cls) -> Union[Type[BaseModel], type[str]]:
+    def get_output_model(cls) -> Type[BaseModel]:
         """Generate Pydantic output model from output fields."""
         if not cls._output_fields:
-            return str
+            raise ValueError(f"Signature {cls.__name__} must have at least one output field")
 
         fields = {}
         annotations = {}
@@ -259,17 +254,11 @@ if __name__ == "__main__":
 
     print("\nInput Model:")
     input_model = judge_class.get_input_model()
-    if input_model is not str and hasattr(input_model, 'model_json_schema'):
-        print(input_model.model_json_schema()) # type: ignore
-    else:
-        print("String input model")
+    print(input_model.model_json_schema())
 
     print("\nOutput Model:")
     output_model = judge_class.get_output_model()
-    if output_model is not str and hasattr(output_model, 'model_json_schema'):
-        print(output_model.model_json_schema()) # type: ignore
-    else:
-        print("String output model")
+    print(output_model.model_json_schema())
 
     # Test instance usage
     judge = judge_class()
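With these changes, `InputField` and `OutputField` are plain factory functions returning a `_FieldProxy`, intended to sit inside `Annotated[...]` without upsetting type checkers, and `get_input_model`/`get_output_model` now raise `ValueError` instead of silently falling back to `str` when a signature side has no fields. A small sketch of a signature under the new API (the class name, field names, and docstring are illustrative, not taken from this diff):

    from typing import Annotated

    class JudgeAnswer(Signature):
        """Judge whether a proposed answer is correct."""  # illustrative instruction
        question: Annotated[str, InputField(desc="Question being judged")]
        answer: Annotated[str, InputField(desc="Proposed answer")]
        is_correct: Annotated[bool, OutputField(desc="True if the answer is correct")]

    # The metaclass collects the _FieldProxy metadata into input/output fields,
    # so a Pydantic model (and its JSON schema) can be generated for each side.
    print(JudgeAnswer.get_output_model().model_json_schema())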