speedy-utils 1.1.22__py3-none-any.whl → 1.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +19 -7
- llm_utils/chat_format/__init__.py +2 -0
- llm_utils/chat_format/display.py +115 -44
- llm_utils/lm/__init__.py +20 -2
- llm_utils/lm/llm.py +413 -0
- llm_utils/lm/llm_signature.py +35 -0
- llm_utils/lm/mixins.py +379 -0
- llm_utils/lm/openai_memoize.py +18 -7
- llm_utils/lm/signature.py +271 -0
- llm_utils/lm/utils.py +61 -76
- speedy_utils/__init__.py +28 -1
- speedy_utils/all.py +30 -1
- speedy_utils/common/utils_io.py +36 -26
- speedy_utils/common/utils_misc.py +25 -1
- speedy_utils/multi_worker/thread.py +145 -58
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/RECORD +19 -17
- llm_utils/lm/llm_task.py +0 -614
- llm_utils/lm/lm.py +0 -207
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/entry_points.txt +0 -0
llm_utils/lm/mixins.py
ADDED
@@ -0,0 +1,379 @@
"""Mixin classes for LLM functionality extensions."""

import os
import subprocess
from time import sleep
from typing import Any, Dict, List, Optional, Type, Union

import requests
from loguru import logger
from openai import OpenAI
from pydantic import BaseModel


class TemperatureRangeMixin:
    """Mixin for sampling with different temperature ranges."""

    def temperature_range_sampling(
        self,
        input_data: Union[str, BaseModel, List[Dict]],
        temperature_ranges: tuple[float, float],
        n: int = 32,
        response_model: Optional[Type[BaseModel] | Type[str]] = None,
        **runtime_kwargs,
    ) -> List[Dict[str, Any]]:
        """
        Sample LLM responses with a range of temperatures.

        This method generates multiple responses by systematically varying
        the temperature parameter, which controls randomness in the output.

        Args:
            input_data: Input data (string, BaseModel, or message list)
            temperature_ranges: Tuple of (min_temp, max_temp) to sample
            n: Number of temperature samples to generate (must be >= 2)
            response_model: Optional response model override
            **runtime_kwargs: Additional runtime parameters

        Returns:
            List of response dictionaries from all temperature samples
        """
        from speedy_utils.multi_worker.thread import multi_thread

        min_temp, max_temp = temperature_ranges
        if n < 2:
            raise ValueError(f"n must be >= 2, got {n}")

        step = (max_temp - min_temp) / (n - 1)
        list_kwargs = []

        for i in range(n):
            kwargs = dict(
                temperature=min_temp + i * step,
                i=i,
                **runtime_kwargs,
            )
            list_kwargs.append(kwargs)

        def f(kwargs):
            i = kwargs.pop("i")
            sleep(i * 0.05)
            return self.__inner_call__(
                input_data,
                response_model=response_model,
                **kwargs,
            )[0]

        choices = multi_thread(f, list_kwargs, progress=False)
        return [c for c in choices if c is not None]


class TwoStepPydanticMixin:
    """Mixin for two-step Pydantic parsing functionality."""

    def two_step_pydantic_parse(
        self,
        input_data: Union[str, BaseModel, List[Dict]],
        response_model: Type[BaseModel],
        **runtime_kwargs,
    ) -> List[Dict[str, Any]]:
        """
        Parse responses in two steps: text completion then Pydantic parsing.

        This is useful for models that may include reasoning or extra text
        before the JSON output.

        Args:
            input_data: Input data (string, BaseModel, or message list)
            response_model: Pydantic model to parse into
            **runtime_kwargs: Additional runtime parameters

        Returns:
            List of parsed response dictionaries
        """
        # Step 1: Get text completions
        results = self.text_completion(input_data, **runtime_kwargs)
        parsed_results = []

        for result in results:
            response_text = result["parsed"]
            messages = result["messages"]

            # Handle reasoning models that use <think> tags
            if "</think>" in response_text:
                response_text = response_text.split("</think>")[1]

            try:
                # Try direct parsing
                parsed = response_model.model_validate_json(response_text)
            except Exception:
                # Fallback: use LLM to extract JSON
                logger.warning("Failed to parse JSON directly, using LLM to extract")
                _parsed_messages = [
                    {
                        "role": "system",
                        "content": ("You are a helpful assistant that extracts JSON from text."),
                    },
                    {
                        "role": "user",
                        "content": (f"Extract JSON from the following text:\n{response_text}"),
                    },
                ]
                parsed_result = self.pydantic_parse(
                    _parsed_messages,
                    response_model=response_model,
                    **runtime_kwargs,
                )[0]
                parsed = parsed_result["parsed"]

            parsed_results.append({"parsed": parsed, "messages": messages})

        return parsed_results


class VLLMMixin:
    """Mixin for VLLM server management and LoRA operations."""

    def _setup_vllm_server(self) -> None:
        """
        Setup VLLM server if vllm_cmd is provided.

        This method handles:
        - Server reuse logic
        - Starting new servers
        - Port management

        Should be called from __init__.
        """
        from .utils import (
            _extract_port_from_vllm_cmd,
            _is_server_running,
            _kill_vllm_on_port,
            _start_vllm_server,
            get_base_client,
        )

        if not hasattr(self, "vllm_cmd") or not self.vllm_cmd:
            return

        port = _extract_port_from_vllm_cmd(self.vllm_cmd)
        reuse_existing = False

        if self.vllm_reuse:
            try:
                reuse_client = get_base_client(port, cache=False)
                models_response = reuse_client.models.list()
                if getattr(models_response, "data", None):
                    reuse_existing = True
                    logger.info(
                        f"VLLM server already running on port {port}, reusing existing server (vllm_reuse=True)"
                    )
                else:
                    logger.info(f"No models returned from VLLM server on port {port}; starting a new server")
            except Exception as exc:
                logger.info(
                    f"Unable to reach VLLM server on port {port} (list_models failed): {exc}. Starting a new server."
                )

        if not self.vllm_reuse:
            if _is_server_running(port):
                logger.info(f"VLLM server already running on port {port}, killing it first (vllm_reuse=False)")
                _kill_vllm_on_port(port)
            logger.info(f"Starting new VLLM server on port {port}")
            self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)
        elif not reuse_existing:
            logger.info(f"Starting VLLM server on port {port}")
            self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)

    def _load_lora_adapter(self) -> None:
        """
        Load LoRA adapter from the specified lora_path.

        This method:
        1. Validates that lora_path is a valid LoRA directory
        2. Checks if LoRA is already loaded (unless force_lora_unload)
        3. Loads the LoRA adapter and updates the model name
        """
        from .utils import (
            _is_lora_path,
            _get_port_from_client,
            _load_lora_adapter,
        )

        if not self.lora_path:
            return

        if not _is_lora_path(self.lora_path):
            raise ValueError(f"Invalid LoRA path '{self.lora_path}': Directory must contain 'adapter_config.json'")

        logger.info(f"Loading LoRA adapter from: {self.lora_path}")

        # Get the expected LoRA name (basename of the path)
        lora_name = os.path.basename(self.lora_path.rstrip("/\\"))
        if not lora_name:  # Handle edge case of empty basename
            lora_name = os.path.basename(os.path.dirname(self.lora_path))

        # Get list of available models to check if LoRA is already loaded
        try:
            available_models = [m.id for m in self.client.models.list().data]
        except Exception as e:
            logger.warning(f"Failed to list models, proceeding with LoRA load: {str(e)[:100]}")
            available_models = []

        # Check if LoRA is already loaded
        if lora_name in available_models and not self.force_lora_unload:
            logger.info(f"LoRA adapter '{lora_name}' is already loaded, using existing model")
            self.model_kwargs["model"] = lora_name
            return

        # Force unload if requested
        if self.force_lora_unload and lora_name in available_models:
            logger.info(f"Force unloading LoRA adapter '{lora_name}' before reloading")
            port = _get_port_from_client(self.client)
            if port is not None:
                try:
                    VLLMMixin.unload_lora(port, lora_name)
                    logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
                except Exception as e:
                    logger.warning(f"Failed to unload LoRA adapter: {str(e)[:100]}")

        # Get port from client for API calls
        port = _get_port_from_client(self.client)
        if port is None:
            raise ValueError(
                f"Cannot load LoRA adapter '{self.lora_path}': "
                f"Unable to determine port from client base_url. "
                f"LoRA loading requires a client initialized with port."
            )

        try:
            # Load the LoRA adapter
            loaded_lora_name = _load_lora_adapter(self.lora_path, port)
            logger.info(f"Successfully loaded LoRA adapter: {loaded_lora_name}")

            # Update model name to the loaded LoRA name
            self.model_kwargs["model"] = loaded_lora_name

        except requests.RequestException as e:
            # Check if error is due to LoRA already being loaded
            error_msg = str(e)
            if "400" in error_msg or "Bad Request" in error_msg:
                logger.info(f"LoRA adapter may already be loaded, attempting to use '{lora_name}'")
                # Refresh the model list to check if it's now available
                try:
                    updated_models = [m.id for m in self.client.models.list().data]
                    if lora_name in updated_models:
                        logger.info(f"Found LoRA adapter '{lora_name}' in updated model list")
                        self.model_kwargs["model"] = lora_name
                        return
                except Exception:
                    pass  # Fall through to original error

            raise ValueError(f"Failed to load LoRA adapter from '{self.lora_path}': {error_msg[:100]}")

    def unload_lora_adapter(self, lora_path: str) -> None:
        """
        Unload a LoRA adapter.

        Args:
            lora_path: Path to the LoRA adapter directory to unload

        Raises:
            ValueError: If unable to determine port from client
        """
        from .utils import _get_port_from_client, _unload_lora_adapter

        port = _get_port_from_client(self.client)
        if port is None:
            raise ValueError(
                "Cannot unload LoRA adapter: "
                "Unable to determine port from client base_url. "
                "LoRA operations require a client initialized with port."
            )

        _unload_lora_adapter(lora_path, port)
        lora_name = os.path.basename(lora_path.rstrip("/\\"))
        logger.info(f"Unloaded LoRA adapter: {lora_name}")

    @staticmethod
    def unload_lora(port: int, lora_name: str) -> None:
        """
        Static method to unload a LoRA adapter by name.

        Args:
            port: Port number for the API endpoint
            lora_name: Name of the LoRA adapter to unload

        Raises:
            requests.RequestException: If the API call fails
        """
        try:
            response = requests.post(
                f"http://localhost:{port}/v1/unload_lora_adapter",
                headers={
                    "accept": "application/json",
                    "Content-Type": "application/json",
                },
                json={"lora_name": lora_name, "lora_int_id": 0},
            )
            response.raise_for_status()
            logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
        except requests.RequestException as e:
            logger.error(f"Error unloading LoRA adapter '{lora_name}': {str(e)[:100]}")
            raise

    def cleanup_vllm_server(self) -> None:
        """Stop the VLLM server process if started by this instance."""
        from .utils import stop_vllm_process

        if hasattr(self, "vllm_process") and self.vllm_process is not None:
            stop_vllm_process(self.vllm_process)
            self.vllm_process = None

    @staticmethod
    def kill_all_vllm() -> int:
        """
        Kill all tracked VLLM server processes.

        Returns:
            Number of processes killed
        """
        from .utils import kill_all_vllm_processes

        return kill_all_vllm_processes()

    @staticmethod
    def kill_vllm_on_port(port: int) -> bool:
        """
        Kill VLLM server running on a specific port.

        Args:
            port: Port number to kill server on

        Returns:
            True if a server was killed, False if no server was running
        """
        from .utils import _kill_vllm_on_port

        return _kill_vllm_on_port(port)


class ModelUtilsMixin:
    """Mixin for model utility methods."""

    @staticmethod
    def list_models(client: Union[OpenAI, int, str, None] = None) -> List[str]:
        """
        List available models from the OpenAI client.

        Args:
            client: OpenAI client, port number, or base_url string

        Returns:
            List of available model names
        """
        from .utils import get_base_client

        client_instance = get_base_client(client, cache=False)
        models = client_instance.models.list().data
        return [m.id for m in models]
llm_utils/lm/openai_memoize.py
CHANGED
@@ -42,13 +42,16 @@ class MOpenAI(OpenAI):
 
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
+        self._orig_post = self.post
         if cache:
-            self.post =
+            self.set_cache(cache)
+
+    def set_cache(self, cache: bool) -> None:
+        """Enable or disable caching of the post method."""
+        if cache and self.post == self._orig_post:
+            self.post = memoize(self._orig_post)  # type: ignore
+        elif not cache and self.post != self._orig_post:
+            self.post = self._orig_post
 
 
 class MAsyncOpenAI(AsyncOpenAI):

@@ -76,5 +79,13 @@ class MAsyncOpenAI(AsyncOpenAI):
 
     def __init__(self, *args, cache=True, **kwargs):
         super().__init__(*args, **kwargs)
+        self._orig_post = self.post
         if cache:
-            self.
+            self.set_cache(cache)
+
+    def set_cache(self, cache: bool) -> None:
+        """Enable or disable caching of the post method."""
+        if cache and self.post == self._orig_post:
+            self.post = memoize(self._orig_post)  # type: ignore
+        elif not cache and self.post != self._orig_post:
+            self.post = self._orig_post
llm_utils/lm/signature.py
ADDED
@@ -0,0 +1,271 @@
"""
DSPy-like signature system for structured LLM interactions.

This module provides a declarative way to define LLM input/output schemas
with field descriptions and type annotations.
"""

from typing import Any, Dict, List, Type, get_type_hints, Annotated, get_origin, get_args, cast
from pydantic import BaseModel, Field
import inspect


class _FieldProxy:
    """Proxy that stores field information while appearing type-compatible."""

    def __init__(self, field_type: str, desc: str = "", **kwargs):
        self.field_type = field_type  # 'input' or 'output'
        self.desc = desc
        self.kwargs = kwargs


def InputField(desc: str = "", **kwargs) -> Any:
    """Create an input field descriptor."""
    return cast(Any, _FieldProxy('input', desc=desc, **kwargs))


def OutputField(desc: str = "", **kwargs) -> Any:
    """Create an output field descriptor."""
    return cast(Any, _FieldProxy('output', desc=desc, **kwargs))


# Type aliases for cleaner syntax
def Input(desc: str = "", **kwargs) -> Any:
    """Create an input field descriptor that's compatible with type annotations."""
    return InputField(desc=desc, **kwargs)


def Output(desc: str = "", **kwargs) -> Any:
    """Create an output field descriptor that's compatible with type annotations."""
    return OutputField(desc=desc, **kwargs)


class SignatureMeta(type):
    """Metaclass for Signature that processes field annotations."""

    def __new__(cls, name, bases, namespace, **kwargs):
        # Get type hints for this class
        annotations = namespace.get('__annotations__', {})

        # Store field information
        input_fields = {}
        output_fields = {}

        for field_name, field_type in annotations.items():
            field_value = namespace.get(field_name)
            field_desc = None

            # Handle Annotated[Type, Field(...)] syntax using get_origin/get_args
            if get_origin(field_type) is Annotated:
                # Extract args from Annotated type
                args = get_args(field_type)
                if args:
                    # First arg is the actual type
                    field_type = args[0]
                    # Look for _FieldProxy in the metadata
                    for metadata in args[1:]:
                        if isinstance(metadata, _FieldProxy):
                            field_desc = metadata
                            break

            # Handle old syntax with direct assignment
            if field_desc is None and isinstance(field_value, _FieldProxy):
                field_desc = field_value

            # Store field information
            if field_desc and field_desc.field_type == 'input':
                input_fields[field_name] = {
                    'type': field_type,
                    'desc': field_desc.desc,
                    **field_desc.kwargs
                }
            elif field_desc and field_desc.field_type == 'output':
                output_fields[field_name] = {
                    'type': field_type,
                    'desc': field_desc.desc,
                    **field_desc.kwargs
                }

        # Store in class attributes
        namespace['_input_fields'] = input_fields
        namespace['_output_fields'] = output_fields

        return super().__new__(cls, name, bases, namespace)


class Signature(metaclass=SignatureMeta):
    """Base class for defining LLM signatures with input and output fields."""

    _input_fields: Dict[str, Dict[str, Any]] = {}
    _output_fields: Dict[str, Dict[str, Any]] = {}

    def __init__(self, **kwargs):
        """Initialize signature with field values."""
        for field_name, value in kwargs.items():
            setattr(self, field_name, value)

    @classmethod
    def get_instruction(cls) -> str:
        """Generate instruction text from docstring and field descriptions."""
        instruction = cls.__doc__ or "Complete the following task."
        instruction = instruction.strip()

        # Add input field descriptions
        if cls._input_fields:
            instruction += "\n\n**Input Fields:**\n"
            for field_name, field_info in cls._input_fields.items():
                desc = field_info.get('desc', '')
                field_type = field_info['type']
                type_str = getattr(field_type, '__name__', str(field_type))
                instruction += f"- {field_name} ({type_str}): {desc}\n"

        # Add output field descriptions
        if cls._output_fields:
            instruction += "\n**Output Fields:**\n"
            for field_name, field_info in cls._output_fields.items():
                desc = field_info.get('desc', '')
                field_type = field_info['type']
                type_str = getattr(field_type, '__name__', str(field_type))
                instruction += f"- {field_name} ({type_str}): {desc}\n"

        return instruction

    @classmethod
    def get_input_model(cls) -> Type[BaseModel]:
        """Generate Pydantic input model from input fields."""
        if not cls._input_fields:
            raise ValueError(f"Signature {cls.__name__} must have at least one input field")

        fields = {}
        annotations = {}

        for field_name, field_info in cls._input_fields.items():
            field_type = field_info['type']
            desc = field_info.get('desc', '')

            # Create Pydantic field
            field_kwargs = {k: v for k, v in field_info.items()
                            if k not in ['type', 'desc']}
            if desc:
                field_kwargs['description'] = desc

            fields[field_name] = Field(**field_kwargs) if field_kwargs else Field()
            annotations[field_name] = field_type

        # Create dynamic Pydantic model
        input_model = type(
            f"{cls.__name__}Input",
            (BaseModel,),
            {
                '__annotations__': annotations,
                **fields
            }
        )

        return input_model

    @classmethod
    def get_output_model(cls) -> Type[BaseModel]:
        """Generate Pydantic output model from output fields."""
        if not cls._output_fields:
            raise ValueError(f"Signature {cls.__name__} must have at least one output field")

        fields = {}
        annotations = {}

        for field_name, field_info in cls._output_fields.items():
            field_type = field_info['type']
            desc = field_info.get('desc', '')

            # Create Pydantic field
            field_kwargs = {k: v for k, v in field_info.items()
                            if k not in ['type', 'desc']}
            if desc:
                field_kwargs['description'] = desc

            fields[field_name] = Field(**field_kwargs) if field_kwargs else Field()
            annotations[field_name] = field_type

        # Create dynamic Pydantic model
        output_model = type(
            f"{cls.__name__}Output",
            (BaseModel,),
            {
                '__annotations__': annotations,
                **fields
            }
        )

        return output_model

    def format_input(self, **kwargs) -> str:
        """Format input fields as a string."""
        input_data = {}

        # Collect input field values
        for field_name in self._input_fields:
            if field_name in kwargs:
                input_data[field_name] = kwargs[field_name]
            elif hasattr(self, field_name):
                input_data[field_name] = getattr(self, field_name)

        # Format as key-value pairs
        formatted_lines = []
        for field_name, value in input_data.items():
            field_info = self._input_fields[field_name]
            desc = field_info.get('desc', '')
            if desc:
                formatted_lines.append(f"{field_name} ({desc}): {value}")
            else:
                formatted_lines.append(f"{field_name}: {value}")

        return '\n'.join(formatted_lines)


# Export functions for easier importing
__all__ = ['Signature', 'InputField', 'OutputField', 'Input', 'Output']


# Example usage for testing
if __name__ == "__main__":
    # Define a signature like DSPy - using Annotated approach
    class FactJudge(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: Annotated[str, Input("Context for the prediction")]
        question: Annotated[str, Input("Question to be answered")]
        answer: Annotated[str, Input("Answer for the question")]
        factually_correct: Annotated[bool, Output("Is the answer factually correct based on the context?")]

    # Alternative syntax still works but will show type warnings
    class FactJudgeOldSyntax(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: str = InputField(desc="Context for the prediction")  # type: ignore
        question: str = InputField(desc="Question to be answered")  # type: ignore
        answer: str = InputField(desc="Answer for the question")  # type: ignore
        factually_correct: bool = OutputField(desc="Is the answer factually correct based on the context?")  # type: ignore

    # Test both signatures
    for judge_class in [FactJudge, FactJudgeOldSyntax]:
        print(f"\n=== Testing {judge_class.__name__} ===")
        print("Instruction:")
        print(judge_class.get_instruction())

        print("\nInput Model:")
        input_model = judge_class.get_input_model()
        print(input_model.model_json_schema())

        print("\nOutput Model:")
        output_model = judge_class.get_output_model()
        print(output_model.model_json_schema())

        # Test instance usage
        judge = judge_class()
        input_text = judge.format_input(
            context="The sky is blue during daytime.",
            question="What color is the sky?",
            answer="Blue"
        )
        print("\nFormatted Input:")
        print(input_text)