speedy-utils 1.1.22__py3-none-any.whl → 1.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +19 -7
- llm_utils/chat_format/__init__.py +2 -0
- llm_utils/chat_format/display.py +115 -44
- llm_utils/lm/__init__.py +20 -2
- llm_utils/lm/llm.py +413 -0
- llm_utils/lm/llm_signature.py +35 -0
- llm_utils/lm/mixins.py +379 -0
- llm_utils/lm/openai_memoize.py +18 -7
- llm_utils/lm/signature.py +271 -0
- llm_utils/lm/utils.py +61 -76
- speedy_utils/__init__.py +28 -1
- speedy_utils/all.py +30 -1
- speedy_utils/common/utils_io.py +36 -26
- speedy_utils/common/utils_misc.py +25 -1
- speedy_utils/multi_worker/thread.py +145 -58
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/RECORD +19 -17
- llm_utils/lm/llm_task.py +0 -614
- llm_utils/lm/lm.py +0 -207
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.22.dist-info → speedy_utils-1.1.24.dist-info}/entry_points.txt +0 -0
llm_utils/lm/llm.py
ADDED
@@ -0,0 +1,413 @@
# type: ignore

"""
Simplified LLM Task module for handling language model interactions with structured input/output.
"""

import os
import subprocess
from typing import Any, Dict, List, Optional, Type, Union, cast

import requests
from loguru import logger
from openai import OpenAI, AuthenticationError, BadRequestError, RateLimitError
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel

from speedy_utils.common.utils_io import jdumps

from .utils import (
    _extract_port_from_vllm_cmd,
    _start_vllm_server,
    _kill_vllm_on_port,
    _is_server_running,
    get_base_client,
    _is_lora_path,
    _get_port_from_client,
    _load_lora_adapter,
    _unload_lora_adapter,
    kill_all_vllm_processes,
    stop_vllm_process,
)
from .base_prompt_builder import BasePromptBuilder
from .mixins import (
    TemperatureRangeMixin,
    TwoStepPydanticMixin,
    VLLMMixin,
    ModelUtilsMixin,
)

# Type aliases for better readability
Messages = List[ChatCompletionMessageParam]


class LLM(
    TemperatureRangeMixin,
    TwoStepPydanticMixin,
    VLLMMixin,
    ModelUtilsMixin,
):
    """LLM task with structured input/output handling."""

    def __init__(
        self,
        instruction: Optional[str] = None,
        input_model: Union[Type[BaseModel], type[str]] = str,
        output_model: Type[BaseModel] | Type[str] = None,
        client: Union[OpenAI, int, str, None] = None,
        cache=True,
        is_reasoning_model: bool = False,
        force_lora_unload: bool = False,
        lora_path: Optional[str] = None,
        vllm_cmd: Optional[str] = None,
        vllm_timeout: int = 1200,
        vllm_reuse: bool = True,
        **model_kwargs,
    ):
        """Initialize LLMTask."""
        self.instruction = instruction
        self.input_model = input_model
        self.output_model = output_model
        self.model_kwargs = model_kwargs
        self.is_reasoning_model = is_reasoning_model
        self.force_lora_unload = force_lora_unload
        self.lora_path = lora_path
        self.vllm_cmd = vllm_cmd
        self.vllm_timeout = vllm_timeout
        self.vllm_reuse = vllm_reuse
        self.vllm_process: Optional[subprocess.Popen] = None
        self.last_ai_response = None  # Store raw response from client
        self.cache = cache

        # Handle VLLM server startup if vllm_cmd is provided
        if self.vllm_cmd:
            self._setup_vllm_server()

            # Set client to use the VLLM server port if not explicitly provided
            port = _extract_port_from_vllm_cmd(self.vllm_cmd)
            if client is None:
                client = port

        self.client = get_base_client(client, cache=cache, vllm_cmd=self.vllm_cmd, vllm_process=self.vllm_process)
        # check connection of client
        try:
            self.client.models.list()
        except Exception as e:
            logger.error(f"Failed to connect to OpenAI client: {str(e)}, base_url={self.client.base_url}")
            raise e

        if not self.model_kwargs.get("model", ""):
            self.model_kwargs["model"] = self.client.models.list().data[0].id

        # Handle LoRA loading if lora_path is provided
        if self.lora_path:
            self._load_lora_adapter()

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit with cleanup."""
        self.cleanup_vllm_server()

    def _prepare_input(self, input_data: Union[str, BaseModel, List[Dict]]) -> Messages:
        """Convert input to messages format."""
        if isinstance(input_data, list):
            assert isinstance(input_data[0], dict) and "role" in input_data[0], (
                "If input_data is a list, it must be a list of messages with 'role' and 'content' keys."
            )
            return cast(Messages, input_data)
        else:
            # Convert input to string format
            if isinstance(input_data, str):
                user_content = input_data
            elif hasattr(input_data, "model_dump_json"):
                user_content = input_data.model_dump_json()
            elif isinstance(input_data, dict):
                user_content = jdumps(input_data)
            else:
                user_content = str(input_data)

            # Build messages
            messages = (
                [
                    {"role": "system", "content": self.instruction},
                ]
                if self.instruction is not None
                else []
            )

            messages.append({"role": "user", "content": user_content})
            return cast(Messages, messages)

    def text_completion(self, input_data: Union[str, BaseModel, list[Dict]], **runtime_kwargs) -> List[Dict[str, Any]]:
        """Execute LLM task and return text responses."""
        # Prepare messages
        messages = self._prepare_input(input_data)

        # Merge runtime kwargs with default model kwargs (runtime takes precedence)
        effective_kwargs = {**self.model_kwargs, **runtime_kwargs}
        model_name = effective_kwargs.get("model", self.model_kwargs["model"])

        # Extract model name from kwargs for API call
        api_kwargs = {k: v for k, v in effective_kwargs.items() if k != "model"}

        try:
            completion = self.client.chat.completions.create(model=model_name, messages=messages, **api_kwargs)
            # Store raw response from client
            self.last_ai_response = completion
        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
            logger.error(error_msg)
            raise
        except Exception as e:
            is_length_error = "Length" in str(e) or "maximum context length" in str(e)
            if is_length_error:
                raise ValueError(f"Input too long for model {model_name}. Error: {str(e)[:100]}...")
            # Re-raise all other exceptions
            raise
        # print(completion)

        results: List[Dict[str, Any]] = []
        for choice in completion.choices:
            choice_messages = cast(
                Messages,
                messages + [{"role": "assistant", "content": choice.message.content}],
            )
            result_dict = {"parsed": choice.message.content, "messages": choice_messages}

            # Add reasoning content if this is a reasoning model
            if self.is_reasoning_model and hasattr(choice.message, "reasoning_content"):
                result_dict["reasoning_content"] = choice.message.reasoning_content

            results.append(result_dict)
        return results

    def pydantic_parse(
        self,
        input_data: Union[str, BaseModel, list[Dict]],
        response_model: Optional[Type[BaseModel]] | Type[str] = None,
        **runtime_kwargs,
    ) -> List[Dict[str, Any]]:
        """Execute LLM task and return parsed Pydantic model responses."""
        # Prepare messages
        messages = self._prepare_input(input_data)

        # Merge runtime kwargs with default model kwargs (runtime takes precedence)
        effective_kwargs = {**self.model_kwargs, **runtime_kwargs}
        model_name = effective_kwargs.get("model", self.model_kwargs["model"])

        # Extract model name from kwargs for API call
        api_kwargs = {k: v for k, v in effective_kwargs.items() if k != "model"}

        pydantic_model_to_use_opt = response_model or self.output_model
        if pydantic_model_to_use_opt is None:
            raise ValueError(
                "No response model specified. Either set output_model in constructor or pass response_model parameter."
            )
        pydantic_model_to_use: Type[BaseModel] = cast(Type[BaseModel], pydantic_model_to_use_opt)
        try:
            completion = self.client.chat.completions.parse(
                model=model_name,
                messages=messages,
                response_format=pydantic_model_to_use,
                **api_kwargs,
            )
            # Store raw response from client
            self.last_ai_response = completion
        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
            logger.error(error_msg)
            raise
        except Exception as e:
            is_length_error = "Length" in str(e) or "maximum context length" in str(e)
            if is_length_error:
                raise ValueError(f"Input too long for model {model_name}. Error: {str(e)[:100]}...")
            # Re-raise all other exceptions
            raise

        results: List[Dict[str, Any]] = []
        for choice in completion.choices:  # type: ignore[attr-defined]
            choice_messages = cast(
                Messages,
                messages + [{"role": "assistant", "content": choice.message.content}],
            )

            # Ensure consistent Pydantic model output for both fresh and cached responses
            parsed_content = choice.message.parsed  # type: ignore[attr-defined]
            if isinstance(parsed_content, dict):
                # Cached response: validate dict back to Pydantic model
                parsed_content = pydantic_model_to_use.model_validate(parsed_content)
            elif not isinstance(parsed_content, pydantic_model_to_use):
                # Fallback: ensure it's the correct type
                parsed_content = pydantic_model_to_use.model_validate(parsed_content)

            result_dict = {"parsed": parsed_content, "messages": choice_messages}

            # Add reasoning content if this is a reasoning model
            if self.is_reasoning_model and hasattr(choice.message, "reasoning_content"):
                result_dict["reasoning_content"] = choice.message.reasoning_content

            results.append(result_dict)
        return results

    def __call__(
        self,
        input_data: Union[str, BaseModel, list[Dict]],
        response_model: Optional[Type[BaseModel] | Type[str]] = None,
        two_step_parse_pydantic: bool = False,
        temperature_ranges: Optional[tuple[float, float]] = None,
        n: int = 1,
        cache=None,
        **openai_client_kwargs,
    ) -> List[Dict[str, Any]]:
        """
        Execute LLM task.

        Args:
            input_data: Input data (string, BaseModel, or message list)
            response_model: Optional response model override
            two_step_parse_pydantic: Use two-step parsing (text then parse)
            temperature_ranges: If set, tuple of (min_temp, max_temp) to sample
            n: Number of temperature samples (only used with temperature_ranges, must be >= 2)
            **runtime_kwargs: Additional runtime parameters

        Returns:
            List of response dictionaries
        """
        if cache is not None:
            if hasattr(self.client, "set_cache"):
                self.client.set_cache(cache)
            else:
                logger.warning("Client does not support caching.")
        # Handle temperature range sampling
        if temperature_ranges is not None:
            if n < 2:
                raise ValueError(f"n must be >= 2 when using temperature_ranges, got {n}")
            return self.temperature_range_sampling(
                input_data,
                temperature_ranges=temperature_ranges,
                n=n,
                response_model=response_model,
                **openai_client_kwargs,
            )
        openai_client_kwargs["n"] = n

        # Handle two-step Pydantic parsing
        pydantic_model = response_model or self.output_model
        if two_step_parse_pydantic and pydantic_model not in (str, None):
            choices = self.two_step_pydantic_parse(
                input_data,
                response_model=pydantic_model,
                **openai_client_kwargs,
            )
        else:
            choices = self.__inner_call__(
                input_data,
                response_model=response_model,
                two_step_parse_pydantic=False,
                **openai_client_kwargs,
            )

        # Track conversation history
        _last_conv = choices[0]["messages"] if choices else []
        if not hasattr(self, "_last_conversations"):
            self._last_conversations = []
        else:
            self._last_conversations = self._last_conversations[-100:]
        self._last_conversations.append(_last_conv)
        return choices

    def inspect_history(self, idx: int = -1, k_last_messages: int = 2) -> List[Dict[str, Any]]:
        """Inspect the message history of a specific response choice."""
        if hasattr(self, "_last_conversations"):
            from llm_utils import show_chat_v2

            conv = self._last_conversations[idx]
            if k_last_messages > 0:
                conv = conv[-k_last_messages:]
            return show_chat_v2(conv)
        else:
            raise ValueError("No message history available. Make a call first.")

    def __inner_call__(
        self,
        input_data: Union[str, BaseModel, list[Dict]],
        response_model: Optional[Type[BaseModel] | Type[str]] = None,
        two_step_parse_pydantic: bool = False,
        **runtime_kwargs,
    ) -> List[Dict[str, Any]]:
        """
        Internal call handler. Delegates to text() or parse() based on model.

        Note: two_step_parse_pydantic is deprecated here; use the public
        __call__ method which routes to the mixin.
        """
        pydantic_model_to_use = response_model or self.output_model

        if pydantic_model_to_use is str or pydantic_model_to_use is None:
            return self.text_completion(input_data, **runtime_kwargs)
        else:
            return self.pydantic_parse(
                input_data,
                response_model=response_model,
                **runtime_kwargs,
            )

    # Backward compatibility aliases
    def text(self, *args, **kwargs) -> List[Dict[str, Any]]:
        """Alias for text_completion() for backward compatibility."""
        return self.text_completion(*args, **kwargs)

    def parse(self, *args, **kwargs) -> List[Dict[str, Any]]:
        """Alias for pydantic_parse() for backward compatibility."""
        return self.pydantic_parse(*args, **kwargs)

    @classmethod
    def from_prompt_builder(
        builder: BasePromptBuilder,
        client: Union[OpenAI, int, str, None] = None,
        cache=True,
        is_reasoning_model: bool = False,
        lora_path: Optional[str] = None,
        vllm_cmd: Optional[str] = None,
        vllm_timeout: int = 120,
        vllm_reuse: bool = True,
        **model_kwargs,
    ) -> "LLM":
        """
        Create an LLMTask instance from a BasePromptBuilder instance.

        This method extracts the instruction, input model, and output model
        from the provided builder and initializes an LLMTask accordingly.

        Args:
            builder: BasePromptBuilder instance
            client: OpenAI client, port number, or base_url string
            cache: Whether to use cached responses (default True)
            is_reasoning_model: Whether model is reasoning model (default False)
            lora_path: Optional path to LoRA adapter directory
            vllm_cmd: Optional VLLM command to start server automatically
            vllm_timeout: Timeout in seconds to wait for VLLM server (default 120)
            vllm_reuse: If True (default), reuse existing server on target port
            **model_kwargs: Additional model parameters
        """
        instruction = builder.get_instruction()
        input_model = builder.get_input_model()
        output_model = builder.get_output_model()

        # Extract data from the builder to initialize LLMTask
        return LLM(
            instruction=instruction,
            input_model=input_model,
            output_model=output_model,
            client=client,
            cache=cache,
            is_reasoning_model=is_reasoning_model,
            lora_path=lora_path,
            vllm_cmd=vllm_cmd,
            vllm_timeout=vllm_timeout,
            vllm_reuse=vllm_reuse,
            **model_kwargs,
        )
llm_utils/lm/llm_signature.py
ADDED
@@ -0,0 +1,35 @@
"""
LLM-as-a-Judge implementation with template support and SFT export utilities.

This module provides a base class for creating LLM judges with structured
prompts, variable substitution, and export capabilities for fine-tuning.
"""

import json
from typing import Any, Dict, List, Optional, Type, Union
from pydantic import BaseModel
from ..chat_format import get_conversation_one_turn
from .llm import LLM
from .signature import Signature


class LLMSignature(LLM):
    """Base class for LLM judges with template support and SFT export."""

    def __init__(self, signature: Type[Signature], **kwargs):
        """
        Initialize LLMJudgeBase.

        Args:
            system_prompt_template: System prompt template with {variable} placeholders
            signature: Optional Signature class for structured I/O
            **kwargs: Additional arguments passed to LLMTask
        """
        self.signature = signature
        self.sft_data: List[Dict[str, Any]] = []  # Store SFT training examples

        # Set instruction from signature if available
        kwargs.setdefault("instruction", signature.get_instruction())
        kwargs.setdefault("output_model", signature.get_output_model())

        super().__init__(**kwargs)
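A minimal sketch of driving LLMSignature, under stated assumptions: the concrete Signature base class lives in the also-added llm_utils/lm/signature.py, which this diff view does not reproduce, so the stand-in class below only implements the two classmethods the constructor above actually calls (get_instruction() and get_output_model()); Verdict, AnswerJudge, and the port are hypothetical.

# Hypothetical sketch (not part of the wheel); assumes an OpenAI-compatible
# server on port 8000, as in the LLM example above.
from pydantic import BaseModel

from llm_utils.lm.llm_signature import LLMSignature


class Verdict(BaseModel):  # illustrative output schema
    score: int
    rationale: str


class AnswerJudge:  # stand-in exposing the interface LLMSignature relies on
    @classmethod
    def get_instruction(cls) -> str:
        return "Rate how well the answer addresses the question (0-10)."

    @classmethod
    def get_output_model(cls):
        return Verdict


judge = LLMSignature(signature=AnswerJudge, client=8000, cache=True)
verdict = judge("Question: ...\nAnswer: ...")[0]["parsed"]  # -> Verdict instance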