speedy-utils 1.1.26__py3-none-any.whl → 1.1.28__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- llm_utils/__init__.py +16 -4
- llm_utils/chat_format/__init__.py +10 -10
- llm_utils/chat_format/display.py +33 -21
- llm_utils/chat_format/transform.py +17 -19
- llm_utils/chat_format/utils.py +6 -4
- llm_utils/group_messages.py +17 -14
- llm_utils/lm/__init__.py +6 -5
- llm_utils/lm/async_lm/__init__.py +1 -0
- llm_utils/lm/async_lm/_utils.py +10 -9
- llm_utils/lm/async_lm/async_llm_task.py +141 -137
- llm_utils/lm/async_lm/async_lm.py +48 -42
- llm_utils/lm/async_lm/async_lm_base.py +59 -60
- llm_utils/lm/async_lm/lm_specific.py +4 -3
- llm_utils/lm/base_prompt_builder.py +93 -70
- llm_utils/lm/llm.py +126 -108
- llm_utils/lm/llm_signature.py +4 -2
- llm_utils/lm/lm_base.py +72 -73
- llm_utils/lm/mixins.py +102 -62
- llm_utils/lm/openai_memoize.py +124 -87
- llm_utils/lm/signature.py +105 -92
- llm_utils/lm/utils.py +42 -23
- llm_utils/scripts/vllm_load_balancer.py +23 -30
- llm_utils/scripts/vllm_serve.py +8 -7
- llm_utils/vector_cache/__init__.py +9 -3
- llm_utils/vector_cache/cli.py +1 -1
- llm_utils/vector_cache/core.py +59 -63
- llm_utils/vector_cache/types.py +7 -5
- llm_utils/vector_cache/utils.py +12 -8
- speedy_utils/__imports.py +244 -0
- speedy_utils/__init__.py +90 -194
- speedy_utils/all.py +125 -227
- speedy_utils/common/clock.py +37 -42
- speedy_utils/common/function_decorator.py +6 -12
- speedy_utils/common/logger.py +43 -52
- speedy_utils/common/notebook_utils.py +13 -21
- speedy_utils/common/patcher.py +21 -17
- speedy_utils/common/report_manager.py +42 -44
- speedy_utils/common/utils_cache.py +152 -169
- speedy_utils/common/utils_io.py +137 -103
- speedy_utils/common/utils_misc.py +15 -21
- speedy_utils/common/utils_print.py +22 -28
- speedy_utils/multi_worker/process.py +66 -79
- speedy_utils/multi_worker/thread.py +78 -155
- speedy_utils/scripts/mpython.py +38 -36
- speedy_utils/scripts/openapi_client_codegen.py +10 -10
- {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/METADATA +1 -1
- speedy_utils-1.1.28.dist-info/RECORD +57 -0
- vision_utils/README.md +202 -0
- vision_utils/__init__.py +5 -0
- vision_utils/io_utils.py +470 -0
- vision_utils/plot.py +345 -0
- speedy_utils-1.1.26.dist-info/RECORD +0 -52
- {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.26.dist-info → speedy_utils-1.1.28.dist-info}/entry_points.txt +0 -0
llm_utils/lm/llm.py
CHANGED
@@ -10,35 +10,36 @@ from typing import Any, Dict, List, Optional, Type, Union, cast
 
 import requests
 from loguru import logger
-from openai import
+from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
 
 from speedy_utils.common.utils_io import jdumps
 
+from .base_prompt_builder import BasePromptBuilder
+from .mixins import (
+    ModelUtilsMixin,
+    TemperatureRangeMixin,
+    TwoStepPydanticMixin,
+    VLLMMixin,
+)
 from .utils import (
     _extract_port_from_vllm_cmd,
-    _start_vllm_server,
-    _kill_vllm_on_port,
-    _is_server_running,
-    get_base_client,
-    _is_lora_path,
     _get_port_from_client,
+    _is_lora_path,
+    _is_server_running,
+    _kill_vllm_on_port,
     _load_lora_adapter,
+    _start_vllm_server,
     _unload_lora_adapter,
+    get_base_client,
     kill_all_vllm_processes,
     stop_vllm_process,
 )
-
-from .mixins import (
-    TemperatureRangeMixin,
-    TwoStepPydanticMixin,
-    VLLMMixin,
-    ModelUtilsMixin,
-)
+
 
 # Type aliases for better readability
-Messages =
+Messages = list[ChatCompletionMessageParam]
 
 
 class LLM(
@@ -51,15 +52,15 @@ class LLM(
 
     def __init__(
         self,
-        instruction:
-        input_model:
-        output_model:
-        client:
+        instruction: str | None = None,
+        input_model: type[BaseModel] | type[str] = str,
+        output_model: type[BaseModel] | type[str] = None,
+        client: OpenAI | int | str | None = None,
         cache=True,
         is_reasoning_model: bool = False,
         force_lora_unload: bool = False,
-        lora_path:
-        vllm_cmd:
+        lora_path: str | None = None,
+        vllm_cmd: str | None = None,
         vllm_timeout: int = 1200,
         vllm_reuse: bool = True,
         **model_kwargs,
@@ -75,7 +76,7 @@ class LLM(
         self.vllm_cmd = vllm_cmd
         self.vllm_timeout = vllm_timeout
         self.vllm_reuse = vllm_reuse
-        self.vllm_process:
+        self.vllm_process: subprocess.Popen | None = None
         self.last_ai_response = None  # Store raw response from client
         self.cache = cache
 
@@ -88,16 +89,20 @@ class LLM(
         if client is None:
             client = port
 
-        self.client = get_base_client(
+        self.client = get_base_client(
+            client, cache=cache, vllm_cmd=self.vllm_cmd, vllm_process=self.vllm_process
+        )
        # check connection of client
         try:
             self.client.models.list()
         except Exception as e:
-            logger.error(
+            logger.error(
+                f'Failed to connect to OpenAI client: {str(e)}, base_url={self.client.base_url}'
+            )
             raise e
 
-        if not self.model_kwargs.get(
-            self.model_kwargs[
+        if not self.model_kwargs.get('model', ''):
+            self.model_kwargs['model'] = self.client.models.list().data[0].id
 
         # Handle LoRA loading if lora_path is provided
         if self.lora_path:
@@ -111,102 +116,112 @@ class LLM(
         """Context manager exit with cleanup."""
         self.cleanup_vllm_server()
 
-    def _prepare_input(self, input_data:
+    def _prepare_input(self, input_data: str | BaseModel | list[dict]) -> Messages:
         """Convert input to messages format."""
         if isinstance(input_data, list):
-            assert isinstance(input_data[0], dict) and
+            assert isinstance(input_data[0], dict) and 'role' in input_data[0], (
                 "If input_data is a list, it must be a list of messages with 'role' and 'content' keys."
             )
             return cast(Messages, input_data)
+        # Convert input to string format
+        if isinstance(input_data, str):
+            user_content = input_data
+        elif hasattr(input_data, 'model_dump_json'):
+            user_content = input_data.model_dump_json()
+        elif isinstance(input_data, dict):
+            user_content = jdumps(input_data)
         else:
-
-
-
-
-
-
-
-
-
-
-        # Build messages
-        messages = (
-            [
-                {"role": "system", "content": self.instruction},
-            ]
-            if self.instruction is not None
-            else []
-        )
+            user_content = str(input_data)
+
+        # Build messages
+        messages = (
+            [
+                {'role': 'system', 'content': self.instruction},
+            ]
+            if self.instruction is not None
+            else []
+        )
 
-
-
+        messages.append({'role': 'user', 'content': user_content})
+        return cast(Messages, messages)
 
-    def text_completion(
+    def text_completion(
+        self, input_data: str | BaseModel | list[dict], **runtime_kwargs
+    ) -> list[dict[str, Any]]:
         """Execute LLM task and return text responses."""
         # Prepare messages
         messages = self._prepare_input(input_data)
 
         # Merge runtime kwargs with default model kwargs (runtime takes precedence)
         effective_kwargs = {**self.model_kwargs, **runtime_kwargs}
-        model_name = effective_kwargs.get(
+        model_name = effective_kwargs.get('model', self.model_kwargs['model'])
 
         # Extract model name from kwargs for API call
-        api_kwargs = {k: v for k, v in effective_kwargs.items() if k !=
+        api_kwargs = {k: v for k, v in effective_kwargs.items() if k != 'model'}
 
         try:
-            completion = self.client.chat.completions.create(
+            completion = self.client.chat.completions.create(
+                model=model_name, messages=messages, **api_kwargs
+            )
             # Store raw response from client
             self.last_ai_response = completion
         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
-            error_msg = f
+            error_msg = f'OpenAI API error ({type(exc).__name__}): {exc}'
             logger.error(error_msg)
             raise
         except Exception as e:
-            is_length_error =
+            is_length_error = 'Length' in str(e) or 'maximum context length' in str(e)
             if is_length_error:
-                raise ValueError(
+                raise ValueError(
+                    f'Input too long for model {model_name}. Error: {str(e)[:100]}...'
+                ) from e
             # Re-raise all other exceptions
             raise
         # print(completion)
 
-        results:
+        results: list[dict[str, Any]] = []
         for choice in completion.choices:
             choice_messages = cast(
                 Messages,
-                messages + [{
+                messages + [{'role': 'assistant', 'content': choice.message.content}],
             )
-            result_dict = {
+            result_dict = {
+                'parsed': choice.message.content,
+                'messages': choice_messages,
+            }
 
             # Add reasoning content if this is a reasoning model
-            if self.is_reasoning_model and hasattr(choice.message,
-                result_dict[
+            if self.is_reasoning_model and hasattr(choice.message, 'reasoning_content'):
+                result_dict['reasoning_content'] = choice.message.reasoning_content
 
             results.append(result_dict)
         return results
 
     def pydantic_parse(
         self,
-        input_data:
-        response_model:
+        input_data: str | BaseModel | list[dict],
+        response_model: type[BaseModel] | None | type[str] = None,
         **runtime_kwargs,
-    ) ->
+    ) -> list[dict[str, Any]]:
         """Execute LLM task and return parsed Pydantic model responses."""
         # Prepare messages
         messages = self._prepare_input(input_data)
 
         # Merge runtime kwargs with default model kwargs (runtime takes precedence)
         effective_kwargs = {**self.model_kwargs, **runtime_kwargs}
-        model_name = effective_kwargs.get(
+        model_name = effective_kwargs.get('model', self.model_kwargs['model'])
 
         # Extract model name from kwargs for API call
-        api_kwargs = {k: v for k, v in effective_kwargs.items() if k !=
+        api_kwargs = {k: v for k, v in effective_kwargs.items() if k != 'model'}
 
         pydantic_model_to_use_opt = response_model or self.output_model
         if pydantic_model_to_use_opt is None:
             raise ValueError(
-
+                'No response model specified. Either set output_model in constructor or pass response_model parameter.'
             )
-        pydantic_model_to_use:
+        pydantic_model_to_use: type[BaseModel] = cast(
+            type[BaseModel], pydantic_model_to_use_opt
+        )
         try:
             completion = self.client.chat.completions.parse(
                 model=model_name,
@@ -217,21 +232,22 @@ class LLM(
             # Store raw response from client
             self.last_ai_response = completion
         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
-            error_msg = f
+            error_msg = f'OpenAI API error ({type(exc).__name__}): {exc}'
             logger.error(error_msg)
             raise
         except Exception as e:
-            is_length_error =
+            is_length_error = 'Length' in str(e) or 'maximum context length' in str(e)
             if is_length_error:
-                raise ValueError(
-
+                raise ValueError(
+                    f'Input too long for model {model_name}. Error: {str(e)[:100]}...'
+                ) from e
             raise
 
-        results:
+        results: list[dict[str, Any]] = []
        for choice in completion.choices:  # type: ignore[attr-defined]
             choice_messages = cast(
                 Messages,
-                messages + [{
+                messages + [{'role': 'assistant', 'content': choice.message.content}],
             )
 
             # Ensure consistent Pydantic model output for both fresh and cached responses
@@ -243,25 +259,25 @@ class LLM(
                 # Fallback: ensure it's the correct type
                 parsed_content = pydantic_model_to_use.model_validate(parsed_content)
 
-            result_dict = {
+            result_dict = {'parsed': parsed_content, 'messages': choice_messages}
 
             # Add reasoning content if this is a reasoning model
-            if self.is_reasoning_model and hasattr(choice.message,
-                result_dict[
+            if self.is_reasoning_model and hasattr(choice.message, 'reasoning_content'):
+                result_dict['reasoning_content'] = choice.message.reasoning_content
 
             results.append(result_dict)
         return results
 
     def __call__(
         self,
-        input_data:
-        response_model:
+        input_data: str | BaseModel | list[dict],
+        response_model: type[BaseModel] | type[str] | None = None,
         two_step_parse_pydantic: bool = False,
-        temperature_ranges:
+        temperature_ranges: tuple[float, float] | None = None,
         n: int = 1,
         cache=None,
         **openai_client_kwargs,
-    ) ->
+    ) -> list[dict[str, Any]]:
         """
         Execute LLM task.
 
@@ -277,14 +293,16 @@ class LLM(
             List of response dictionaries
         """
         if cache is not None:
-            if hasattr(self.client,
+            if hasattr(self.client, 'set_cache'):
                 self.client.set_cache(cache)
             else:
-                logger.warning(
+                logger.warning('Client does not support caching.')
         # Handle temperature range sampling
         if temperature_ranges is not None:
             if n < 2:
-                raise ValueError(
+                raise ValueError(
+                    f'n must be >= 2 when using temperature_ranges, got {n}'
+                )
             return self.temperature_range_sampling(
                 input_data,
                 temperature_ranges=temperature_ranges,
@@ -292,7 +310,7 @@ class LLM(
                 response_model=response_model,
                 **openai_client_kwargs,
             )
-        openai_client_kwargs[
+        openai_client_kwargs['n'] = n
 
         # Handle two-step Pydantic parsing
         pydantic_model = response_model or self.output_model
@@ -311,33 +329,34 @@ class LLM(
         )
 
         # Track conversation history
-        _last_conv = choices[0][
-        if not hasattr(self,
+        _last_conv = choices[0]['messages'] if choices else []
+        if not hasattr(self, '_last_conversations'):
             self._last_conversations = []
         else:
             self._last_conversations = self._last_conversations[-100:]
         self._last_conversations.append(_last_conv)
         return choices
 
-    def inspect_history(
+    def inspect_history(
+        self, idx: int = -1, k_last_messages: int = 2
+    ) -> list[dict[str, Any]]:
         """Inspect the message history of a specific response choice."""
-        if hasattr(self,
+        if hasattr(self, '_last_conversations'):
             from llm_utils import show_chat_v2
 
             conv = self._last_conversations[idx]
             if k_last_messages > 0:
                 conv = conv[-k_last_messages:]
             return show_chat_v2(conv)
-
-        raise ValueError("No message history available. Make a call first.")
+        raise ValueError('No message history available. Make a call first.')
 
     def __inner_call__(
         self,
-        input_data:
-        response_model:
+        input_data: str | BaseModel | list[dict],
+        response_model: type[BaseModel] | type[str] | None = None,
         two_step_parse_pydantic: bool = False,
         **runtime_kwargs,
-    ) ->
+    ) -> list[dict[str, Any]]:
         """
         Internal call handler. Delegates to text() or parse() based on model.
 
@@ -348,34 +367,33 @@ class LLM(
 
         if pydantic_model_to_use is str or pydantic_model_to_use is None:
             return self.text_completion(input_data, **runtime_kwargs)
-
-
-
-
-
-        )
+        return self.pydantic_parse(
+            input_data,
+            response_model=response_model,
+            **runtime_kwargs,
+        )
 
     # Backward compatibility aliases
-    def text(self, *args, **kwargs) ->
+    def text(self, *args, **kwargs) -> list[dict[str, Any]]:
         """Alias for text_completion() for backward compatibility."""
         return self.text_completion(*args, **kwargs)
 
-    def parse(self, *args, **kwargs) ->
+    def parse(self, *args, **kwargs) -> list[dict[str, Any]]:
         """Alias for pydantic_parse() for backward compatibility."""
         return self.pydantic_parse(*args, **kwargs)
 
     @classmethod
     def from_prompt_builder(
-
-        client:
+        cls: BasePromptBuilder,
+        client: OpenAI | int | str | None = None,
         cache=True,
         is_reasoning_model: bool = False,
-        lora_path:
-        vllm_cmd:
+        lora_path: str | None = None,
+        vllm_cmd: str | None = None,
         vllm_timeout: int = 120,
         vllm_reuse: bool = True,
         **model_kwargs,
-    ) ->
+    ) -> 'LLM':
         """
         Create an LLMTask instance from a BasePromptBuilder instance.
 
@@ -393,9 +411,9 @@ class LLM(
             vllm_reuse: If True (default), reuse existing server on target port
             **model_kwargs: Additional model parameters
         """
-        instruction =
-        input_model =
-        output_model =
+        instruction = cls.get_instruction()
+        input_model = cls.get_input_model()
+        output_model = cls.get_output_model()
 
         # Extract data from the builder to initialize LLMTask
         return LLM(
llm_utils/lm/llm_signature.py
CHANGED
@@ -7,7 +7,9 @@ prompts, variable substitution, and export capabilities for fine-tuning.
 
 import json
 from typing import Any, Dict, List, Optional, Type, Union
+
 from pydantic import BaseModel
+
 from ..chat_format import get_conversation_one_turn
 from .llm import LLM
 from .signature import Signature
@@ -16,7 +18,7 @@ from .signature import Signature
 class LLMSignature(LLM):
     """Base class for LLM judges with template support and SFT export."""
 
-    def __init__(self, signature:
+    def __init__(self, signature: type[Signature], **kwargs):
         """
         Initialize LLMJudgeBase.
 
@@ -26,7 +28,7 @@ class LLMSignature(LLM):
             **kwargs: Additional arguments passed to LLMTask
         """
         self.signature = signature
-        self.sft_data:
+        self.sft_data: list[dict[str, Any]] = []  # Store SFT training examples
 
         # Set instruction from signature if available
         kwargs.setdefault("instruction", signature.get_instruction())