speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the changes as they appear in that registry.
Files changed (54)
  1. llm_utils/__init__.py +16 -4
  2. llm_utils/chat_format/__init__.py +10 -10
  3. llm_utils/chat_format/display.py +33 -21
  4. llm_utils/chat_format/transform.py +17 -19
  5. llm_utils/chat_format/utils.py +6 -4
  6. llm_utils/group_messages.py +17 -14
  7. llm_utils/lm/__init__.py +6 -5
  8. llm_utils/lm/async_lm/__init__.py +1 -0
  9. llm_utils/lm/async_lm/_utils.py +10 -9
  10. llm_utils/lm/async_lm/async_llm_task.py +141 -137
  11. llm_utils/lm/async_lm/async_lm.py +48 -42
  12. llm_utils/lm/async_lm/async_lm_base.py +59 -60
  13. llm_utils/lm/async_lm/lm_specific.py +4 -3
  14. llm_utils/lm/base_prompt_builder.py +93 -70
  15. llm_utils/lm/llm.py +126 -108
  16. llm_utils/lm/llm_signature.py +4 -2
  17. llm_utils/lm/lm_base.py +72 -73
  18. llm_utils/lm/mixins.py +102 -62
  19. llm_utils/lm/openai_memoize.py +124 -87
  20. llm_utils/lm/signature.py +105 -92
  21. llm_utils/lm/utils.py +42 -23
  22. llm_utils/scripts/vllm_load_balancer.py +23 -30
  23. llm_utils/scripts/vllm_serve.py +8 -7
  24. llm_utils/vector_cache/__init__.py +9 -3
  25. llm_utils/vector_cache/cli.py +1 -1
  26. llm_utils/vector_cache/core.py +59 -63
  27. llm_utils/vector_cache/types.py +7 -5
  28. llm_utils/vector_cache/utils.py +12 -8
  29. speedy_utils/__imports.py +244 -0
  30. speedy_utils/__init__.py +90 -194
  31. speedy_utils/all.py +125 -227
  32. speedy_utils/common/clock.py +37 -42
  33. speedy_utils/common/function_decorator.py +6 -12
  34. speedy_utils/common/logger.py +43 -52
  35. speedy_utils/common/notebook_utils.py +13 -21
  36. speedy_utils/common/patcher.py +21 -17
  37. speedy_utils/common/report_manager.py +42 -44
  38. speedy_utils/common/utils_cache.py +152 -169
  39. speedy_utils/common/utils_io.py +137 -103
  40. speedy_utils/common/utils_misc.py +15 -21
  41. speedy_utils/common/utils_print.py +22 -28
  42. speedy_utils/multi_worker/process.py +66 -79
  43. speedy_utils/multi_worker/thread.py +78 -155
  44. speedy_utils/scripts/mpython.py +38 -36
  45. speedy_utils/scripts/openapi_client_codegen.py +10 -10
  46. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
  47. speedy_utils-1.1.29.dist-info/RECORD +57 -0
  48. vision_utils/README.md +202 -0
  49. vision_utils/__init__.py +4 -0
  50. vision_utils/io_utils.py +735 -0
  51. vision_utils/plot.py +345 -0
  52. speedy_utils-1.1.27.dist-info/RECORD +0 -52
  53. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
  54. {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
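Much of the per-file churn below is mechanical: double-quoted string literals become single-quoted, and type hints move from typing.Optional/List/Dict/Type to PEP 604 unions and built-in generics. A minimal before/after sketch of the hint change, mirroring the list_models signature shown in the lm_base.py diff (illustration only, not code copied from the package):

# 1.1.27 style
from typing import List, Optional

def list_models(base_url: Optional[str] = None) -> List[str]: ...

# 1.1.29 style: PEP 604 union and built-in generic
def list_models(base_url: str | None = None) -> list[str]: ...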
llm_utils/lm/lm_base.py CHANGED
@@ -40,23 +40,23 @@ class LMBase:
  def __init__(
  self,
  *,
- base_url: Optional[str] = None,
- api_key: Optional[str] = None,
+ base_url: str | None = None,
+ api_key: str | None = None,
  cache: bool = True,
- ports: Optional[List[int]] = None,
+ ports: list[int] | None = None,
  ) -> None:
  self.base_url = base_url
- self.api_key = api_key or os.getenv("OPENAI_API_KEY", "abc")
+ self.api_key = api_key or os.getenv('OPENAI_API_KEY', 'abc')
  self._cache = cache
  self.ports = ports

  @property
- def client(self) -> MOpenAI:
+ def client(self) -> MOpenAI: # type: ignore
  # if have multiple ports
  if self.ports and self.base_url:
  import random
  import re
-
+
  port = random.choice(self.ports)
  # Replace port in base_url if it exists
  base_url_pattern = r'(https?://[^:/]+):?\d*(/.*)?'
@@ -64,16 +64,16 @@ class LMBase:
  if match:
  host_part = match.group(1)
  path_part = match.group(2) or '/v1'
- api_base = f"{host_part}:{port}{path_part}"
+ api_base = f'{host_part}:{port}{path_part}'
  else:
  api_base = self.base_url
- logger.debug(f"Using port: {port}")
+ logger.debug(f'Using port: {port}')
  else:
  api_base = self.base_url
-
+
  if api_base is None:
- raise ValueError("base_url must be provided")
-
+ raise ValueError('base_url must be provided')
+
  client = MOpenAI(
  api_key=self.api_key,
  base_url=api_base,
@@ -89,8 +89,8 @@ class LMBase:
  def __call__( # type: ignore
  self,
  *,
- prompt: Optional[str] = ...,
- messages: Optional[RawMsgs] = ...,
+ prompt: str | None = ...,
+ messages: RawMsgs | None = ...,
  response_format: type[str] = str,
  return_openai_response: bool = ...,
  **kwargs: Any,
@@ -100,9 +100,9 @@ class LMBase:
  def __call__(
  self,
  *,
- prompt: Optional[str] = ...,
- messages: Optional[RawMsgs] = ...,
- response_format: Type[TModel],
+ prompt: str | None = ...,
+ messages: RawMsgs | None = ...,
+ response_format: type[TModel],
  return_openai_response: bool = ...,
  **kwargs: Any,
  ) -> TModel: ...
@@ -114,62 +114,62 @@ class LMBase:
  def _convert_messages(msgs: LegacyMsgs) -> Messages:
  converted: Messages = []
  for msg in msgs:
- role = msg["role"]
- content = msg["content"]
- if role == "user":
+ role = msg['role']
+ content = msg['content']
+ if role == 'user':
  converted.append(
- ChatCompletionUserMessageParam(role="user", content=content)
+ ChatCompletionUserMessageParam(role='user', content=content)
  )
- elif role == "assistant":
+ elif role == 'assistant':
  converted.append(
  ChatCompletionAssistantMessageParam(
- role="assistant", content=content
+ role='assistant', content=content
  )
  )
- elif role == "system":
+ elif role == 'system':
  converted.append(
- ChatCompletionSystemMessageParam(role="system", content=content)
+ ChatCompletionSystemMessageParam(role='system', content=content)
  )
- elif role == "tool":
+ elif role == 'tool':
  converted.append(
  ChatCompletionToolMessageParam(
- role="tool",
+ role='tool',
  content=content,
- tool_call_id=msg.get("tool_call_id") or "",
+ tool_call_id=msg.get('tool_call_id') or '',
  )
  )
  else:
- converted.append({"role": role, "content": content}) # type: ignore[arg-type]
+ converted.append({'role': role, 'content': content}) # type: ignore[arg-type]
  return converted

  @staticmethod
  def _parse_output(
- raw_response: Any, response_format: Union[type[str], Type[BaseModel]]
- ) -> Union[str, BaseModel]:
- if hasattr(raw_response, "model_dump"):
+ raw_response: Any, response_format: type[str] | type[BaseModel]
+ ) -> str | BaseModel:
+ if hasattr(raw_response, 'model_dump'):
  raw_response = raw_response.model_dump()

  if response_format is str:
- if isinstance(raw_response, dict) and "choices" in raw_response:
- message = raw_response["choices"][0]["message"]
- return message.get("content", "") or ""
+ if isinstance(raw_response, dict) and 'choices' in raw_response:
+ message = raw_response['choices'][0]['message']
+ return message.get('content', '') or ''
  return cast(str, raw_response)

- model_cls = cast(Type[BaseModel], response_format)
+ model_cls = cast(type[BaseModel], response_format)

- if isinstance(raw_response, dict) and "choices" in raw_response:
- message = raw_response["choices"][0]["message"]
- if "parsed" in message:
- return model_cls.model_validate(message["parsed"])
- content = message.get("content")
+ if isinstance(raw_response, dict) and 'choices' in raw_response:
+ message = raw_response['choices'][0]['message']
+ if 'parsed' in message:
+ return model_cls.model_validate(message['parsed'])
+ content = message.get('content')
  if content is None:
- raise ValueError("Model returned empty content")
+ raise ValueError('Model returned empty content')
  try:
  data = json.loads(content)
  return model_cls.model_validate(data)
  except Exception as exc:
  raise ValueError(
- f"Failed to parse model output as JSON:\n{content}"
+ f'Failed to parse model output as JSON:\n{content}'
  ) from exc

  if isinstance(raw_response, model_cls):
@@ -182,7 +182,7 @@ class LMBase:
  return model_cls.model_validate(data)
  except Exception as exc:
  raise ValueError(
- f"Model did not return valid JSON:\n---\n{raw_response}"
+ f'Model did not return valid JSON:\n---\n{raw_response}'
  ) from exc

  # ------------------------------------------------------------------ #
@@ -190,17 +190,17 @@ class LMBase:
  # ------------------------------------------------------------------ #

  @staticmethod
- def list_models(base_url: Optional[str] = None) -> List[str]:
+ def list_models(base_url: str | None = None) -> list[str]:
  try:
  if base_url is None:
- raise ValueError("base_url must be provided")
+ raise ValueError('base_url must be provided')
  client = LMBase(base_url=base_url).client
  base_url_obj: URL = client.base_url
- logger.debug(f"Base URL: {base_url_obj}")
+ logger.debug(f'Base URL: {base_url_obj}')
  models: SyncPage[Model] = client.models.list() # type: ignore[assignment]
  return [model.id for model in models.data]
  except Exception as exc:
- logger.error(f"Failed to list models: {exc}")
+ logger.error(f'Failed to list models: {exc}')
  return []

  def build_system_prompt(
@@ -212,15 +212,15 @@ class LMBase:
  think,
  ):
  if add_json_schema_to_instruction and response_model:
- schema_block = f"\n\n<output_json_schema>\n{json.dumps(json_schema, indent=2)}\n</output_json_schema>"
+ schema_block = f'\n\n<output_json_schema>\n{json.dumps(json_schema, indent=2)}\n</output_json_schema>'
  # if schema_block not in system_content:
- if "<output_json_schema>" in system_content:
+ if '<output_json_schema>' in system_content:
  # remove exsting schema block
  import re # replace

  system_content = re.sub(
- r"<output_json_schema>.*?</output_json_schema>",
- "",
+ r'<output_json_schema>.*?</output_json_schema>',
+ '',
  system_content,
  flags=re.DOTALL,
  )
@@ -228,36 +228,35 @@ class LMBase:
  system_content += schema_block

  if think is True:
- if "/think" in system_content:
+ if '/think' in system_content:
  pass
- elif "/no_think" in system_content:
- system_content = system_content.replace("/no_think", "/think")
+ elif '/no_think' in system_content:
+ system_content = system_content.replace('/no_think', '/think')
  else:
- system_content += "\n\n/think"
+ system_content += '\n\n/think'
  elif think is False:
- if "/no_think" in system_content:
+ if '/no_think' in system_content:
  pass
- elif "/think" in system_content:
- system_content = system_content.replace("/think", "/no_think")
+ elif '/think' in system_content:
+ system_content = system_content.replace('/think', '/no_think')
  else:
- system_content += "\n\n/no_think"
+ system_content += '\n\n/no_think'
  return system_content

  def inspect_history(self):
  """Inspect the history of the LLM calls."""
- pass
-

- def get_model_name(client: OpenAI|str|int) -> str:
+
+ def get_model_name(client: OpenAI | str | int) -> str:
  """
  Get the first available model name from the client.
-
+
  Args:
  client: OpenAI client, base_url string, or port number
-
+
  Returns:
  Name of the first available model
-
+
  Raises:
  ValueError: If no models are available or client is invalid
  """
@@ -269,17 +268,17 @@ def get_model_name(client: OpenAI|str|int) -> str:
  openai_client = OpenAI(base_url=client, api_key='abc')
  elif isinstance(client, int):
  # Port number
- base_url = f"http://localhost:{client}/v1"
+ base_url = f'http://localhost:{client}/v1'
  openai_client = OpenAI(base_url=base_url, api_key='abc')
  else:
- raise ValueError(f"Unsupported client type: {type(client)}")
-
+ raise ValueError(f'Unsupported client type: {type(client)}')
+
  models = openai_client.models.list()
  if not models.data:
- raise ValueError("No models available")
-
+ raise ValueError('No models available')
+
  return models.data[0].id
-
+
  except Exception as exc:
- logger.error(f"Failed to get model name: {exc}")
- raise ValueError(f"Could not retrieve model name: {exc}") from exc
+ logger.error(f'Failed to get model name: {exc}')
+ raise ValueError(f'Could not retrieve model name: {exc}') from exc
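The client property in the hunks above rotates across multiple local server ports: it picks a random entry from self.ports and splices it into base_url with a regex before constructing the MOpenAI client. A standalone sketch of that selection step; the helper name pick_api_base and the example URL/ports are hypothetical, not code from the package:

import random
import re

def pick_api_base(base_url: str, ports: list[int] | None) -> str:
    # Simplified mirror of the port-rotation logic in LMBase.client.
    if not ports:
        return base_url
    port = random.choice(ports)
    match = re.match(r'(https?://[^:/]+):?\d*(/.*)?', base_url)
    if not match:
        return base_url
    host_part = match.group(1)
    path_part = match.group(2) or '/v1'
    return f'{host_part}:{port}{path_part}'

# pick_api_base('http://localhost:8000/v1', [8000, 8001, 8002])
# -> e.g. 'http://localhost:8001/v1' (one of the listed ports, chosen at random)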
llm_utils/lm/mixins.py CHANGED
@@ -1,14 +1,21 @@
  """Mixin classes for LLM functionality extensions."""

+ # type: ignore
+
+ from __future__ import annotations
+
  import os
  import subprocess
  from time import sleep
- from typing import Any, Dict, List, Optional, Type, Union
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union

  import requests
  from loguru import logger
- from openai import OpenAI
- from pydantic import BaseModel
+
+
+ if TYPE_CHECKING:
+ from openai import OpenAI
+ from pydantic import BaseModel


  class TemperatureRangeMixin:
@@ -16,12 +23,12 @@ class TemperatureRangeMixin:
  def temperature_range_sampling(
  self,
- input_data: Union[str, BaseModel, List[Dict]],
+ input_data: 'str | BaseModel | list[dict]',
  temperature_ranges: tuple[float, float],
  n: int = 32,
- response_model: Optional[Type[BaseModel] | Type[str]] = None,
+ response_model: 'type[BaseModel] | type[str] | None' = None,
  **runtime_kwargs,
- ) -> List[Dict[str, Any]]:
+ ) -> list[dict[str, Any]]:
  """
  Sample LLM responses with a range of temperatures.
@@ -38,11 +45,13 @@ class TemperatureRangeMixin:
  Returns:
  List of response dictionaries from all temperature samples
  """
+ from pydantic import BaseModel
+
  from speedy_utils.multi_worker.thread import multi_thread

  min_temp, max_temp = temperature_ranges
  if n < 2:
- raise ValueError(f"n must be >= 2, got {n}")
+ raise ValueError(f'n must be >= 2, got {n}')

  step = (max_temp - min_temp) / (n - 1)
  list_kwargs = []
@@ -56,7 +65,7 @@ class TemperatureRangeMixin:
  list_kwargs.append(kwargs)

  def f(kwargs):
- i = kwargs.pop("i")
+ i = kwargs.pop('i')
  sleep(i * 0.05)
  return self.__inner_call__(
  input_data,
@@ -73,10 +82,10 @@ class TwoStepPydanticMixin:
  def two_step_pydantic_parse(
  self,
- input_data: Union[str, BaseModel, List[Dict]],
- response_model: Type[BaseModel],
+ input_data,
+ response_model,
  **runtime_kwargs,
- ) -> List[Dict[str, Any]]:
+ ) -> list[dict[str, Any]]:
  """
  Parse responses in two steps: text completion then Pydantic parsing.
@@ -91,32 +100,45 @@ class TwoStepPydanticMixin:
  Returns:
  List of parsed response dictionaries
  """
+ from pydantic import BaseModel
+
  # Step 1: Get text completions
  results = self.text_completion(input_data, **runtime_kwargs)
  parsed_results = []

  for result in results:
- response_text = result["parsed"]
- messages = result["messages"]
+ response_text = result['parsed']
+ messages = result['messages']

  # Handle reasoning models that use <think> tags
- if "</think>" in response_text:
- response_text = response_text.split("</think>")[1]
+ if '</think>' in response_text:
+ response_text = response_text.split('</think>')[1]

  try:
- # Try direct parsing
- parsed = response_model.model_validate_json(response_text)
+ # Try direct parsing - support both Pydantic v1 and v2
+ if hasattr(response_model, 'model_validate_json'):
+ # Pydantic v2
+ parsed = response_model.model_validate_json(response_text)
+ else:
+ # Pydantic v1
+ import json
+
+ parsed = response_model.parse_obj(json.loads(response_text))
  except Exception:
  # Fallback: use LLM to extract JSON
- logger.warning("Failed to parse JSON directly, using LLM to extract")
+ logger.warning('Failed to parse JSON directly, using LLM to extract')
  _parsed_messages = [
  {
- "role": "system",
- "content": ("You are a helpful assistant that extracts JSON from text."),
+ 'role': 'system',
+ 'content': (
+ 'You are a helpful assistant that extracts JSON from text.'
+ ),
  },
  {
- "role": "user",
- "content": (f"Extract JSON from the following text:\n{response_text}"),
+ 'role': 'user',
+ 'content': (
+ f'Extract JSON from the following text:\n{response_text}'
+ ),
  },
  ]
  parsed_result = self.pydantic_parse(
@@ -124,9 +146,9 @@ class TwoStepPydanticMixin:
  response_model=response_model,
  **runtime_kwargs,
  )[0]
- parsed = parsed_result["parsed"]
+ parsed = parsed_result['parsed']

- parsed_results.append({"parsed": parsed, "messages": messages})
+ parsed_results.append({'parsed': parsed, 'messages': messages})

  return parsed_results
@@ -153,7 +175,7 @@ class VLLMMixin:
  get_base_client,
  )

- if not hasattr(self, "vllm_cmd") or not self.vllm_cmd:
+ if not hasattr(self, 'vllm_cmd') or not self.vllm_cmd:
  return

  port = _extract_port_from_vllm_cmd(self.vllm_cmd)
@@ -163,26 +185,30 @@ class VLLMMixin:
  try:
  reuse_client = get_base_client(port, cache=False)
  models_response = reuse_client.models.list()
- if getattr(models_response, "data", None):
+ if getattr(models_response, 'data', None):
  reuse_existing = True
  logger.info(
- f"VLLM server already running on port {port}, reusing existing server (vllm_reuse=True)"
+ f'VLLM server already running on port {port}, reusing existing server (vllm_reuse=True)'
  )
  else:
- logger.info(f"No models returned from VLLM server on port {port}; starting a new server")
+ logger.info(
+ f'No models returned from VLLM server on port {port}; starting a new server'
+ )
  except Exception as exc:
  logger.info(
- f"Unable to reach VLLM server on port {port} (list_models failed): {exc}. Starting a new server."
+ f'Unable to reach VLLM server on port {port} (list_models failed): {exc}. Starting a new server.'
  )

  if not self.vllm_reuse:
  if _is_server_running(port):
- logger.info(f"VLLM server already running on port {port}, killing it first (vllm_reuse=False)")
+ logger.info(
+ f'VLLM server already running on port {port}, killing it first (vllm_reuse=False)'
+ )
  _kill_vllm_on_port(port)
- logger.info(f"Starting new VLLM server on port {port}")
+ logger.info(f'Starting new VLLM server on port {port}')
  self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)
  elif not reuse_existing:
- logger.info(f"Starting VLLM server on port {port}")
+ logger.info(f'Starting VLLM server on port {port}')
  self.vllm_process = _start_vllm_server(self.vllm_cmd, self.vllm_timeout)

  def _load_lora_adapter(self) -> None:
@@ -195,8 +221,8 @@ class VLLMMixin:
  3. Loads the LoRA adapter and updates the model name
  """
  from .utils import (
- _is_lora_path,
  _get_port_from_client,
+ _is_lora_path,
  _load_lora_adapter,
  )
@@ -204,12 +230,14 @@ class VLLMMixin:
  return

  if not _is_lora_path(self.lora_path):
- raise ValueError(f"Invalid LoRA path '{self.lora_path}': Directory must contain 'adapter_config.json'")
+ raise ValueError(
+ f"Invalid LoRA path '{self.lora_path}': Directory must contain 'adapter_config.json'"
+ )

- logger.info(f"Loading LoRA adapter from: {self.lora_path}")
+ logger.info(f'Loading LoRA adapter from: {self.lora_path}')

  # Get the expected LoRA name (basename of the path)
- lora_name = os.path.basename(self.lora_path.rstrip("/\\"))
+ lora_name = os.path.basename(self.lora_path.rstrip('/\\'))
  if not lora_name: # Handle edge case of empty basename
  lora_name = os.path.basename(os.path.dirname(self.lora_path))
@@ -217,13 +245,17 @@ class VLLMMixin:
  try:
  available_models = [m.id for m in self.client.models.list().data]
  except Exception as e:
- logger.warning(f"Failed to list models, proceeding with LoRA load: {str(e)[:100]}")
+ logger.warning(
+ f'Failed to list models, proceeding with LoRA load: {str(e)[:100]}'
+ )
  available_models = []

  # Check if LoRA is already loaded
  if lora_name in available_models and not self.force_lora_unload:
- logger.info(f"LoRA adapter '{lora_name}' is already loaded, using existing model")
- self.model_kwargs["model"] = lora_name
+ logger.info(
+ f"LoRA adapter '{lora_name}' is already loaded, using existing model"
+ )
+ self.model_kwargs['model'] = lora_name
  return

  # Force unload if requested
@@ -233,43 +265,49 @@ class VLLMMixin:
  if port is not None:
  try:
  VLLMMixin.unload_lora(port, lora_name)
- logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+ logger.info(f'Successfully unloaded LoRA adapter: {lora_name}')
  except Exception as e:
- logger.warning(f"Failed to unload LoRA adapter: {str(e)[:100]}")
+ logger.warning(f'Failed to unload LoRA adapter: {str(e)[:100]}')

  # Get port from client for API calls
  port = _get_port_from_client(self.client)
  if port is None:
  raise ValueError(
  f"Cannot load LoRA adapter '{self.lora_path}': "
- f"Unable to determine port from client base_url. "
- f"LoRA loading requires a client initialized with port."
+ f'Unable to determine port from client base_url. '
+ f'LoRA loading requires a client initialized with port.'
  )

  try:
  # Load the LoRA adapter
  loaded_lora_name = _load_lora_adapter(self.lora_path, port)
- logger.info(f"Successfully loaded LoRA adapter: {loaded_lora_name}")
+ logger.info(f'Successfully loaded LoRA adapter: {loaded_lora_name}')

  # Update model name to the loaded LoRA name
- self.model_kwargs["model"] = loaded_lora_name
+ self.model_kwargs['model'] = loaded_lora_name

  except requests.RequestException as e:
  # Check if error is due to LoRA already being loaded
  error_msg = str(e)
- if "400" in error_msg or "Bad Request" in error_msg:
- logger.info(f"LoRA adapter may already be loaded, attempting to use '{lora_name}'")
+ if '400' in error_msg or 'Bad Request' in error_msg:
+ logger.info(
+ f"LoRA adapter may already be loaded, attempting to use '{lora_name}'"
+ )
  # Refresh the model list to check if it's now available
  try:
  updated_models = [m.id for m in self.client.models.list().data]
  if lora_name in updated_models:
- logger.info(f"Found LoRA adapter '{lora_name}' in updated model list")
- self.model_kwargs["model"] = lora_name
+ logger.info(
+ f"Found LoRA adapter '{lora_name}' in updated model list"
+ )
+ self.model_kwargs['model'] = lora_name
  return
  except Exception:
  pass # Fall through to original error

- raise ValueError(f"Failed to load LoRA adapter from '{self.lora_path}': {error_msg[:100]}")
+ raise ValueError(
+ f"Failed to load LoRA adapter from '{self.lora_path}': {error_msg[:100]}"
+ ) from e

  def unload_lora_adapter(self, lora_path: str) -> None:
  """
@@ -286,14 +324,14 @@ class VLLMMixin:
  port = _get_port_from_client(self.client)
  if port is None:
  raise ValueError(
- "Cannot unload LoRA adapter: "
- "Unable to determine port from client base_url. "
- "LoRA operations require a client initialized with port."
+ 'Cannot unload LoRA adapter: '
+ 'Unable to determine port from client base_url. '
+ 'LoRA operations require a client initialized with port.'
  )

  _unload_lora_adapter(lora_path, port)
- lora_name = os.path.basename(lora_path.rstrip("/\\"))
- logger.info(f"Unloaded LoRA adapter: {lora_name}")
+ lora_name = os.path.basename(lora_path.rstrip('/\\'))
+ logger.info(f'Unloaded LoRA adapter: {lora_name}')

  @staticmethod
  def unload_lora(port: int, lora_name: str) -> None:
@@ -309,15 +347,15 @@ class VLLMMixin:
  """
  try:
  response = requests.post(
- f"http://localhost:{port}/v1/unload_lora_adapter",
+ f'http://localhost:{port}/v1/unload_lora_adapter',
  headers={
- "accept": "application/json",
- "Content-Type": "application/json",
+ 'accept': 'application/json',
+ 'Content-Type': 'application/json',
  },
- json={"lora_name": lora_name, "lora_int_id": 0},
+ json={'lora_name': lora_name, 'lora_int_id': 0},
  )
  response.raise_for_status()
- logger.info(f"Successfully unloaded LoRA adapter: {lora_name}")
+ logger.info(f'Successfully unloaded LoRA adapter: {lora_name}')
  except requests.RequestException as e:
  logger.error(f"Error unloading LoRA adapter '{lora_name}': {str(e)[:100]}")
  raise
@@ -326,7 +364,7 @@ class VLLMMixin:
  """Stop the VLLM server process if started by this instance."""
  from .utils import stop_vllm_process

- if hasattr(self, "vllm_process") and self.vllm_process is not None:
+ if hasattr(self, 'vllm_process') and self.vllm_process is not None:
  stop_vllm_process(self.vllm_process)
  self.vllm_process = None
@@ -362,7 +400,7 @@ class ModelUtilsMixin:
  """Mixin for model utility methods."""

  @staticmethod
- def list_models(client: Union[OpenAI, int, str, None] = None) -> List[str]:
+ def list_models(client=None) -> list[str]:
  """
  List available models from the OpenAI client.
@@ -372,6 +410,8 @@ class ModelUtilsMixin:
  Returns:
  List of available model names
  """
+ from openai import OpenAI
+
  from .utils import get_base_client

  client_instance = get_base_client(client, cache=False)
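A recurring pattern in the mixins.py changes is deferring heavyweight imports: openai and pydantic move under if TYPE_CHECKING: (with from __future__ import annotations), and the methods that actually need them import locally at call time. A minimal sketch of the pattern, assuming a hypothetical ExampleMixin and first_model method that are not part of the package:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers; keeps module import cheap at runtime.
    from openai import OpenAI


class ExampleMixin:
    def first_model(self, client: OpenAI | int) -> str:
        # Heavy import deferred to the call site, as the mixins above now do.
        from openai import OpenAI

        if isinstance(client, int):  # treat an int as a local server port
            client = OpenAI(base_url=f'http://localhost:{client}/v1', api_key='abc')
        return client.models.list().data[0].id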