speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
- llm_utils/__init__.py +16 -4
- llm_utils/chat_format/__init__.py +10 -10
- llm_utils/chat_format/display.py +33 -21
- llm_utils/chat_format/transform.py +17 -19
- llm_utils/chat_format/utils.py +6 -4
- llm_utils/group_messages.py +17 -14
- llm_utils/lm/__init__.py +6 -5
- llm_utils/lm/async_lm/__init__.py +1 -0
- llm_utils/lm/async_lm/_utils.py +10 -9
- llm_utils/lm/async_lm/async_llm_task.py +141 -137
- llm_utils/lm/async_lm/async_lm.py +48 -42
- llm_utils/lm/async_lm/async_lm_base.py +59 -60
- llm_utils/lm/async_lm/lm_specific.py +4 -3
- llm_utils/lm/base_prompt_builder.py +93 -70
- llm_utils/lm/llm.py +126 -108
- llm_utils/lm/llm_signature.py +4 -2
- llm_utils/lm/lm_base.py +72 -73
- llm_utils/lm/mixins.py +102 -62
- llm_utils/lm/openai_memoize.py +124 -87
- llm_utils/lm/signature.py +105 -92
- llm_utils/lm/utils.py +42 -23
- llm_utils/scripts/vllm_load_balancer.py +23 -30
- llm_utils/scripts/vllm_serve.py +8 -7
- llm_utils/vector_cache/__init__.py +9 -3
- llm_utils/vector_cache/cli.py +1 -1
- llm_utils/vector_cache/core.py +59 -63
- llm_utils/vector_cache/types.py +7 -5
- llm_utils/vector_cache/utils.py +12 -8
- speedy_utils/__imports.py +244 -0
- speedy_utils/__init__.py +90 -194
- speedy_utils/all.py +125 -227
- speedy_utils/common/clock.py +37 -42
- speedy_utils/common/function_decorator.py +6 -12
- speedy_utils/common/logger.py +43 -52
- speedy_utils/common/notebook_utils.py +13 -21
- speedy_utils/common/patcher.py +21 -17
- speedy_utils/common/report_manager.py +42 -44
- speedy_utils/common/utils_cache.py +152 -169
- speedy_utils/common/utils_io.py +137 -103
- speedy_utils/common/utils_misc.py +15 -21
- speedy_utils/common/utils_print.py +22 -28
- speedy_utils/multi_worker/process.py +66 -79
- speedy_utils/multi_worker/thread.py +78 -155
- speedy_utils/scripts/mpython.py +38 -36
- speedy_utils/scripts/openapi_client_codegen.py +10 -10
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
- speedy_utils-1.1.29.dist-info/RECORD +57 -0
- vision_utils/README.md +202 -0
- vision_utils/__init__.py +4 -0
- vision_utils/io_utils.py +735 -0
- vision_utils/plot.py +345 -0
- speedy_utils-1.1.27.dist-info/RECORD +0 -52
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0

llm_utils/lm/async_lm/async_llm_task.py
@@ -12,16 +12,17 @@ from venv import logger
 
 from openai.types.chat import ChatCompletionMessageParam
 from pydantic import BaseModel
-from speedy_utils.all import dump_json_or_pickle, identify
 
 from llm_utils.chat_format.display import get_conversation_one_turn
 from llm_utils.lm.async_lm._utils import InputModelType, OutputModelType, ParsedOutput
 from llm_utils.lm.async_lm.async_lm import AsyncLM
+from speedy_utils import dump_json_or_pickle, identify
+
 
 # Type aliases for better readability
-TModel = TypeVar(
-Messages =
-LegacyMsgs =
+TModel = TypeVar('TModel', bound=BaseModel)
+Messages = list[ChatCompletionMessageParam]
+LegacyMsgs = list[dict[str, str]]
 RawMsgs = Union[Messages, LegacyMsgs]
 
 # Default configuration constants
@@ -31,38 +32,38 @@ RawMsgs = Union[Messages, LegacyMsgs]
 class LMConfiguration:
     """Configuration class for language model parameters."""
 
-    model:
-    temperature:
-    max_tokens:
-    base_url:
-    api_key:
-    cache:
-    think:
-    add_json_schema_to_instruction:
-    use_beta:
-    ports:
-    top_p:
-    presence_penalty:
-    top_k:
-    repetition_penalty:
-
-    def to_dict(self) ->
+    model: str | None = None
+    temperature: float | None = None
+    max_tokens: int | None = None
+    base_url: str | None = None
+    api_key: str | None = None
+    cache: bool | None = True
+    think: Literal[True, False] | None = None
+    add_json_schema_to_instruction: bool | None = None
+    use_beta: bool | None = False
+    ports: list[int] | None = None
+    top_p: float | None = None
+    presence_penalty: float | None = None
+    top_k: int | None = None
+    repetition_penalty: float | None = None
+
+    def to_dict(self) -> dict[str, Any]:
         """Convert configuration to dictionary format."""
         return {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            'model': self.model,
+            'temperature': self.temperature,
+            'max_tokens': self.max_tokens,
+            'base_url': self.base_url,
+            'api_key': self.api_key,
+            'cache': self.cache,
+            'think': self.think,
+            'add_json_schema_to_instruction': self.add_json_schema_to_instruction,
+            'use_beta': self.use_beta,
+            'ports': self.ports,
+            'top_p': self.top_p,
+            'presence_penalty': self.presence_penalty,
+            'top_k': self.top_k,
+            'repetition_penalty': self.repetition_penalty,
         }
 
 
@@ -83,41 +84,41 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
     OutputModel: OutputModelType
 
     # default class attributes for configuration
-    DEFAULT_MODEL:
-    DEFAULT_CACHE_DIR:
-    DEFAULT_TEMPERATURE:
-    DEFAULT_MAX_TOKENS:
-    DEFAULT_TOP_P:
-    DEFAULT_PRESENCE_PENALTY:
-    DEFAULT_TOP_K:
-    DEFAULT_REPETITION_PENALTY:
-    DEFAULT_CACHE:
-    DEFAULT_THINK:
-    DEFAULT_PORTS:
-    DEFAULT_USE_BETA:
-    DEFAULT_ADD_JSON_SCHEMA_TO_INSTRUCTION:
-    DEFAULT_COLLECT_DATA:
-    DEFAULT_BASE_URL:
-    DEFAULT_API_KEY:
+    DEFAULT_MODEL: str | None = None
+    DEFAULT_CACHE_DIR: pathlib.Path | None = None
+    DEFAULT_TEMPERATURE: float | None = None
+    DEFAULT_MAX_TOKENS: int | None = None
+    DEFAULT_TOP_P: float | None = None
+    DEFAULT_PRESENCE_PENALTY: float | None = None
+    DEFAULT_TOP_K: int | None = None
+    DEFAULT_REPETITION_PENALTY: float | None = None
+    DEFAULT_CACHE: bool | None = True
+    DEFAULT_THINK: Literal[True, False] | None = None
+    DEFAULT_PORTS: list[int] | None = None
+    DEFAULT_USE_BETA: bool | None = False
+    DEFAULT_ADD_JSON_SCHEMA_TO_INSTRUCTION: bool | None = True
+    DEFAULT_COLLECT_DATA: bool | None = None
+    DEFAULT_BASE_URL: str | None = None
+    DEFAULT_API_KEY: str | None = None
 
     IS_DATA_COLLECTION: bool = False
 
     def __init__(
         self,
-        model:
-        temperature:
-        max_tokens:
-        base_url:
-        api_key:
-        cache:
-        think:
-        add_json_schema_to_instruction:
-        use_beta:
-        ports:
-        top_p:
-        presence_penalty:
-        top_k:
-        repetition_penalty:
+        model: str | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        base_url: str | None = None,
+        api_key: str | None = None,
+        cache: bool | None = None,
+        think: Literal[True, False] | None = None,
+        add_json_schema_to_instruction: bool | None = None,
+        use_beta: bool | None = None,
+        ports: list[int] | None = None,
+        top_p: float | None = None,
+        presence_penalty: float | None = None,
+        top_k: int | None = None,
+        repetition_penalty: float | None = None,
     ) -> None:
         """
         Initialize the AsyncLLMTask with language model configuration.
@@ -126,31 +127,37 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         """
         self._config = LMConfiguration(
             model=model if model is not None else self.DEFAULT_MODEL,
-            temperature=
-
-
-            max_tokens=
-
-
+            temperature=(
+                temperature if temperature is not None else self.DEFAULT_TEMPERATURE
+            ),
+            max_tokens=(
+                max_tokens if max_tokens is not None else self.DEFAULT_MAX_TOKENS
+            ),
             base_url=base_url if base_url is not None else self.DEFAULT_BASE_URL,
             api_key=api_key if api_key is not None else self.DEFAULT_API_KEY,
             cache=cache if cache is not None else self.DEFAULT_CACHE,
             think=think if think is not None else self.DEFAULT_THINK,
-            add_json_schema_to_instruction=
-
-
+            add_json_schema_to_instruction=(
+                add_json_schema_to_instruction
+                if add_json_schema_to_instruction is not None
+                else self.DEFAULT_ADD_JSON_SCHEMA_TO_INSTRUCTION
+            ),
             use_beta=use_beta if use_beta is not None else self.DEFAULT_USE_BETA,
             ports=ports if ports is not None else self.DEFAULT_PORTS,
             top_p=top_p if top_p is not None else self.DEFAULT_TOP_P,
-            presence_penalty=
-
-
+            presence_penalty=(
+                presence_penalty
+                if presence_penalty is not None
+                else self.DEFAULT_PRESENCE_PENALTY
+            ),
             top_k=top_k if top_k is not None else self.DEFAULT_TOP_K,
-            repetition_penalty=
-
-
+            repetition_penalty=(
+                repetition_penalty
+                if repetition_penalty is not None
+                else self.DEFAULT_REPETITION_PENALTY
+            ),
         )
-        self._lm:
+        self._lm: AsyncLM | None = None
 
     @property
     def lm(self) -> AsyncLM:
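The rewritten `__init__` resolves every constructor argument against the matching `DEFAULT_*` class attribute before storing it in `LMConfiguration`, so subclasses can pin defaults while callers still override per instance. A minimal sketch of that resolution pattern (simplified, hypothetical names, not the package's actual code):

```python
from typing import Any


class TaskDefaults:
    """Illustrative stand-in for AsyncLLMTask's DEFAULT_* class attributes."""

    DEFAULT_TEMPERATURE: float | None = 0.2
    DEFAULT_MAX_TOKENS: int | None = 2000

    def __init__(
        self,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> None:
        # Each argument falls back to the class-level default when left as None.
        self.config: dict[str, Any] = {
            'temperature': temperature if temperature is not None else self.DEFAULT_TEMPERATURE,
            'max_tokens': max_tokens if max_tokens is not None else self.DEFAULT_MAX_TOKENS,
        }


print(TaskDefaults().config)                 # {'temperature': 0.2, 'max_tokens': 2000}
print(TaskDefaults(temperature=0.9).config)  # {'temperature': 0.9, 'max_tokens': 2000}
```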
@@ -178,21 +185,21 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
             TypeError: If output model type cannot be determined
         """
         # Try to get type from generic base classes
-        orig_bases = getattr(self.__class__,
+        orig_bases = getattr(self.__class__, '__orig_bases__', None)
         if (
             orig_bases
-            and hasattr(orig_bases[0],
+            and hasattr(orig_bases[0], '__args__')
             and len(orig_bases[0].__args__) >= 2
         ):
             return orig_bases[0].__args__[1]
 
         # Fallback to class attribute
-        if hasattr(self,
+        if hasattr(self, 'OutputModel'):
             return self.OutputModel  # type: ignore
 
         raise TypeError(
-            f
-
+            f'{self.__class__.__name__} must define OutputModel as a class attribute '
+            'or use proper generic typing with AsyncLLMTask[InputModel, OutputModel]'
         )
 
     def _get_input_model_type(self) -> type[InputModelType]:
@@ -206,20 +213,20 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
             TypeError: If input model type cannot be determined
         """
         # Try to get type from generic base classes
-        orig_bases = getattr(self.__class__,
+        orig_bases = getattr(self.__class__, '__orig_bases__', None)
         if (
             orig_bases
-            and hasattr(orig_bases[0],
+            and hasattr(orig_bases[0], '__args__')
             and len(orig_bases[0].__args__) >= 2
         ):
             return orig_bases[0].__args__[0]
 
         raise TypeError(
-            f
-
+            f'{self.__class__.__name__} must define InputModel as a class attribute '
+            'or use proper generic typing with AsyncLLMTask[InputModel, OutputModel]'
         )
 
-    def _validate_and_convert_input(self, data:
+    def _validate_and_convert_input(self, data: BaseModel | dict) -> BaseModel:
         """
         Validate and convert input data to the expected input model type.
 
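Both helpers recover the concrete input/output models from the generic subscription of the subclass via `__orig_bases__`. A small standalone sketch of that introspection (hypothetical `In`/`Out` models, not from the package):

```python
from typing import Generic, TypeVar

from pydantic import BaseModel

InputT = TypeVar('InputT', bound=BaseModel)
OutputT = TypeVar('OutputT', bound=BaseModel)


class Task(Generic[InputT, OutputT]):
    pass


class In(BaseModel):
    question: str


class Out(BaseModel):
    answer: str


class MyTask(Task[In, Out]):
    pass


# __orig_bases__ keeps the parameterized base class, and __args__ holds the
# (In, Out) tuple, so the task can recover its models at runtime.
base = MyTask.__orig_bases__[0]
print(base.__args__)  # prints the tuple containing In and Out
```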
@@ -243,10 +250,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
                 return input_model_type(**data)
             except Exception as e:
                 raise TypeError(
-                    f
+                    f'Failed to convert input data to {input_model_type.__name__}: {e}'
                 ) from e
 
-        raise TypeError(
+        raise TypeError('InputModel must be a subclass of BaseModel')
 
     def _validate_output_model(self) -> type[BaseModel]:
         """
@@ -263,12 +270,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
             isinstance(output_model_type, type)
             and issubclass(output_model_type, BaseModel)
         ):
-            raise TypeError(
+            raise TypeError('OutputModel must be a subclass of BaseModel')
         return output_model_type
 
-    async def _base_call(
-        self, data: Union[BaseModel, dict]
-    ) -> ParsedOutput[OutputModelType]:
+    async def _base_call(self, data: BaseModel | dict) -> ParsedOutput[OutputModelType]:
         """
         Core method that handles language model interaction with type safety.
 
@@ -289,7 +294,7 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         return cast(
             ParsedOutput[OutputModelType],
             await self.lm.parse(
-                instruction=self.__doc__ or
+                instruction=self.__doc__ or '',
                 prompt=validated_input.model_dump_json(),
             ),
         )
@@ -311,21 +316,21 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         no_think_messages = copy.deepcopy(think_messages)
 
         # Update system message
-        if no_think_messages and
-            system_content = no_think_messages[0][
+        if no_think_messages and 'content' in no_think_messages[0]:
+            system_content = no_think_messages[0]['content']
             if isinstance(system_content, str):
-                no_think_messages[0][
-
+                no_think_messages[0]['content'] = system_content.replace(
+                    '/think', '/no_think'
                 )
 
         # Update assistant message (last message)
-        if len(no_think_messages) > 1 and
-            assistant_content = no_think_messages[-1][
-            if isinstance(assistant_content, str) and
+        if len(no_think_messages) > 1 and 'content' in no_think_messages[-1]:
+            assistant_content = no_think_messages[-1]['content']
+            if isinstance(assistant_content, str) and '</think>' in assistant_content:
                 # Extract content after thinking block
-                post_think_content = assistant_content.split(
-                no_think_messages[-1][
-                    f
+                post_think_content = assistant_content.split('</think>', 1)[1].strip()
+                no_think_messages[-1]['content'] = (
+                    f'<think>\n\n</think>\n\n{post_think_content}'
                 )
 
         return no_think_messages
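The updated `_create_no_think_messages` turns a "thinking" conversation into its non-thinking twin: the system prompt's `/think` flag becomes `/no_think`, and the assistant turn keeps only the text after `</think>`, behind an empty think block. A standalone sketch of that string transformation on hypothetical messages, following the same logic as the hunk above:

```python
import copy

think_messages = [
    {'role': 'system', 'content': 'You are a careful assistant. /think'},
    {'role': 'user', 'content': 'What is 2 + 2?'},
    {'role': 'assistant', 'content': '<think>\nAdd the numbers.\n</think>\n\n4'},
]

no_think = copy.deepcopy(think_messages)

# System turn: switch the thinking flag off.
if no_think and isinstance(no_think[0].get('content'), str):
    no_think[0]['content'] = no_think[0]['content'].replace('/think', '/no_think')

# Assistant turn: drop the reasoning, keep only what follows </think>.
last = no_think[-1].get('content')
if isinstance(last, str) and '</think>' in last:
    post = last.split('</think>', 1)[1].strip()
    no_think[-1]['content'] = f'<think>\n\n</think>\n\n{post}'

print(no_think[0]['content'])   # ends with /no_think
print(no_think[-1]['content'])  # empty think block followed by "4"
```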
@@ -335,10 +340,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         input_data: InputModelType,
         think_messages: Messages,
         no_think_messages: Messages,
-        model_kwargs:
+        model_kwargs: dict[str, Any],
         cache_dir: pathlib.Path,
-        expected_response:
-        label:
+        expected_response: OutputModelType | None = None,
+        label: str | None = None,
     ) -> None:
         """
         Save training data to cache directory.
@@ -359,26 +364,26 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
 
         # Prepare combined training data
         training_data = {
-
-
-
-
-
+            'think_messages': think_messages,
+            'no_think_messages': no_think_messages,
+            'model_kwargs': model_kwargs,
+            'input_data': input_data.model_dump(),
+            'label': label,
         }
 
         if expected_response is not None:
-            training_data[
+            training_data['expected_response'] = expected_response.model_dump()
 
         # Save to file
-        training_file = class_cache_dir / f
+        training_file = class_cache_dir / f'{input_id}.json'
         dump_json_or_pickle(training_data, str(training_file))
 
     async def _generate_training_data_with_thinking_mode(
         self,
         input_data: InputModelType,
-        expected_response:
-        label:
-        cache_dir:
+        expected_response: OutputModelType | None = None,
+        label: str | None = None,
+        cache_dir: pathlib.Path | None = None,
     ) -> OutputModelType:
         """
         Generate training data for both thinking and non-thinking modes.
@@ -398,22 +403,22 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         """
         # Execute the base call to get thinking mode data
        output = await self._base_call(input_data)
-        parsed_result = output[
-        think_messages = output[
+        parsed_result = output['parsed']
+        think_messages = output['messages']
 
         # Create non-thinking mode equivalent
         no_think_messages = self._create_no_think_messages(think_messages)
 
         # Use default cache directory if none provided
         if cache_dir is None:
-            cache_dir = self.DEFAULT_CACHE_DIR or pathlib.Path(
+            cache_dir = self.DEFAULT_CACHE_DIR or pathlib.Path('./cache')
 
         # Save training data
         self._save_training_data(
             input_data=input_data,
             think_messages=think_messages,
             no_think_messages=no_think_messages,
-            model_kwargs=output[
+            model_kwargs=output['model_kwargs'],
             cache_dir=cache_dir,
             expected_response=expected_response,
             label=label,
@@ -433,8 +438,8 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
     async def __call__(
         self,
         input_data: InputModelType,
-        expected_response:
-        label:
+        expected_response: OutputModelType | None = None,
+        label: str | None = None,
         **kwargs: Any,
     ) -> OutputModelType:
         """
@@ -459,13 +464,12 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
                 expected_response=expected_response,
                 label=label,
             )
-
-
-        return output["parsed"]
+        output = await self._base_call(input_data)
+        return output['parsed']
 
     def generate_training_data(
         self, input_json: str, output_json: str
-    ) ->
+    ) -> dict[str, Any]:
         """
         Generate training data in ShareGPT format for the given input/output pair.
 
@@ -488,16 +492,16 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         # "as class attributes to use generate_training_data"
         # )
 
-        system_prompt = self.__doc__ or
-        assert isinstance(input_json, str),
-        assert isinstance(output_json, str),
+        system_prompt = self.__doc__ or ''
+        assert isinstance(input_json, str), 'Input must be a JSON string'
+        assert isinstance(output_json, str), 'Output must be a JSON string'
         messages = get_conversation_one_turn(
             system_msg=system_prompt,
             user_msg=input_json,
             assistant_msg=output_json,
         )
 
-        return {
+        return {'messages': messages}
 
     # Compatibility alias for other LLMTask implementations
     arun = __call__
@@ -506,8 +510,8 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         return self
 
     async def __aexit__(self, exc_type, exc_val, exc_tb):
-        if hasattr(self._lm,
-            last_client = self._lm._last_client
+        if hasattr(self._lm, '_last_client'):
+            last_client = self._lm._last_client  # type: ignore
             await last_client._client.aclose()
         else:
-            logger.warning(
+            logger.warning('No last client to close')

llm_utils/lm/async_lm/async_lm.py
@@ -12,10 +12,10 @@ from typing import (
 from loguru import logger
 from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
 from pydantic import BaseModel
-from speedy_utils import jloads
 
 # from llm_utils.lm.async_lm.async_llm_task import OutputModelType
 from llm_utils.lm.async_lm.async_lm_base import AsyncLMBase
+from speedy_utils import jloads
 
 from ._utils import (
     LegacyMsgs,
@@ -44,28 +44,32 @@ class AsyncLM(AsyncLMBase):
     def __init__(
         self,
         *,
-        model:
-        response_model:
+        model: str | None = None,
+        response_model: type[BaseModel] | None = None,
         temperature: float = 0.0,
         max_tokens: int = 2_000,
         host: str = "localhost",
-        port:
-        base_url:
-        api_key:
+        port: int | str | None = None,
+        base_url: str | None = None,
+        api_key: str | None = None,
         cache: bool = True,
         think: Literal[True, False, None] = None,
-        add_json_schema_to_instruction:
+        add_json_schema_to_instruction: bool | None = None,
         use_beta: bool = False,
-        ports:
+        ports: list[int] | None = None,
         top_p: float = 1.0,
         presence_penalty: float = 0.0,
         top_k: int = 1,
         repetition_penalty: float = 1.0,
-        frequency_penalty:
+        frequency_penalty: float | None = None,
     ) -> None:
 
         if model is None:
-            models =
+            models = (
+                OpenAI(base_url=f"http://{host}:{port}/v1", api_key="abc")
+                .models.list()
+                .data
+            )
             assert len(models) == 1, f"Found {len(models)} models, please specify one."
             model = models[0].id
             print(f"Using model: {model}")
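When `model` is omitted, the constructor now asks the local OpenAI-compatible server which models it serves and requires exactly one. A hedged sketch of that discovery step, assuming a vLLM-style endpoint on localhost:8000 (the API key is a placeholder because local servers usually ignore it):

```python
from openai import OpenAI

# Assumed local OpenAI-compatible server (e.g. started with `vllm serve ...`).
client = OpenAI(base_url='http://localhost:8000/v1', api_key='abc')

models = client.models.list().data
assert len(models) == 1, f'Found {len(models)} models, please specify one.'
model_id = models[0].id
print(f'Using model: {model_id}')
```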
@@ -86,24 +90,24 @@ class AsyncLM(AsyncLMBase):
             self.add_json_schema_to_instruction = True
 
         # Store all model-related parameters in model_kwargs
-        self.model_kwargs =
-            model
-            temperature
-            max_tokens
-            top_p
-            presence_penalty
-
-        self.extra_body =
-            top_k
-            repetition_penalty
-            frequency_penalty
-
+        self.model_kwargs = {
+            "model": model,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+            "top_p": top_p,
+            "presence_penalty": presence_penalty,
+        }
+        self.extra_body = {
+            "top_k": top_k,
+            "repetition_penalty": repetition_penalty,
+            "frequency_penalty": frequency_penalty,
+        }
 
     async def _unified_client_call(
         self,
         messages: RawMsgs,
-        extra_body:
-        max_tokens:
+        extra_body: dict | None = None,
+        max_tokens: int | None = None,
     ) -> dict:
         """Unified method for all client interactions (caching handled by MAsyncOpenAI)."""
         converted_messages: Messages = (
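The constructor now splits sampling parameters into standard OpenAI arguments (`model_kwargs`) and backend-specific ones (`extra_body`, e.g. `top_k` and `repetition_penalty` for vLLM-style servers). A rough sketch of how such a split could be forwarded on a chat completion call, assuming an OpenAI-compatible endpoint; names and values here are illustrative, not the package's internals:

```python
from openai import AsyncOpenAI

model_kwargs = {
    'model': 'my-local-model',  # placeholder model name
    'temperature': 0.0,
    'max_tokens': 2000,
    'top_p': 1.0,
    'presence_penalty': 0.0,
}
extra_body = {
    'top_k': 1,
    'repetition_penalty': 1.0,
    'frequency_penalty': 0.0,
}


async def ask(client: AsyncOpenAI, messages: list[dict]) -> str:
    # Standard parameters are passed directly; non-standard ones ride along in
    # extra_body, which the openai client forwards verbatim in the request JSON.
    response = await client.chat.completions.create(
        messages=messages,
        extra_body=extra_body,
        **model_kwargs,
    )
    return response.choices[0].message.content or ''
```

Keeping the two dicts separate lets the same call target stock OpenAI endpoints (which reject unknown top-level parameters) and extended local servers without branching.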
@@ -139,7 +143,7 @@ class AsyncLM(AsyncLMBase):
     async def _call_and_parse(
         self,
         messages: list[dict],
-        response_model:
+        response_model: type[OutputModelType],
         json_schema: dict,
     ) -> tuple[dict, list[dict], OutputModelType]:
         """Unified call and parse with cache and error handling."""
@@ -198,7 +202,7 @@ class AsyncLM(AsyncLMBase):
     async def _call_and_parse_with_beta(
         self,
         messages: list[dict],
-        response_model:
+        response_model: type[OutputModelType],
         json_schema: dict,
     ) -> tuple[dict, list[dict], OutputModelType]:
         """Call and parse for beta mode with guided JSON."""
@@ -249,9 +253,9 @@ class AsyncLM(AsyncLMBase):
 
     async def call_with_messages(
         self,
-        prompt:
-        messages:
-        max_tokens:
+        prompt: str | None = None,
+        messages: RawMsgs | None = None,
+        max_tokens: int | None = None,
     ):  # -> tuple[Any | dict[Any, Any], list[ChatCompletionMessagePar...:
         """Unified async call for language model, returns (assistant_message.model_dump(), messages)."""
         if (prompt is None) == (messages is None):
@@ -268,9 +272,9 @@ class AsyncLM(AsyncLMBase):
             else cast(Messages, messages)
         )
 
-        assert
-        "
-        )
+        assert (
+            self.model_kwargs["model"] is not None
+        ), "Model must be set before making a call."
 
         # Use unified client call
         raw_response = await self._unified_client_call(
@@ -293,17 +297,19 @@ class AsyncLM(AsyncLMBase):
         msg_dump = dict(assistant_msg)
         return msg_dump, full_messages
 
-
     def call_sync(
         self,
-        prompt:
-        messages:
-        max_tokens:
+        prompt: str | None = None,
+        messages: RawMsgs | None = None,
+        max_tokens: int | None = None,
     ):
         """Synchronous wrapper around the async __call__ method."""
         import asyncio
-
-
+
+        return asyncio.run(
+            self.__call__(prompt=prompt, messages=messages, max_tokens=max_tokens)
+        )
+
     async def parse(
         self,
         instruction,
@@ -311,9 +317,9 @@ class AsyncLM(AsyncLMBase):
     ) -> ParsedOutput[BaseModel]:
         """Parse response using guided JSON generation. Returns (parsed.model_dump(), messages)."""
         if not self._use_beta:
-            assert
-
-            )
+            assert (
+                self.add_json_schema_to_instruction
+            ), "add_json_schema_to_instruction must be True when use_beta is False. otherwise model will not be able to parse the response."
 
         assert self.response_model is not None, "response_model must be set at init."
         json_schema = self.response_model.model_json_schema()
@@ -351,7 +357,7 @@ class AsyncLM(AsyncLMBase):
         )
 
     def _parse_complete_output(
-        self, completion: Any, response_model:
+        self, completion: Any, response_model: type[BaseModel]
     ) -> BaseModel:
         """Parse completion output to response model."""
         if hasattr(completion, "model_dump"):