speedy-utils 1.1.9__tar.gz → 1.1.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/PKG-INFO +1 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/pyproject.toml +1 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/__init__.py +2 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/async_llm_task.py +5 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/async_lm.py +34 -55
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/async_lm_base.py +5 -173
- speedy_utils-1.1.11/src/llm_utils/lm/openai_memoize.py +72 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/scripts/vllm_serve.py +2 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/__init__.py +1 -3
- speedy_utils-1.1.11/src/speedy_utils/common/utils_cache.py +664 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/utils_io.py +14 -2
- speedy_utils-1.1.9/src/speedy_utils/common/utils_cache.py +0 -494
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/README.md +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.11}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
```diff
--- speedy_utils-1.1.9/src/llm_utils/lm/async_lm/async_llm_task.py
+++ speedy_utils-1.1.11/src/llm_utils/lm/async_lm/async_llm_task.py
@@ -389,7 +389,7 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         input_data: InputModelType,
         expected_response: Optional[OutputModelType] = None,
         label: Optional[str] = None,
-        cache_dir: pathlib.Path =
+        cache_dir: Optional[pathlib.Path] = None,
     ) -> OutputModelType:
         """
         Generate training data for both thinking and non-thinking modes.
@@ -415,6 +415,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
         # Create non-thinking mode equivalent
         no_think_messages = self._create_no_think_messages(think_messages)

+        # Use default cache directory if none provided
+        if cache_dir is None:
+            cache_dir = self.DEFAULT_CACHE_DIR or pathlib.Path("./cache")
+
         # Save training data
         self._save_training_data(
             input_data=input_data,
```
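The first change above makes `cache_dir` optional and resolves the default at call time. A minimal sketch of that fallback, where `DEFAULT_CACHE_DIR` and `resolve_cache_dir` are illustrative stand-ins rather than names taken from the package:

```python
import pathlib
from typing import Optional

# Illustrative stand-in for the class-level default consulted in the diff.
DEFAULT_CACHE_DIR: Optional[pathlib.Path] = None


def resolve_cache_dir(cache_dir: Optional[pathlib.Path] = None) -> pathlib.Path:
    # Mirrors the added lines 418-420: explicit argument wins, then the
    # configured default, then ./cache.
    if cache_dir is None:
        cache_dir = DEFAULT_CACHE_DIR or pathlib.Path("./cache")
    return cache_dir


print(resolve_cache_dir())                               # falls back to ./cache
print(resolve_cache_dir(pathlib.Path("/tmp/training")))  # explicit path wins
```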
```diff
--- speedy_utils-1.1.9/src/llm_utils/lm/async_lm/async_lm.py
+++ speedy_utils-1.1.11/src/llm_utils/lm/async_lm/async_lm.py
@@ -96,67 +96,37 @@ class AsyncLM(AsyncLMBase):

     async def _unified_client_call(
         self,
-        messages:
+        messages: RawMsgs,
         extra_body: Optional[dict] = None,
-
+        max_tokens: Optional[int] = None,
     ) -> dict:
-        """Unified method for all client interactions
-        converted_messages =
-
-
-
-
-        if
-
+        """Unified method for all client interactions (caching handled by MAsyncOpenAI)."""
+        converted_messages: Messages = (
+            self._convert_messages(cast(LegacyMsgs, messages))
+            if messages and isinstance(messages[0], dict)
+            else cast(Messages, messages)
+        )
+        # override max_tokens if provided
+        if max_tokens is not None:
+            self.model_kwargs["max_tokens"] = max_tokens
+
+        try:
+            # Get completion from API (caching handled by MAsyncOpenAI)
+            call_kwargs = {
                 "messages": converted_messages,
-
-                "extra_body": extra_body or {},
-                "cache_suffix": cache_suffix,
+                **self.model_kwargs,
             }
-
-
-
-            # Check for cached error responses
-            if (
-                completion
-                and isinstance(completion, dict)
-                and "error" in completion
-                and completion["error"]
-            ):
-                error_type = completion.get("error_type", "Unknown")
-                error_message = completion.get("error_message", "Cached error")
-                logger.warning(f"Found cached error ({error_type}): {error_message}")
-                raise ValueError(f"Cached {error_type}: {error_message}")
+            if extra_body:
+                call_kwargs["extra_body"] = extra_body

-
-
-            if
-
-                "messages": converted_messages,
-                **self.model_kwargs,
-            }
-            if extra_body:
-                call_kwargs["extra_body"] = extra_body
-
-            completion = await self.client.chat.completions.create(**call_kwargs)
-
-            if hasattr(completion, "model_dump"):
-                completion = completion.model_dump()
-            if cache_key:
-                self._dump_cache(cache_key, completion)
+            completion = await self.client.chat.completions.create(**call_kwargs)
+
+            if hasattr(completion, "model_dump"):
+                completion = completion.model_dump()

         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
             error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
             logger.error(error_msg)
-            if isinstance(exc, BadRequestError) and cache_key:
-                error_response = {
-                    "error": True,
-                    "error_type": "BadRequestError",
-                    "error_message": str(exc),
-                    "choices": [],
-                }
-                self._dump_cache(cache_key, error_response)
-                logger.debug(f"Cached BadRequestError for key: {cache_key}")
             raise

         return completion
@@ -179,7 +149,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={**self.extra_body},
-            cache_suffix=f"_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -234,7 +203,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={"guided_json": json_schema, **self.extra_body},
-            cache_suffix=f"_beta_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -277,6 +245,7 @@ class AsyncLM(AsyncLMBase):
         self,
         prompt: Optional[str] = None,
         messages: Optional[RawMsgs] = None,
+        max_tokens: Optional[int] = None,
     ):  # -> tuple[Any | dict[Any, Any], list[ChatCompletionMessagePar...:
         """Unified async call for language model, returns (assistant_message.model_dump(), messages)."""
         if (prompt is None) == (messages is None):
@@ -299,7 +268,7 @@ class AsyncLM(AsyncLMBase):

         # Use unified client call
         raw_response = await self._unified_client_call(
-            list(openai_msgs),
+            list(openai_msgs), max_tokens=max_tokens
         )

         if hasattr(raw_response, "model_dump"):
@@ -385,3 +354,13 @@ class AsyncLM(AsyncLMBase):
             raise ValueError(
                 f"Failed to validate against response model {response_model.__name__}: {exc}\nRaw content: {content}"
             ) from exc
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if hasattr(self, "_last_client"):
+            last_client = self._last_client  # type: ignore
+            await last_client._client.aclose()
+        else:
+            logger.warning("No last client to close")
```
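The last hunk adds `__aenter__`/`__aexit__`, so an `AsyncLM` instance can be used as an async context manager that closes the most recently created HTTP client on exit. A hedged usage sketch; the constructor keyword arguments are placeholders, since `AsyncLM.__init__` is not part of this diff:

```python
import asyncio

from llm_utils.lm.async_lm.async_lm import AsyncLM


async def main() -> None:
    # model/host/port are assumed parameter names for illustration only.
    async with AsyncLM(model="my-model", host="localhost", port=8000) as lm:
        ...  # issue calls here; the unified call path now accepts max_tokens=...
    # On exit, __aexit__ closes lm._last_client._client if a client was created,
    # otherwise it logs "No last client to close".


asyncio.run(main())
```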
```diff
--- speedy_utils-1.1.9/src/llm_utils/lm/async_lm/async_lm_base.py
+++ speedy_utils-1.1.11/src/llm_utils/lm/async_lm/async_lm_base.py
@@ -1,6 +1,4 @@
 # from ._utils import *
-import base64
-import hashlib
 import json
 import os
 from typing import (
@@ -26,6 +24,8 @@ from openai.types.chat import (
 from openai.types.model import Model
 from pydantic import BaseModel

+from llm_utils.lm.openai_memoize import MAsyncOpenAI
+
 from ._utils import (
     LegacyMsgs,
     Messages,
@@ -56,7 +56,7 @@ class AsyncLMBase:
         self._init_port = port  # <-- store the port provided at init

     @property
-    def client(self) ->
+    def client(self) -> MAsyncOpenAI:
         # if have multiple ports
         if self.ports:
             import random
@@ -66,9 +66,10 @@ class AsyncLMBase:
             logger.debug(f"Using port: {port}")
         else:
             api_base = self.base_url or f"http://{self._host}:{self._port}/v1"
-        client =
+        client = MAsyncOpenAI(
            api_key=self.api_key,
            base_url=api_base,
+            cache=self._cache,
        )
         self._last_client = client
         return client
@@ -176,175 +177,6 @@ class AsyncLMBase:
                 f"Model did not return valid JSON:\n---\n{raw_response}"
             ) from exc

-    # ------------------------------------------------------------------ #
-    # Simple disk cache (sync)
-    # ------------------------------------------------------------------ #
-    @staticmethod
-    def _cache_key(
-        messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
-    ) -> str:
-        tag = response_format.__name__ if response_format is not str else "text"
-        blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
-        return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
-
-    @staticmethod
-    def _cache_path(key: str) -> str:
-        return os.path.expanduser(f"~/.cache/lm/{key}.json")
-
-    def _dump_cache(self, key: str, val: Any) -> None:
-        try:
-            path = self._cache_path(key)
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            with open(path, "w") as fh:
-                if isinstance(val, BaseModel):
-                    json.dump(val.model_dump(mode="json"), fh)
-                else:
-                    json.dump(val, fh)
-        except Exception as exc:
-            logger.debug(f"cache write skipped: {exc}")
-
-    def _load_cache(self, key: str) -> Any | None:
-        path = self._cache_path(key)
-        if not os.path.exists(path):
-            return None
-        try:
-            with open(path) as fh:
-                return json.load(fh)
-        except Exception:
-            return None
-
-    # async def inspect_word_probs(
-    #     self,
-    #     messages: Optional[List[Dict[str, Any]]] = None,
-    #     tokenizer: Optional[Any] = None,
-    #     do_print=True,
-    #     add_think: bool = True,
-    # ) -> tuple[List[Dict[str, Any]], Any, str]:
-    #     """
-    #     Inspect word probabilities in a language model response.
-
-    #     Args:
-    #         tokenizer: Tokenizer instance to encode words.
-    #         messages: List of messages to analyze.
-
-    #     Returns:
-    #         A tuple containing:
-    #         - List of word probabilities with their log probabilities.
-    #         - Token log probability dictionaries.
-    #         - Rendered string with colored word probabilities.
-    #     """
-    #     if messages is None:
-    #         messages = await self.last_messages(add_think=add_think)
-    #     if messages is None:
-    #         raise ValueError("No messages provided and no last messages available.")
-
-    #     if tokenizer is None:
-    #         tokenizer = get_tokenizer(self.model)
-
-    #     ret = await inspect_word_probs_async(self, tokenizer, messages)
-    #     if do_print:
-    #         print(ret[-1])
-    #     return ret
-
-    # async def last_messages(
-    #     self, add_think: bool = True
-    # ) -> Optional[List[Dict[str, str]]]:
-    #     """Get the last conversation messages including assistant response."""
-    #     if not hasattr(self, "last_log"):
-    #         return None
-
-    #     last_conv = self._last_log
-    #     messages = last_conv[1] if len(last_conv) > 1 else None
-    #     last_msg = last_conv[2]
-    #     if not isinstance(last_msg, dict):
-    #         last_conv[2] = last_conv[2].model_dump()  # type: ignore
-    #     msg = last_conv[2]
-    #     # Ensure msg is a dict
-    #     if hasattr(msg, "model_dump"):
-    #         msg = msg.model_dump()
-    #     message = msg["choices"][0]["message"]
-    #     reasoning = message.get("reasoning_content")
-    #     answer = message.get("content")
-    #     if reasoning and add_think:
-    #         final_answer = f"<think>{reasoning}</think>\n{answer}"
-    #     else:
-    #         final_answer = f"<think>\n\n</think>\n{answer}"
-    #     assistant = {"role": "assistant", "content": final_answer}
-    #     messages = messages + [assistant]  # type: ignore
-    #     return messages if messages else None
-
-    # async def inspect_history(self) -> None:
-    #     """Inspect the conversation history with proper formatting."""
-    #     if not hasattr(self, "last_log"):
-    #         raise ValueError("No history available. Please call the model first.")
-
-    #     prompt, messages, response = self._last_log
-    #     if hasattr(response, "model_dump"):
-    #         response = response.model_dump()
-    #     if not messages:
-    #         messages = [{"role": "user", "content": prompt}]
-
-    #     print("\n\n")
-    #     print(_blue("[Conversation History]") + "\n")
-
-    #     for msg in messages:
-    #         role = msg["role"]
-    #         content = msg["content"]
-    #         print(_red(f"{role.capitalize()}:"))
-    #         if isinstance(content, str):
-    #             print(content.strip())
-    #         elif isinstance(content, list):
-    #             for item in content:
-    #                 if item.get("type") == "text":
-    #                     print(item["text"].strip())
-    #                 elif item.get("type") == "image_url":
-    #                     image_url = item["image_url"]["url"]
-    #                     if "base64" in image_url:
-    #                         len_base64 = len(image_url.split("base64,")[1])
-    #                         print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
-    #                     else:
-    #                         print(_blue(f"<image_url: {image_url}>"))
-    #         print("\n")
-
-    #     print(_red("Response:"))
-    #     if isinstance(response, dict) and response.get("choices"):
-    #         message = response["choices"][0].get("message", {})
-    #         reasoning = message.get("reasoning_content")
-    #         parsed = message.get("parsed")
-    #         content = message.get("content")
-    #         if reasoning:
-    #             print(_yellow("<think>"))
-    #             print(reasoning.strip())
-    #             print(_yellow("</think>\n"))
-    #         if parsed:
-    #             print(
-    #                 json.dumps(
-    #                     (
-    #                         parsed.model_dump()
-    #                         if hasattr(parsed, "model_dump")
-    #                         else parsed
-    #                     ),
-    #                     indent=2,
-    #                 )
-    #                 + "\n"
-    #             )
-    #         elif content:
-    #             print(content.strip())
-    #         else:
-    #             print(_green("[No content]"))
-    #         if len(response["choices"]) > 1:
-    #             print(
-    #                 _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
-    #             )
-    #     else:
-    #         print(_yellow("Warning: Not a standard OpenAI response object"))
-    #         if isinstance(response, str):
-    #             print(_green(response.strip()))
-    #         elif isinstance(response, dict):
-    #             print(_green(json.dumps(response, indent=2)))
-    #         else:
-    #             print(_green(str(response)))
-
     # ------------------------------------------------------------------ #
     # Misc helpers
     # ------------------------------------------------------------------ #
```
```diff
--- /dev/null
+++ speedy_utils-1.1.11/src/llm_utils/lm/openai_memoize.py
@@ -0,0 +1,72 @@
+from openai import OpenAI, AsyncOpenAI
+
+from speedy_utils.common.utils_cache import memoize
+
+
+class MOpenAI(OpenAI):
+    """
+    MOpenAI(*args, **kwargs)
+
+    Subclass of OpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the OpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to OpenAI.__init__.
+
+    Notes and cautions
+    - The exact semantics of caching (cache key construction, expiry, max size,
+      persistence) depend on the implementation of `memoize`. Ensure that the
+      provided `memoize` supports the desired behavior (e.g., hashing of mutable
+      inputs, thread-safety, TTL, cache invalidation).
+    - If the original `post` method has important side effects or relies on
+      non-deterministic behavior, memoization may change program behavior.
+    - If you need a shared cache across instances, or more advanced cache controls,
+      modify `memoize` or wrap at a class/static level instead of assigning to the
+      bound method.
+
+    Example
+        m = MOpenAI(api_key="...", model="gpt-4")
+        r1 = m.post("Hello")  # executes API call and caches result
+        r2 = m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
+
+
+class MAsyncOpenAI(AsyncOpenAI):
+    """
+    MAsyncOpenAI(*args, **kwargs)
+
+    Async subclass of AsyncOpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the AsyncOpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MAsyncOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to AsyncOpenAI.__init__.
+
+    Example
+        m = MAsyncOpenAI(api_key="...", model="gpt-4")
+        r1 = await m.post("Hello")  # executes API call and caches result
+        r2 = await m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
```
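In the OpenAI Python client, `chat.completions.create` is ultimately routed through the client's `post` method, so memoizing `post` on the instance should let repeated identical requests be served from the cache. A hedged usage sketch; the API key, base URL, and model name are placeholders, and the exact cache semantics (keying, persistence, TTL) depend on `speedy_utils`' `memoize`:

```python
import asyncio

from llm_utils.lm.openai_memoize import MAsyncOpenAI


async def main() -> None:
    # cache=True is the default; pass cache=False to keep the stock behaviour.
    client = MAsyncOpenAI(api_key="sk-placeholder", base_url="http://localhost:8000/v1")
    kwargs = dict(
        model="my-model",
        messages=[{"role": "user", "content": "Hello"}],
    )
    first = await client.chat.completions.create(**kwargs)   # real HTTP request
    second = await client.chat.completions.create(**kwargs)  # expected cache hit
    print(first.choices[0].message.content == second.choices[0].message.content)


asyncio.run(main())
```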
```diff
--- speedy_utils-1.1.9/src/llm_utils/scripts/vllm_serve.py
+++ speedy_utils-1.1.11/src/llm_utils/scripts/vllm_serve.py
@@ -72,6 +72,7 @@ import openai
 import requests
 from loguru import logger

+from llm_utils.lm.openai_memoize import MOpenAI
 from speedy_utils.common.utils_io import load_by_ext

 LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
@@ -82,7 +83,7 @@ logger.info(f"LORA_DIR: {LORA_DIR}")

 def model_list(host_port: str, api_key: str = "abc") -> None:
     """List models from the vLLM server."""
-    client =
+    client = MOpenAI(base_url=f"http://{host_port}/v1", api_key=api_key)
     models = client.models.list()
     for model in models:
         print(f"Model ID: {model.id}")
```
```diff
--- speedy_utils-1.1.9/src/speedy_utils/__init__.py
+++ speedy_utils-1.1.11/src/speedy_utils/__init__.py
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
 )

 # Cache utilities
-from .common.utils_cache import
+from .common.utils_cache import identify, identify_uuid, memoize

 # IO utilities
 from .common.utils_io import (
@@ -197,7 +197,6 @@ __all__ = [
     # Function decorators
     "retry_runtime",
     # Cache utilities
-    "amemoize",
     "memoize",
     "identify",
     "identify_uuid",
@@ -227,5 +226,4 @@ __all__ = [
     "multi_thread",
     # Notebook utilities
     "change_dir",
-    "amemoize",
 ]
```
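The export changes drop `amemoize` from the public API while keeping `memoize`, `identify`, and `identify_uuid`; since this release applies `memoize` to an async method (`AsyncOpenAI.post` in `openai_memoize.py`), it presumably now covers both sync and async callables. A hedged sketch of the remaining cache exports; the behaviour of `identify`/`identify_uuid` is inferred from their names:

```python
from speedy_utils import identify, identify_uuid, memoize


@memoize
def slow_square(x: int) -> int:
    print("computing", x)
    return x * x


print(slow_square(4))  # prints "computing 4", then 16
print(slow_square(4))  # expected cache hit: 16 without recomputation

# Presumed content-hashing helpers: deterministic IDs for picklable objects.
print(identify({"a": 1}))
print(identify_uuid({"a": 1}))
```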