speedy-utils 1.1.10__tar.gz → 1.1.12__tar.gz
This diff shows the changes between these publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/PKG-INFO +1 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/pyproject.toml +1 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/__init__.py +2 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py +26 -54
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py +5 -173
- speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py +72 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py +2 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py +23 -7
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py +14 -2
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/README.md +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py

@@ -9,7 +9,7 @@ from typing import (
 )

 from loguru import logger
-from openai import AuthenticationError, BadRequestError, RateLimitError
+from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
 from pydantic import BaseModel
 from speedy_utils import jloads

@@ -43,8 +43,8 @@ class AsyncLM(AsyncLMBase):

     def __init__(
         self,
-        model: str,
         *,
+        model: Optional[str] = None,
         response_model: Optional[type[BaseModel]] = None,
         temperature: float = 0.0,
         max_tokens: int = 2_000,
@@ -63,6 +63,13 @@ class AsyncLM(AsyncLMBase):
         repetition_penalty: float = 1.0,
         frequency_penalty: Optional[float] = None,
     ) -> None:
+
+        if model is None:
+            models = OpenAI(base_url=f'http://{host}:{port}/v1', api_key='abc').models.list().data
+            assert len(models) == 1, f"Found {len(models)} models, please specify one."
+            model = models[0].id
+            print(f"Using model: {model}")
+
         super().__init__(
             host=host,
             port=port,
@@ -98,69 +105,35 @@ class AsyncLM(AsyncLMBase):
         self,
         messages: RawMsgs,
         extra_body: Optional[dict] = None,
-
+        max_tokens: Optional[int] = None,
     ) -> dict:
-        """Unified method for all client interactions
+        """Unified method for all client interactions (caching handled by MAsyncOpenAI)."""
         converted_messages: Messages = (
             self._convert_messages(cast(LegacyMsgs, messages))
             if messages and isinstance(messages[0], dict)
             else cast(Messages, messages)
         )
-
-
+        # override max_tokens if provided
+        if max_tokens is not None:
+            self.model_kwargs["max_tokens"] = max_tokens

-
-
-
+        try:
+            # Get completion from API (caching handled by MAsyncOpenAI)
+            call_kwargs = {
                 "messages": converted_messages,
-
-                "extra_body": extra_body or {},
-                "cache_suffix": cache_suffix,
+                **self.model_kwargs,
             }
-
-
-
-        # Check for cached error responses
-        if (
-            completion
-            and isinstance(completion, dict)
-            and "error" in completion
-            and completion["error"]
-        ):
-            error_type = completion.get("error_type", "Unknown")
-            error_message = completion.get("error_message", "Cached error")
-            logger.warning(f"Found cached error ({error_type}): {error_message}")
-            raise ValueError(f"Cached {error_type}: {error_message}")
+            if extra_body:
+                call_kwargs["extra_body"] = extra_body

-
-
-        if
-
-            "messages": converted_messages,
-            **self.model_kwargs,
-        }
-        if extra_body:
-            call_kwargs["extra_body"] = extra_body
-
-            completion = await self.client.chat.completions.create(**call_kwargs)
-
-            if hasattr(completion, "model_dump"):
-                completion = completion.model_dump()
-            if cache_key:
-                self._dump_cache(cache_key, completion)
+            completion = await self.client.chat.completions.create(**call_kwargs)
+
+            if hasattr(completion, "model_dump"):
+                completion = completion.model_dump()

         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
             error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
             logger.error(error_msg)
-            if isinstance(exc, BadRequestError) and cache_key:
-                error_response = {
-                    "error": True,
-                    "error_type": "BadRequestError",
-                    "error_message": str(exc),
-                    "choices": [],
-                }
-                self._dump_cache(cache_key, error_response)
-                logger.debug(f"Cached BadRequestError for key: {cache_key}")
             raise

         return completion
@@ -183,7 +156,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={**self.extra_body},
-            cache_suffix=f"_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -238,7 +210,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={"guided_json": json_schema, **self.extra_body},
-            cache_suffix=f"_beta_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -281,6 +252,7 @@ class AsyncLM(AsyncLMBase):
         self,
         prompt: Optional[str] = None,
         messages: Optional[RawMsgs] = None,
+        max_tokens: Optional[int] = None,
     ):  # -> tuple[Any | dict[Any, Any], list[ChatCompletionMessagePar...:
         """Unified async call for language model, returns (assistant_message.model_dump(), messages)."""
         if (prompt is None) == (messages is None):
@@ -303,7 +275,7 @@ class AsyncLM(AsyncLMBase):

         # Use unified client call
         raw_response = await self._unified_client_call(
-            list(openai_msgs),
+            list(openai_msgs), max_tokens=max_tokens
         )

         if hasattr(raw_response, "model_dump"):
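Note: the change above makes the `model` argument optional; when it is omitted, `AsyncLM.__init__` queries the OpenAI-compatible server for its model list and uses the single served model. A minimal standalone sketch of that lookup, with host, port, and the dummy API key as illustrative assumptions:

from openai import OpenAI

def detect_single_model(host: str = "localhost", port: int = 8000) -> str:
    # Ask the OpenAI-compatible server (e.g. vLLM) which models it serves.
    client = OpenAI(base_url=f"http://{host}:{port}/v1", api_key="abc")
    models = client.models.list().data
    # Mirror the new behaviour: refuse to guess when several models are served.
    assert len(models) == 1, f"Found {len(models)} models, please specify one."
    return models[0].id

# model = detect_single_model()  # e.g. the only model served by a local vLLM instance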
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py

@@ -1,6 +1,4 @@
 # from ._utils import *
-import base64
-import hashlib
 import json
 import os
 from typing import (
@@ -26,6 +24,8 @@ from openai.types.chat import (
 from openai.types.model import Model
 from pydantic import BaseModel

+from llm_utils.lm.openai_memoize import MAsyncOpenAI
+
 from ._utils import (
     LegacyMsgs,
     Messages,
@@ -56,7 +56,7 @@ class AsyncLMBase:
         self._init_port = port  # <-- store the port provided at init

     @property
-    def client(self) ->
+    def client(self) -> MAsyncOpenAI:
         # if have multiple ports
         if self.ports:
             import random
@@ -66,9 +66,10 @@ class AsyncLMBase:
             logger.debug(f"Using port: {port}")
         else:
             api_base = self.base_url or f"http://{self._host}:{self._port}/v1"
-        client =
+        client = MAsyncOpenAI(
             api_key=self.api_key,
             base_url=api_base,
+            cache=self._cache,
         )
         self._last_client = client
         return client
@@ -176,175 +177,6 @@ class AsyncLMBase:
                 f"Model did not return valid JSON:\n---\n{raw_response}"
             ) from exc

-    # ------------------------------------------------------------------ #
-    # Simple disk cache (sync)
-    # ------------------------------------------------------------------ #
-    @staticmethod
-    def _cache_key(
-        messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
-    ) -> str:
-        tag = response_format.__name__ if response_format is not str else "text"
-        blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
-        return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
-
-    @staticmethod
-    def _cache_path(key: str) -> str:
-        return os.path.expanduser(f"~/.cache/lm/{key}.json")
-
-    def _dump_cache(self, key: str, val: Any) -> None:
-        try:
-            path = self._cache_path(key)
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            with open(path, "w") as fh:
-                if isinstance(val, BaseModel):
-                    json.dump(val.model_dump(mode="json"), fh)
-                else:
-                    json.dump(val, fh)
-        except Exception as exc:
-            logger.debug(f"cache write skipped: {exc}")
-
-    def _load_cache(self, key: str) -> Any | None:
-        path = self._cache_path(key)
-        if not os.path.exists(path):
-            return None
-        try:
-            with open(path) as fh:
-                return json.load(fh)
-        except Exception:
-            return None
-
-    # async def inspect_word_probs(
-    #     self,
-    #     messages: Optional[List[Dict[str, Any]]] = None,
-    #     tokenizer: Optional[Any] = None,
-    #     do_print=True,
-    #     add_think: bool = True,
-    # ) -> tuple[List[Dict[str, Any]], Any, str]:
-    #     """
-    #     Inspect word probabilities in a language model response.
-
-    #     Args:
-    #         tokenizer: Tokenizer instance to encode words.
-    #         messages: List of messages to analyze.
-
-    #     Returns:
-    #         A tuple containing:
-    #         - List of word probabilities with their log probabilities.
-    #         - Token log probability dictionaries.
-    #         - Rendered string with colored word probabilities.
-    #     """
-    #     if messages is None:
-    #         messages = await self.last_messages(add_think=add_think)
-    #         if messages is None:
-    #             raise ValueError("No messages provided and no last messages available.")
-
-    #     if tokenizer is None:
-    #         tokenizer = get_tokenizer(self.model)
-
-    #     ret = await inspect_word_probs_async(self, tokenizer, messages)
-    #     if do_print:
-    #         print(ret[-1])
-    #     return ret
-
-    # async def last_messages(
-    #     self, add_think: bool = True
-    # ) -> Optional[List[Dict[str, str]]]:
-    #     """Get the last conversation messages including assistant response."""
-    #     if not hasattr(self, "last_log"):
-    #         return None
-
-    #     last_conv = self._last_log
-    #     messages = last_conv[1] if len(last_conv) > 1 else None
-    #     last_msg = last_conv[2]
-    #     if not isinstance(last_msg, dict):
-    #         last_conv[2] = last_conv[2].model_dump()  # type: ignore
-    #     msg = last_conv[2]
-    #     # Ensure msg is a dict
-    #     if hasattr(msg, "model_dump"):
-    #         msg = msg.model_dump()
-    #     message = msg["choices"][0]["message"]
-    #     reasoning = message.get("reasoning_content")
-    #     answer = message.get("content")
-    #     if reasoning and add_think:
-    #         final_answer = f"<think>{reasoning}</think>\n{answer}"
-    #     else:
-    #         final_answer = f"<think>\n\n</think>\n{answer}"
-    #     assistant = {"role": "assistant", "content": final_answer}
-    #     messages = messages + [assistant]  # type: ignore
-    #     return messages if messages else None
-
-    # async def inspect_history(self) -> None:
-    #     """Inspect the conversation history with proper formatting."""
-    #     if not hasattr(self, "last_log"):
-    #         raise ValueError("No history available. Please call the model first.")
-
-    #     prompt, messages, response = self._last_log
-    #     if hasattr(response, "model_dump"):
-    #         response = response.model_dump()
-    #     if not messages:
-    #         messages = [{"role": "user", "content": prompt}]
-
-    #     print("\n\n")
-    #     print(_blue("[Conversation History]") + "\n")
-
-    #     for msg in messages:
-    #         role = msg["role"]
-    #         content = msg["content"]
-    #         print(_red(f"{role.capitalize()}:"))
-    #         if isinstance(content, str):
-    #             print(content.strip())
-    #         elif isinstance(content, list):
-    #             for item in content:
-    #                 if item.get("type") == "text":
-    #                     print(item["text"].strip())
-    #                 elif item.get("type") == "image_url":
-    #                     image_url = item["image_url"]["url"]
-    #                     if "base64" in image_url:
-    #                         len_base64 = len(image_url.split("base64,")[1])
-    #                         print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
-    #                     else:
-    #                         print(_blue(f"<image_url: {image_url}>"))
-    #         print("\n")
-
-    #     print(_red("Response:"))
-    #     if isinstance(response, dict) and response.get("choices"):
-    #         message = response["choices"][0].get("message", {})
-    #         reasoning = message.get("reasoning_content")
-    #         parsed = message.get("parsed")
-    #         content = message.get("content")
-    #         if reasoning:
-    #             print(_yellow("<think>"))
-    #             print(reasoning.strip())
-    #             print(_yellow("</think>\n"))
-    #         if parsed:
-    #             print(
-    #                 json.dumps(
-    #                     (
-    #                         parsed.model_dump()
-    #                         if hasattr(parsed, "model_dump")
-    #                         else parsed
-    #                     ),
-    #                     indent=2,
-    #                 )
-    #                 + "\n"
-    #             )
-    #         elif content:
-    #             print(content.strip())
-    #         else:
-    #             print(_green("[No content]"))
-    #         if len(response["choices"]) > 1:
-    #             print(
-    #                 _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
-    #             )
-    #     else:
-    #         print(_yellow("Warning: Not a standard OpenAI response object"))
-    #         if isinstance(response, str):
-    #             print(_green(response.strip()))
-    #         elif isinstance(response, dict):
-    #             print(_green(json.dumps(response, indent=2)))
-    #         else:
-    #             print(_green(str(response)))
-
     # ------------------------------------------------------------------ #
     # Misc helpers
     # ------------------------------------------------------------------ #
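Note: with the hand-rolled ~/.cache/lm JSON cache removed, `AsyncLMBase.client` delegates caching to `MAsyncOpenAI`. A rough sketch of the construction path, assuming `self._cache` is simply a truthy flag forwarded to the `cache` parameter:

from llm_utils.lm.openai_memoize import MAsyncOpenAI

def build_client(api_key: str, host: str, port: int, cache: bool = True) -> MAsyncOpenAI:
    # Mirrors the new `client` property: response caching now lives in the
    # memoized client rather than in AsyncLMBase itself.
    api_base = f"http://{host}:{port}/v1"
    return MAsyncOpenAI(api_key=api_key, base_url=api_base, cache=cache)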
speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py (new file)

@@ -0,0 +1,72 @@
+from openai import OpenAI, AsyncOpenAI
+
+from speedy_utils.common.utils_cache import memoize
+
+
+class MOpenAI(OpenAI):
+    """
+    MOpenAI(*args, **kwargs)
+
+    Subclass of OpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the OpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to OpenAI.__init__.
+
+    Notes and cautions
+    - The exact semantics of caching (cache key construction, expiry, max size,
+      persistence) depend on the implementation of `memoize`. Ensure that the
+      provided `memoize` supports the desired behavior (e.g., hashing of mutable
+      inputs, thread-safety, TTL, cache invalidation).
+    - If the original `post` method has important side effects or relies on
+      non-deterministic behavior, memoization may change program behavior.
+    - If you need a shared cache across instances, or more advanced cache controls,
+      modify `memoize` or wrap at a class/static level instead of assigning to the
+      bound method.
+
+    Example
+        m = MOpenAI(api_key="...", model="gpt-4")
+        r1 = m.post("Hello")  # executes API call and caches result
+        r2 = m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
+
+
+class MAsyncOpenAI(AsyncOpenAI):
+    """
+    MAsyncOpenAI(*args, **kwargs)
+
+    Async subclass of AsyncOpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the AsyncOpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MAsyncOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to AsyncOpenAI.__init__.
+
+    Example
+        m = MAsyncOpenAI(api_key="...", model="gpt-4")
+        r1 = await m.post("Hello")  # executes API call and caches result
+        r2 = await m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
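Note: the new module wraps the OpenAI clients so that each instance's low-level `post` method is memoized. A usage sketch through the regular chat-completions interface; the base URL, API key, and model name are placeholders, a reachable OpenAI-compatible server is assumed, and the second call is served from the cache only if `memoize` can key the underlying request arguments:

from llm_utils.lm.openai_memoize import MOpenAI

client = MOpenAI(base_url="http://localhost:8000/v1", api_key="abc")
msgs = [{"role": "user", "content": "Hello"}]

first = client.chat.completions.create(model="my-model", messages=msgs)
second = client.chat.completions.create(model="my-model", messages=msgs)  # ideally answered from cache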
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py

@@ -72,6 +72,7 @@ import openai
 import requests
 from loguru import logger

+from llm_utils.lm.openai_memoize import MOpenAI
 from speedy_utils.common.utils_io import load_by_ext

 LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
@@ -82,7 +83,7 @@ logger.info(f"LORA_DIR: {LORA_DIR}")

 def model_list(host_port: str, api_key: str = "abc") -> None:
     """List models from the vLLM server."""
-    client =
+    client = MOpenAI(base_url=f"http://{host_port}/v1", api_key=api_key)
     models = client.models.list()
     for model in models:
         print(f"Model ID: {model.id}")
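Note: `model_list` now builds its client with `MOpenAI` instead of a bare OpenAI client. An illustrative call, assuming a vLLM server listening on localhost:8000:

from llm_utils.scripts.vllm_serve import model_list

# Prints "Model ID: ..." for each model served by the OpenAI-compatible endpoint.
model_list("localhost:8000")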
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py

@@ -459,7 +459,12 @@ def both_memoize(
         disk_result: Optional[R] = None
         with disk_lock:
             if osp.exists(cache_path):
-
+                try:
+                    disk_result = load_json_or_pickle(cache_path)
+                except Exception:
+                    if osp.exists(cache_path):
+                        os.remove(cache_path)
+                    disk_result = None

         if disk_result is not None:
             with mem_lock:
@@ -555,6 +560,7 @@ def _async_both_memoize(
# Public decorator (only export memoize)
# --------------------------------------------------------------------------------------

+
@overload
def memoize(
    _func: Callable[P, R],
@@ -619,24 +625,34 @@ def memoize(
     """
     if "~/" in cache_dir:
         cache_dir = osp.expanduser(cache_dir)
+    from speedy_utils import timef

     def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
         is_async = inspect.iscoroutinefunction(func)

+        # Apply timing decorator if verbose=True
+        target_func = timef(func) if verbose else func
+
         if cache_type == "memory":
             if is_async:
-                return _async_memory_memoize(
-            return _memory_memoize(
+                return _async_memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]
+            return _memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]

         if cache_type == "disk":
             if is_async:
-                return _async_disk_memoize(
-
+                return _async_disk_memoize(
+                    target_func, keys, cache_dir, ignore_self, verbose, key
+                )  # type: ignore[return-value]
+            return _disk_memoize(
+                target_func, keys, cache_dir, ignore_self, verbose, key
+            )  # type: ignore[return-value]

         # cache_type == "both"
         if is_async:
-            return _async_both_memoize(
-
+            return _async_both_memoize(
+                target_func, keys, cache_dir, ignore_self, size, key
+            )  # type: ignore[return-value]
+        return both_memoize(target_func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]

     # Support both @memoize and @memoize(...)
     if _func is None:
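Note: two behavioural changes to `memoize` are visible above: a corrupted disk-cache entry is now deleted and recomputed instead of propagating the load error, and `verbose=True` wraps the target function with `timef` before caching. A hedged usage sketch; parameter names follow the diff and the argument values are illustrative:

from speedy_utils.common.utils_cache import memoize

@memoize(cache_type="both", verbose=True)
def slow_square(x: int) -> int:
    return x * x

slow_square(4)  # computed, timed via timef, and written to the memory + disk caches
slow_square(4)  # served from cache on subsequent identical calls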
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py

@@ -10,6 +10,7 @@ from pathlib import Path
 from typing import Any

 from json_repair import loads as jloads
+from pydantic import BaseModel

 from .utils_misc import mkdir_or_exist

@@ -46,8 +47,19 @@ def dump_json_or_pickle(
     elif fname.endswith(".jsonl"):
         dump_jsonl(obj, fname)
     elif fname.endswith(".pkl"):
-
-
+        try:
+            with open(fname, "wb") as f:
+                pickle.dump(obj, f)
+        except Exception as e:
+            if isinstance(obj, BaseModel):
+                data = obj.model_dump()
+                from fastcore.all import obj2dict, dict2obj
+                obj2 = dict2obj(data)
+                with open(fname, "wb") as f:
+                    pickle.dump(obj2, f)
+            else:
+                raise ValueError(f"Error {e} while dumping {fname}") from e
+
     else:
         raise NotImplementedError(f"File type {fname} not supported")
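Note: `dump_json_or_pickle` now retries a failed `.pkl` dump for pydantic models by pickling `dict2obj(obj.model_dump())` from fastcore instead of raising immediately. A small sketch of the call; the file path and model are illustrative:

from pydantic import BaseModel
from speedy_utils.common.utils_io import dump_json_or_pickle

class Point(BaseModel):
    x: int
    y: int

# Pickled directly; if pickling raised, the new fallback would pickle
# dict2obj(Point(...).model_dump()) instead.
dump_json_or_pickle(Point(x=1, y=2), "/tmp/point.pkl")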