speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl
This diff shows the changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package contents as published in their public registries.
- llm_utils/__init__.py +16 -4
- llm_utils/chat_format/__init__.py +10 -10
- llm_utils/chat_format/display.py +33 -21
- llm_utils/chat_format/transform.py +17 -19
- llm_utils/chat_format/utils.py +6 -4
- llm_utils/group_messages.py +17 -14
- llm_utils/lm/__init__.py +6 -5
- llm_utils/lm/async_lm/__init__.py +1 -0
- llm_utils/lm/async_lm/_utils.py +10 -9
- llm_utils/lm/async_lm/async_llm_task.py +141 -137
- llm_utils/lm/async_lm/async_lm.py +48 -42
- llm_utils/lm/async_lm/async_lm_base.py +59 -60
- llm_utils/lm/async_lm/lm_specific.py +4 -3
- llm_utils/lm/base_prompt_builder.py +93 -70
- llm_utils/lm/llm.py +126 -108
- llm_utils/lm/llm_signature.py +4 -2
- llm_utils/lm/lm_base.py +72 -73
- llm_utils/lm/mixins.py +102 -62
- llm_utils/lm/openai_memoize.py +124 -87
- llm_utils/lm/signature.py +105 -92
- llm_utils/lm/utils.py +42 -23
- llm_utils/scripts/vllm_load_balancer.py +23 -30
- llm_utils/scripts/vllm_serve.py +8 -7
- llm_utils/vector_cache/__init__.py +9 -3
- llm_utils/vector_cache/cli.py +1 -1
- llm_utils/vector_cache/core.py +59 -63
- llm_utils/vector_cache/types.py +7 -5
- llm_utils/vector_cache/utils.py +12 -8
- speedy_utils/__imports.py +244 -0
- speedy_utils/__init__.py +90 -194
- speedy_utils/all.py +125 -227
- speedy_utils/common/clock.py +37 -42
- speedy_utils/common/function_decorator.py +6 -12
- speedy_utils/common/logger.py +43 -52
- speedy_utils/common/notebook_utils.py +13 -21
- speedy_utils/common/patcher.py +21 -17
- speedy_utils/common/report_manager.py +42 -44
- speedy_utils/common/utils_cache.py +152 -169
- speedy_utils/common/utils_io.py +137 -103
- speedy_utils/common/utils_misc.py +15 -21
- speedy_utils/common/utils_print.py +22 -28
- speedy_utils/multi_worker/process.py +66 -79
- speedy_utils/multi_worker/thread.py +78 -155
- speedy_utils/scripts/mpython.py +38 -36
- speedy_utils/scripts/openapi_client_codegen.py +10 -10
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
- speedy_utils-1.1.29.dist-info/RECORD +57 -0
- vision_utils/README.md +202 -0
- vision_utils/__init__.py +4 -0
- vision_utils/io_utils.py +735 -0
- vision_utils/plot.py +345 -0
- speedy_utils-1.1.27.dist-info/RECORD +0 -52
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
llm_utils/lm/openai_memoize.py
CHANGED
@@ -1,91 +1,128 @@
[The pre-change module body (old lines 1-91) is not recoverable from the extracted diff; the post-change content of the hunk follows.]

from collections.abc import Callable
from typing import TYPE_CHECKING, Any

from speedy_utils.common.utils_cache import memoize


if TYPE_CHECKING:
    from openai import AsyncOpenAI, OpenAI


def _get_mopenai_class():
    """Lazily create MOpenAI class to avoid importing openai at module load."""
    from openai import OpenAI

    class MOpenAI(OpenAI):
        """
        MOpenAI(*args, **kwargs)

        Subclass of OpenAI that transparently memoizes the instance's `post` method.

        This class forwards all constructor arguments to the OpenAI base class and then
        replaces the instance's `post` method with a memoized wrapper:

        Behavior
        - The memoized `post` caches responses based on the arguments with which it is
          invoked, preventing repeated identical requests from invoking the underlying
          OpenAI API repeatedly.
        - Because `post` is replaced on the instance, the cache is by-default tied to
          the MOpenAI instance (per-instance cache).
        - Any initialization arguments are passed unchanged to OpenAI.__init__.

        Notes and cautions
        - The exact semantics of caching (cache key construction, expiry, max size,
          persistence) depend on the implementation of `memoize`. Ensure that the
          provided `memoize` supports the desired behavior (e.g., hashing of mutable
          inputs, thread-safety, TTL, cache invalidation).
        - If the original `post` method has important side effects or relies on
          non-deterministic behavior, memoization may change program behavior.
        - If you need a shared cache across instances, or more advanced cache controls,
          modify `memoize` or wrap at a class/static level instead of assigning to the
          bound method.
        - Type information is now fully preserved by the memoize decorator, eliminating
          the need for type casting.

        Example
            m = MOpenAI(api_key="...", model="gpt-4")
            r1 = m.post("Hello")  # executes API call and caches result
            r2 = m.post("Hello")  # returns cached result (no API call)
        """

        def __init__(self, *args, cache=True, **kwargs):
            super().__init__(*args, **kwargs)
            self._orig_post = self.post
            if cache:
                self.set_cache(cache)

        def set_cache(self, cache: bool) -> None:
            """Enable or disable caching of the post method."""
            if cache and self.post == self._orig_post:
                self.post = memoize(self._orig_post)  # type: ignore
            elif not cache and self.post != self._orig_post:
                self.post = self._orig_post

    return MOpenAI


def _get_masyncopenai_class():
    """Lazily create MAsyncOpenAI class to avoid importing openai at module load."""
    from openai import AsyncOpenAI

    class MAsyncOpenAI(AsyncOpenAI):
        """
        MAsyncOpenAI(*args, **kwargs)

        Async subclass of AsyncOpenAI that transparently memoizes the instance's `post` method.

        This class forwards all constructor arguments to the AsyncOpenAI base class and then
        replaces the instance's `post` method with a memoized wrapper:

        Behavior
        - The memoized `post` caches responses based on the arguments with which it is
          invoked, preventing repeated identical requests from invoking the underlying
          OpenAI API repeatedly.
        - Because `post` is replaced on the instance, the cache is by-default tied to
          the MAsyncOpenAI instance (per-instance cache).
        - Any initialization arguments are passed unchanged to AsyncOpenAI.__init__.

        Example
            m = MAsyncOpenAI(api_key="...", model="gpt-4")
            r1 = await m.post("Hello")  # executes API call and caches result
            r2 = await m.post("Hello")  # returns cached result (no API call)
        """

        def __init__(self, *args, cache=True, **kwargs):
            super().__init__(*args, **kwargs)
            self._orig_post = self.post
            if cache:
                self.set_cache(cache)

        def set_cache(self, cache: bool) -> None:
            """Enable or disable caching of the post method."""
            if cache and self.post == self._orig_post:
                self.post = memoize(self._orig_post)  # type: ignore
            elif not cache and self.post != self._orig_post:
                self.post = self._orig_post

    return MAsyncOpenAI


# Cache the classes so they're only created once
_MOpenAI_class = None
_MAsyncOpenAI_class = None


def MOpenAI(*args, **kwargs):
    """Factory function for MOpenAI that lazily loads openai module."""
    global _MOpenAI_class
    if _MOpenAI_class is None:
        _MOpenAI_class = _get_mopenai_class()
    return _MOpenAI_class(*args, **kwargs)


def MAsyncOpenAI(*args, **kwargs):
    """Factory function for MAsyncOpenAI that lazily loads openai module."""
    global _MAsyncOpenAI_class
    if _MAsyncOpenAI_class is None:
        _MAsyncOpenAI_class = _get_masyncopenai_class()
    return _MAsyncOpenAI_class(*args, **kwargs)
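The rewritten module exposes `MOpenAI` and `MAsyncOpenAI` as lazy factory functions rather than plain classes, so `openai` is only imported on first use. A minimal usage sketch, not part of the diff: the endpoint, model name, and API key below are placeholders, and whether a given request is actually served from cache depends on the `memoize` implementation and on how the installed openai SDK routes requests through the client's `post` method.

from llm_utils.lm.openai_memoize import MOpenAI

# Build a memoized client; constructor arguments are forwarded to openai.OpenAI.
client = MOpenAI(api_key="sk-placeholder", base_url="http://localhost:8000/v1")

messages = [{"role": "user", "content": "Say hi"}]
r1 = client.chat.completions.create(model="placeholder-model", messages=messages)
# An identical second request can be answered from the memoize cache.
r2 = client.chat.completions.create(model="placeholder-model", messages=messages)

# Turn caching off again; this restores the original, un-memoized `post`.
client.set_cache(False)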
llm_utils/lm/signature.py
CHANGED
@@ -5,14 +5,25 @@ This module provides a declarative way to define LLM input/output schemas
[The pre-change side of this diff is only partially recoverable from the extraction; the post-change content of each hunk is shown.]

with field descriptions and type annotations.
"""

import inspect
from typing import (
    Annotated,
    Any,
    Dict,
    List,
    Type,
    cast,
    get_args,
    get_origin,
    get_type_hints,
)

from pydantic import BaseModel, Field


class _FieldProxy:
    """Proxy that stores field information while appearing type-compatible."""

    def __init__(self, field_type: str, desc: str = "", **kwargs):
        self.field_type = field_type  # 'input' or 'output'
        self.desc = desc

@@ -21,12 +32,12 @@ class _FieldProxy:

def InputField(desc: str = "", **kwargs) -> Any:
    """Create an input field descriptor."""
    return cast(Any, _FieldProxy("input", desc=desc, **kwargs))


def OutputField(desc: str = "", **kwargs) -> Any:
    """Create an output field descriptor."""
    return cast(Any, _FieldProxy("output", desc=desc, **kwargs))


# Type aliases for cleaner syntax

@@ -42,19 +53,19 @@ def Output(desc: str = "", **kwargs) -> Any:

class SignatureMeta(type):
    """Metaclass for Signature that processes field annotations."""

    def __new__(cls, name, bases, namespace, **kwargs):
        # Get type hints for this class
        annotations = namespace.get("__annotations__", {})

        # Store field information
        input_fields = {}
        output_fields = {}

        for field_name, field_type in annotations.items():
            field_value = namespace.get(field_name)
            field_desc = None

            # Handle Annotated[Type, Field(...)] syntax using get_origin/get_args
            if get_origin(field_type) is Annotated:
                # Extract args from Annotated type

@@ -67,163 +78,163 @@ class SignatureMeta(type):
                if isinstance(metadata, _FieldProxy):
                    field_desc = metadata
                    break

            # Handle old syntax with direct assignment
            if field_desc is None and isinstance(field_value, _FieldProxy):
                field_desc = field_value

            # Store field information
            if field_desc and field_desc.field_type == "input":
                input_fields[field_name] = {
                    "type": field_type,
                    "desc": field_desc.desc,
                    **field_desc.kwargs,
                }
            elif field_desc and field_desc.field_type == "output":
                output_fields[field_name] = {
                    "type": field_type,
                    "desc": field_desc.desc,
                    **field_desc.kwargs,
                }

        # Store in class attributes
        namespace["_input_fields"] = input_fields
        namespace["_output_fields"] = output_fields

        return super().__new__(cls, name, bases, namespace)


class Signature(metaclass=SignatureMeta):
    """Base class for defining LLM signatures with input and output fields."""

    _input_fields: dict[str, dict[str, Any]] = {}
    _output_fields: dict[str, dict[str, Any]] = {}

    def __init__(self, **kwargs):
        """Initialize signature with field values."""
        for field_name, value in kwargs.items():
            setattr(self, field_name, value)

    @classmethod
    def get_instruction(cls) -> str:
        """Generate instruction text from docstring and field descriptions."""
        instruction = cls.__doc__ or "Complete the following task."
        instruction = instruction.strip()

        # Add input field descriptions
        if cls._input_fields:
            instruction += "\n\n**Input Fields:**\n"
            for field_name, field_info in cls._input_fields.items():
                desc = field_info.get("desc", "")
                field_type = field_info["type"]
                type_str = getattr(field_type, "__name__", str(field_type))
                instruction += f"- {field_name} ({type_str}): {desc}\n"

        # Add output field descriptions
        if cls._output_fields:
            instruction += "\n**Output Fields:**\n"
            for field_name, field_info in cls._output_fields.items():
                desc = field_info.get("desc", "")
                field_type = field_info["type"]
                type_str = getattr(field_type, "__name__", str(field_type))
                instruction += f"- {field_name} ({type_str}): {desc}\n"

        return instruction

    @classmethod
    def get_input_model(cls) -> type[BaseModel]:
        """Generate Pydantic input model from input fields."""
        if not cls._input_fields:
            raise ValueError(
                f"Signature {cls.__name__} must have at least one input field"
            )

        fields = {}
        annotations = {}

        for field_name, field_info in cls._input_fields.items():
            field_type = field_info["type"]
            desc = field_info.get("desc", "")

            # Create Pydantic field
            field_kwargs = {
                k: v for k, v in field_info.items() if k not in ["type", "desc"]
            }
            if desc:
                field_kwargs["description"] = desc

            fields[field_name] = Field(**field_kwargs) if field_kwargs else Field()
            annotations[field_name] = field_type

        # Create dynamic Pydantic model
        input_model = type(
            f"{cls.__name__}Input",
            (BaseModel,),
            {"__annotations__": annotations, **fields},
        )

        return input_model

    @classmethod
    def get_output_model(cls) -> type[BaseModel]:
        """Generate Pydantic output model from output fields."""
        if not cls._output_fields:
            raise ValueError(
                f"Signature {cls.__name__} must have at least one output field"
            )

        fields = {}
        annotations = {}

        for field_name, field_info in cls._output_fields.items():
            field_type = field_info["type"]
            desc = field_info.get("desc", "")

            # Create Pydantic field
            field_kwargs = {
                k: v for k, v in field_info.items() if k not in ["type", "desc"]
            }
            if desc:
                field_kwargs["description"] = desc

            fields[field_name] = Field(**field_kwargs) if field_kwargs else Field()
            annotations[field_name] = field_type

        # Create dynamic Pydantic model
        output_model = type(
            f"{cls.__name__}Output",
            (BaseModel,),
            {"__annotations__": annotations, **fields},
        )

        return output_model

    def format_input(self, **kwargs) -> str:
        """Format input fields as a string."""
        input_data = {}

        # Collect input field values
        for field_name in self._input_fields:
            if field_name in kwargs:
                input_data[field_name] = kwargs[field_name]
            elif hasattr(self, field_name):
                input_data[field_name] = getattr(self, field_name)

        # Format as key-value pairs
        formatted_lines = []
        for field_name, value in input_data.items():
            field_info = self._input_fields[field_name]
            desc = field_info.get("desc", "")
            if desc:
                formatted_lines.append(f"{field_name} ({desc}): {value}")
            else:
                formatted_lines.append(f"{field_name}: {value}")

        return "\n".join(formatted_lines)


# Export functions for easier importing
__all__ = ["Signature", "InputField", "OutputField", "Input", "Output"]


# Example usage for testing

@@ -231,41 +242,43 @@ if __name__ == "__main__":
    # Define a signature like DSPy - using Annotated approach
    class FactJudge(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: Annotated[str, Input("Context for the prediction")]
        question: Annotated[str, Input("Question to be answered")]
        answer: Annotated[str, Input("Answer for the question")]
        factually_correct: Annotated[
            bool, Output("Is the answer factually correct based on the context?")
        ]

    # Alternative syntax still works but will show type warnings
    class FactJudgeOldSyntax(Signature):
        """Judge if the answer is factually correct based on the context."""

        context: str = InputField(desc="Context for the prediction")  # type: ignore
        question: str = InputField(desc="Question to be answered")  # type: ignore
        answer: str = InputField(desc="Answer for the question")  # type: ignore
        factually_correct: bool = OutputField(desc="Is the answer factually correct based on the context?")  # type: ignore

    # Test both signatures
    for judge_class in [FactJudge, FactJudgeOldSyntax]:
        print(f"\n=== Testing {judge_class.__name__} ===")
        print("Instruction:")
        print(judge_class.get_instruction())

        print("\nInput Model:")
        input_model = judge_class.get_input_model()
        print(input_model.model_json_schema())

        print("\nOutput Model:")
        output_model = judge_class.get_output_model()
        print(output_model.model_json_schema())

        # Test instance usage
        judge = judge_class()
        input_text = judge.format_input(
            context="The sky is blue during daytime.",
            question="What color is the sky?",
            answer="Blue",
        )
        print("\nFormatted Input:")
        print(input_text)