speedy-utils 1.1.0__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/PKG-INFO +1 -1
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/pyproject.toml +1 -1
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/async_lm.py +63 -86
- speedy_utils-1.1.3/src/llm_utils/scripts/vllm_load_balancer.py +882 -0
- speedy_utils-1.1.0/src/llm_utils/scripts/vllm_load_balancer.py +0 -509
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/README.md +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/chat_html.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/lm_json.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/sync_lm.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
{speedy_utils-1.1.0 → speedy_utils-1.1.3}/src/llm_utils/lm/async_lm.py +63 -86

```diff
@@ -1,84 +1,10 @@
-"""
-# ============================================================================= #
-# ASYNCHRONOUS LANGUAGE MODEL WRAPPER WITH CONCURRENT EXECUTION SUPPORT
-# ============================================================================= #
-#
-# Title & Intent:
-# High-performance asynchronous language model interface for concurrent LLM operations
-#
-# High-level Summary:
-# This module provides an async drop-in replacement for the synchronous LM class, designed
-# for high-throughput applications requiring concurrent language model operations. It maintains
-# full API compatibility while adding async/await semantics, connection pooling, and efficient
-# resource management. The AsyncLM class supports batch processing, concurrent request handling,
-# and maintains the same caching and type safety guarantees as the synchronous version.
-#
-# Public API / Data Contracts:
-# • AsyncLM(model, temperature=0.0, max_tokens=2000, host="localhost", port=None, **kwargs) - Async wrapper class
-# • async AsyncLM.__call__(prompt=None, messages=None, response_format=str, cache=None, **kwargs) -> str | BaseModel
-# • async AsyncLM.list_models(port=None) -> List[str] - Enumerate available models
-# • async AsyncLM.count_tokens(messages, model=None) -> int - Token counting utility
-# • async AsyncLM.price(messages, model=None, response_tokens=0) -> float - Cost estimation
-# • AsyncLM.set_model(model_name) -> None - Runtime model switching (sync method)
-# • async AsyncLM.batch_call(requests) -> List[Union[str, BaseModel]] - Concurrent batch processing
-# • TModel = TypeVar("TModel", bound=BaseModel) - Generic type for structured responses
-# • Messages = List[ChatCompletionMessageParam] - Typed message format
-#
-# Invariants / Constraints:
-# • MUST be used within async context (asyncio event loop required)
-# • MUST provide either 'prompt' or 'messages' parameter, but not both
-# • MUST properly await all async method calls
-# • Connection pooling MUST handle concurrent requests efficiently
-# • MUST maintain thread safety across concurrent operations
-# • Rate limit handling MUST use async backoff without blocking event loop
-# • MUST preserve all synchronous LM class behaviors and constraints
-# • Resource cleanup MUST occur on context manager exit or explicit close
-#
-# Usage Example:
-# ```python
-# import asyncio
-# from llm_utils.lm.async_lm import AsyncLM
-# from pydantic import BaseModel
-#
-# class SummaryResponse(BaseModel):
-#     summary: str
-#     key_points: List[str]
-#     confidence: float
-#
-# async def main():
-#     # Single async call
-#     lm = AsyncLM(model="gpt-4o-mini", temperature=0.1)
-#     response = await lm(prompt="Summarize quantum computing")
-#     print(response)
-#
-#     # Concurrent batch processing
-#     texts = ["Text 1 to summarize", "Text 2 to summarize", "Text 3 to summarize"]
-#     tasks = [lm(prompt=f"Summarize: {text}", response_format=SummaryResponse) for text in texts]
-#     summaries = await asyncio.gather(*tasks)
-#
-#     for summary in summaries:
-#         print(f"Summary: {summary.summary}")
-#         print(f"Key points: {summary.key_points}")
-#
-# asyncio.run(main())
-# ```
-#
-# TODO & Future Work:
-# • Add async context manager support for automatic resource cleanup
-# • Implement connection pool size optimization based on usage patterns
-# • Add async streaming response support with async generators
-# • Optimize memory usage for large-scale concurrent operations
-# • Add async rate limiting with priority queuing
-#
-# ============================================================================= #
-"""
 
 import base64
 import hashlib
 import json
 import os
 from abc import ABC
-from functools import lru_cache
+from functools import cache, lru_cache
 from typing import (
     Any,
     Dict,
@@ -110,7 +36,7 @@ from openai.types.chat import (
 )
 from openai.types.model import Model
 from pydantic import BaseModel
-
+from pydantic import ValidationError
 from llm_utils.chat_format.display import get_conversation_one_turn
 
 # --------------------------------------------------------------------------- #
```
```diff
@@ -146,10 +72,13 @@ def _yellow(t):
     return _color(33, t)
 
 
-
+TParsed = TypeVar("TParsed", bound=BaseModel)
+
+
+class ParsedOutput(TypedDict, Generic[TParsed]):
     messages: List
     completion: Any
-    parsed:
+    parsed: TParsed
 
 
 class AsyncLM:
```
```diff
@@ -460,7 +389,7 @@ class AsyncLM:
     # ------------------------------------------------------------------ #
     async def parse(
         self,
-        response_model: Type[
+        response_model: Type[TParsed],
         instruction: Optional[str] = None,
         prompt: Optional[str] = None,
         messages: Optional[RawMsgs] = None,
@@ -470,7 +399,7 @@ class AsyncLM:
         max_tokens: Optional[int] = None,
         cache: Optional[bool] = True,
         **kwargs,
-    ) -> ParsedOutput
+    ) -> ParsedOutput[TParsed]:
         """Parse response using guided JSON generation."""
         if messages is None:
             assert instruction is not None, "Instruction must be provided."
```
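For readers skimming the diff: the two hunks above make `ParsedOutput` generic and thread `TParsed` through `parse()`, so the `parsed` field is typed as the concrete response model instead of a bare `BaseModel`. Below is a minimal, illustrative sketch of the caller-side effect; the `AsyncLM` constructor and the `parse()` keywords are taken from this diff and the removed module header, not verified against the released package.

```python
# Illustrative sketch only; signatures assumed from this diff, not the published API.
import asyncio
from typing import List

from pydantic import BaseModel

from llm_utils.lm.async_lm import AsyncLM


class SummaryResponse(BaseModel):
    summary: str
    key_points: List[str]


async def main() -> None:
    lm = AsyncLM(model="gpt-4o-mini", temperature=0.1)
    # parse() now returns ParsedOutput[SummaryResponse], so type checkers see
    # result["parsed"] as a SummaryResponse rather than an untyped value.
    result = await lm.parse(
        response_model=SummaryResponse,
        instruction="Summarize the given text.",
        prompt="Quantum computing uses qubits to ...",
    )
    summary: SummaryResponse = result["parsed"]
    print(summary.summary, summary.key_points)


asyncio.run(main())
```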
```diff
@@ -513,6 +442,7 @@ class AsyncLM:
 
         use_cache = self.do_cache if cache is None else cache
         cache_key = None
+        completion = None
         if use_cache:
             cache_data = {
                 "messages": messages,
@@ -522,7 +452,7 @@ class AsyncLM:
             }
             cache_key = self._cache_key(cache_data, {}, response_model)
             completion = self._load_cache(cache_key)  # dict
-
+        if not completion:
             completion = await self.client.chat.completions.create(
                 model=self.model,  # type: ignore
                 messages=messages,  # type: ignore
@@ -532,10 +462,12 @@ class AsyncLM:
             completion = completion.model_dump()
             if cache_key:
                 self._dump_cache(cache_key, completion)
-
+        assert isinstance(completion, dict), (
+            "Completion must be a dictionary with OpenAI response format."
+        )
         self.last_log = [prompt, messages, completion]
 
-        output = self._parse_complete_output(completion, response_model)
+        output = cast(TParsed, self._parse_complete_output(completion, response_model))
         full_messages = messages + [completion]
         return ParsedOutput(
             messages=full_messages,
```
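Taken together, the three hunks above implement a cache-aside flow inside `parse()`: start with `completion = None`, try the cache, call the endpoint only on a miss, write the result back, and assert the dict shape before parsing. A generic sketch of that pattern is below; `load_cache`, `dump_cache`, and `call_api` are hypothetical stand-ins for the package's private `_load_cache`/`_dump_cache` helpers and the OpenAI client call.

```python
# Generic cache-aside sketch of the flow introduced above; the helper callables
# are hypothetical stand-ins, not part of speedy-utils.
import hashlib
import json
from typing import Any, Callable, Dict, Optional


def cached_completion(
    cache_data: Dict[str, Any],
    call_api: Callable[[], Dict[str, Any]],
    load_cache: Callable[[str], Optional[Dict[str, Any]]],
    dump_cache: Callable[[str, Dict[str, Any]], None],
    use_cache: bool = True,
) -> Dict[str, Any]:
    cache_key: Optional[str] = None
    completion: Optional[Dict[str, Any]] = None
    if use_cache:
        # Stable key over the request parameters, mirroring the _cache_key() idea.
        cache_key = hashlib.sha256(
            json.dumps(cache_data, sort_keys=True).encode()
        ).hexdigest()
        completion = load_cache(cache_key)
    if not completion:
        completion = call_api()  # only hit the endpoint on a cache miss
        if cache_key:
            dump_cache(cache_key, completion)
    assert isinstance(completion, dict), "completion must be a dict"
    return completion
```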
```diff
@@ -555,7 +487,49 @@ class AsyncLM:
 
         content = completion["choices"][0]["message"]["content"]
         if not content:
-
+            # Enhanced error for debugging: show input tokens and their count
+
+            # Try to extract tokens from the completion for debugging
+            input_tokens = None
+            try:
+                input_tokens = completion.get('usage', {}).get('prompt_tokens')
+            except Exception:
+                input_tokens = None
+
+            # Try to get the prompt/messages for tokenization
+            prompt = None
+            try:
+                prompt = completion.get('messages') or completion.get('prompt')
+            except Exception:
+                prompt = None
+
+            tokens_preview = ''
+            if prompt is not None:
+                try:
+                    tokenizer = get_tokenizer(self.model)
+                    if isinstance(prompt, list):
+                        prompt_text = '\n'.join(
+                            m.get('content', '') for m in prompt if isinstance(m, dict)
+                        )
+                    else:
+                        prompt_text = str(prompt)
+                    tokens = tokenizer.encode(prompt_text)
+                    n_tokens = len(tokens)
+                    first_100 = tokens[:100]
+                    last_100 = tokens[-100:] if n_tokens > 100 else []
+                    tokens_preview = (
+                        f'\nInput tokens: {n_tokens}'
+                        f'\nFirst 100 tokens: {first_100}'
+                        f'\nLast 100 tokens: {last_100}'
+                    )
+                except Exception as exc:
+                    tokens_preview = f'\n[Tokenization failed: {exc}]'
+
+            raise ValueError(
+                f'Empty content in response.'
+                f'\nInput tokens (if available): {input_tokens}'
+                f'{tokens_preview}'
+            )
 
         try:
             data = json.loads(content)
```
```diff
@@ -737,6 +711,7 @@ async def inspect_word_probs_async(lm, tokenizer, messages):
     """Async version of inspect_word_probs."""
 
     import numpy as np
+
 
     async def compute_word_log_probs(
         tokenizer: Any,
@@ -894,12 +869,14 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
     temperature: float = 0.6
     think: bool = False
     add_json_schema: bool = False
+    cache: bool = False
 
     async def __call__(
         self,
         data: BaseModel | dict,
         temperature: float = 0.1,
         cache: bool = False,
+        think: Optional[bool] = None,  # if not None, overrides self.think
     ) -> tuple[OutputModelType, List[Dict[str, Any]]]:
         # Get the input and output model types from the generic parameters
         type_args = getattr(self.__class__, "__orig_bases__", None)
```
```diff
@@ -940,9 +917,9 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
             instruction=self.__doc__ or "",
             response_model=output_model,
             temperature=temperature or self.temperature,
-            think=self.think,
+            think=think if think is not None else self.think,
             add_json_schema_to_instruction=self.add_json_schema,
-            cache=cache,
+            cache=self.cache or cache,
         )
 
         return (
```
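The `AsyncLLMTask` hunks add a class-level `cache` default (OR-ed with the call-time flag) and a per-call `think` override. Below is a hedged sketch of how a subclass might use the new knobs, assuming the generic `[InputModelType, OutputModelType]` parameters and the docstring-as-instruction behavior visible in this diff; how the task is wired to a concrete endpoint is omitted.

```python
# Illustrative subclass only; everything beyond cache/think/temperature and the
# __call__ signature shown in this diff is an assumption.
import asyncio

from pydantic import BaseModel

from llm_utils.lm.async_lm import AsyncLLMTask


class ReviewInput(BaseModel):
    text: str


class ReviewVerdict(BaseModel):
    sentiment: str
    score: float


class ClassifyReview(AsyncLLMTask[ReviewInput, ReviewVerdict]):
    """Classify the sentiment of a product review."""  # docstring doubles as the instruction

    temperature = 0.2
    cache = True  # new in 1.1.3: class-level default, OR-ed with the call-time flag


async def run() -> None:
    task = ClassifyReview()  # model/endpoint configuration omitted in this sketch
    # New in 1.1.3: think=... at call time overrides the class attribute.
    verdict, messages = await task(ReviewInput(text="Great battery life!"), think=False)
    print(verdict.sentiment, verdict.score)


if __name__ == "__main__":
    asyncio.run(run())
```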