speedy-utils 1.1.6__tar.gz → 1.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/PKG-INFO +1 -1
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/pyproject.toml +42 -10
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/__init__.py +1 -5
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/chat_format/transform.py +9 -9
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/group_messages.py +1 -1
- speedy_utils-1.1.8/src/llm_utils/lm/async_lm/__init__.py +7 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/lm/async_lm/_utils.py +7 -4
- speedy_utils-1.1.8/src/llm_utils/lm/async_lm/async_llm_task.py +516 -0
- speedy_utils-1.1.8/src/llm_utils/lm/async_lm/async_lm.py +387 -0
- speedy_utils-1.1.8/src/llm_utils/lm/async_lm/async_lm_base.py +407 -0
- speedy_utils-1.1.8/src/llm_utils/lm/async_lm/lm_specific.py +136 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/lm/utils.py +1 -3
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/scripts/vllm_load_balancer.py +49 -37
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/__init__.py +3 -1
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/notebook_utils.py +4 -4
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/report_manager.py +2 -3
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/utils_cache.py +233 -3
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/utils_io.py +2 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/scripts/mpython.py +1 -3
- speedy_utils-1.1.6/src/llm_utils/lm/async_lm/__init__.py +0 -2
- speedy_utils-1.1.6/src/llm_utils/lm/async_lm/async_llm_task.py +0 -154
- speedy_utils-1.1.6/src/llm_utils/lm/async_lm/async_lm.py +0 -779
- speedy_utils-1.1.6/src/llm_utils/lm/chat_html.py +0 -246
- speedy_utils-1.1.6/src/llm_utils/lm/lm_json.py +0 -68
- speedy_utils-1.1.6/src/llm_utils/lm/sync_lm.py +0 -943
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/README.md +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.6 → speedy_utils-1.1.8}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
--- speedy_utils-1.1.6/pyproject.toml
+++ speedy_utils-1.1.8/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "speedy-utils"
-version = "1.1.6"
+version = "1.1.8"
 description = "Fast and easy-to-use package for data science"
 authors = ["AnhVTH <anhvth.226@gmail.com>"]
 readme = "README.md"
@@ -66,16 +66,48 @@ svllm-lb = "llm_utils.scripts.vllm_load_balancer:run_load_balancer"
 openapi_client_codegen = "speedy_utils.scripts.openapi_client_codegen:main"
 
 
+[tool.ruff]
+exclude = [
+    "**/*.ipynb",
+    "notebooks/**/*.ipynb",
+    "legacy",
+    "**/__pycache__",
+    "**/.cache",
+    "**/.ruff_cache",
+    "**/.pytest_cache",
+    "**/.ipynb_checkpoints",
+    "**/.venv",
+    "**/.vscode",
+    "**/*.egg-info",
+    "**/*.lock",
+    "poetry.lock",
+    "Pipfile.lock",
+    "package-lock.json",
+    "yarn.lock",
+    "unsloth_compiled_cache",
+    "unsloth_training_checkpoints",
+]
+target-version = "py310"
+unsafe-fixes = true # allow deletions Ruff marks unsafe
+[tool.ruff.lint]
+ignore = [
+    "E401", # multiple imports on one line
+    "E402", # module level import not at top of file
+    "E501", # line too long
+    "F401", # unused import
+    "F403", # wildcard import
+    "F405", # name may be undefined, from wildcard import
+    "F841", # local variable assigned but never used
+    "E722", # do not use bare except
+    "E731", # do not assign a lambda expression, use a def
+    "E741", # ambiguous variable name
+    "E902", # io error
+]
+unfixable = ["E401", "E402", "E501", "F401", "F403"]
+extend-select = ["F"] # keep all pyflakes rules
+
+
 [tool.ruff.format]
 quote-style = "double"
 line-ending = "lf"
 docstring-code-format = true
-[tool.ruff]
-exclude = ["**/*.ipynb", "poly_frontend_controler/*", "poly_client/", "legacy"]
-ignore = [
-    "E501", # Line too long
-    "F401", # Unused import
-    "F403", # Wildcard import
-    "F841", # Local variable is assigned to but never used
-    "T201", # Use of `print` statement
-]
--- speedy_utils-1.1.6/src/llm_utils/__init__.py
+++ speedy_utils-1.1.8/src/llm_utils/__init__.py
@@ -10,7 +10,6 @@ from .chat_format import (
     transform_messages_to_chatml,
 )
 from .lm.async_lm import AsyncLLMTask, AsyncLM
-from .lm.sync_lm import LM, LLMTask
 
 __all__ = [
     "transform_messages",
@@ -21,10 +20,7 @@ __all__ = [
     "display_conversations",
     "build_chatml_input",
     "format_msgs",
-    # "group_messages_by_len",
-    "LM",
-    "AsyncLM",
     "display_chat_messages_as_html",
-    "
+    "AsyncLM",
     "AsyncLLMTask",
 ]
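With `sync_lm.py` deleted in 1.1.8, `LM` and `LLMTask` are no longer exported from `llm_utils`; only the async classes remain. A minimal sketch of the corresponding caller-side change (the caller code itself is hypothetical, not part of this diff):

```python
# Before (1.1.6), the synchronous classes were exported:
# from llm_utils import LM, LLMTask

# After (1.1.8), only the async classes are available from the package root:
from llm_utils import AsyncLM, AsyncLLMTask
```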
--- speedy_utils-1.1.6/src/llm_utils/chat_format/transform.py
+++ speedy_utils-1.1.8/src/llm_utils/chat_format/transform.py
@@ -16,9 +16,9 @@ def identify_format(item):
 def _transform_sharegpt_to_chatml(
     item, default_system_message="You are a helpful assistant.", print_msg=False
 ):
-    assert isinstance(
-        item
-    )
+    assert isinstance(item, dict), (
+        "The item is not in the correct format. Please check the format of the item."
+    )
 
     messages = []
     system_msg = item.get("system", "")
@@ -116,16 +116,16 @@ def transform_messages_to_chatml(input_data, input_format="auto"):
     input_data = deepcopy(input_data)
     if isinstance(input_data, list):
         input_format = "chatlm"
-        assert (
-
-        )
+        assert input_data[0].get("role") is not None, (
+            "The input format is not recognized. Please specify the input format."
+        )
     elif isinstance(input_data, dict):
         input_data = _transform_sharegpt_to_chatml(input_data)
         input_format = "sharegpt"
     elif isinstance(input_data, str):
-        assert (
-            "
-        )
+        assert "<|im_end|>" in input_data, (
+            "The input format is not recognized. Please specify the input format."
+        )
         input_format = "chatlm"
         parts = input_data.split("<|im_end|>")
         input_data = []
--- speedy_utils-1.1.6/src/llm_utils/group_messages.py
+++ speedy_utils-1.1.8/src/llm_utils/group_messages.py
@@ -76,7 +76,7 @@ def group_messages_by_len(
     """
     if messages is None:
         raise ValueError("messages parameter cannot be None")
-    from transformers.models.auto.tokenization_auto import AutoTokenizer
+    from transformers.models.auto.tokenization_auto import AutoTokenizer  # type: ignore
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
--- speedy_utils-1.1.6/src/llm_utils/lm/async_lm/_utils.py
+++ speedy_utils-1.1.8/src/llm_utils/lm/async_lm/_utils.py
@@ -48,13 +48,17 @@ def _yellow(t):
     return _color(33, t)
 
 
-TParsed = TypeVar("TParsed", bound=BaseModel)
+# TParsed = TypeVar("TParsed", bound=BaseModel)
 
+InputModelType = TypeVar("InputModelType", bound=BaseModel)
+OutputModelType = TypeVar("OutputModelType", bound=BaseModel)
 
-
+
+class ParsedOutput(TypedDict, Generic[OutputModelType]):
     messages: List
     completion: Any
-    parsed:
+    parsed: OutputModelType
+    model_kwargs: Dict[str, Any]
 
 
 # --------------------------------------------------------------------------- #
@@ -185,7 +189,6 @@ __all__ = [
     "Messages",
     "LegacyMsgs",
     "RawMsgs",
-    "TParsed",
     "ParsedOutput",
     "get_tokenizer",
     "inspect_word_probs_async",
--- /dev/null
+++ speedy_utils-1.1.8/src/llm_utils/lm/async_lm/async_llm_task.py
@@ -0,0 +1,516 @@
+"""
+Async LLM Task module for handling language model interactions with structured input/output.
+"""
+
+import copy
+import pathlib
+from abc import ABC
+from dataclasses import dataclass
+from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union, cast
+
+from openai.types.chat import ChatCompletionMessageParam
+from pydantic import BaseModel
+from pytest import Cache
+from speedy_utils import jdumps
+from speedy_utils.all import dump_json_or_pickle, identify
+
+from llm_utils.chat_format.display import get_conversation_one_turn
+from llm_utils.lm.async_lm._utils import InputModelType, OutputModelType, ParsedOutput
+from llm_utils.lm.async_lm.async_lm import AsyncLM
+
+# Type aliases for better readability
+TModel = TypeVar("TModel", bound=BaseModel)
+Messages = List[ChatCompletionMessageParam]
+LegacyMsgs = List[Dict[str, str]]
+RawMsgs = Union[Messages, LegacyMsgs]
+
+# Default configuration constants
+
+
+@dataclass
+class LMConfiguration:
+    """Configuration class for language model parameters."""
+
+    model: Optional[str] = None
+    temperature: Optional[float] = None
+    max_tokens: Optional[int] = None
+    host: Optional[str] = None
+    port: Optional[Union[int, str]] = None
+    base_url: Optional[str] = None
+    api_key: Optional[str] = None
+    cache: Optional[bool] = True
+    think: Optional[Literal[True, False]] = None
+    add_json_schema_to_instruction: Optional[bool] = None
+    use_beta: Optional[bool] = False
+    ports: Optional[List[int]] = None
+    top_p: Optional[float] = None
+    presence_penalty: Optional[float] = None
+    top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert configuration to dictionary format."""
+        return {
+            "model": self.model,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "host": self.host,
+            "port": self.port,
+            "base_url": self.base_url,
+            "api_key": self.api_key,
+            "cache": self.cache,
+            "think": self.think,
+            "add_json_schema_to_instruction": self.add_json_schema_to_instruction,
+            "use_beta": self.use_beta,
+            "ports": self.ports,
+            "top_p": self.top_p,
+            "presence_penalty": self.presence_penalty,
+            "top_k": self.top_k,
+            "repetition_penalty": self.repetition_penalty,
+        }
+
+
+class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
+    """
+    Abstract base class for asynchronous language model tasks with structured I/O.
+
+    This class provides a framework for creating LLM tasks with strongly typed
+    input and output models, automatic training data collection, and support
+    for both thinking and non-thinking modes.
+
+    Type Parameters:
+        InputModelType: Pydantic model type for task input
+        OutputModelType: Pydantic model type for task output
+    """
+
+    InputModel: InputModelType
+    OutputModel: OutputModelType
+
+    # default class attributes for configuration
+    DEFAULT_MODEL: Optional[str] = None
+    DEFAULT_CACHE_DIR: Optional[pathlib.Path] = None
+    DEFAULT_TEMPERATURE: Optional[float] = None
+    DEFAULT_MAX_TOKENS: Optional[int] = None
+    DEFAULT_HOST: Optional[str] = None
+    DEFAULT_PORT: Optional[Union[int, str]] = None
+    DEFAULT_TOP_P: Optional[float] = None
+    DEFAULT_PRESENCE_PENALTY: Optional[float] = None
+    DEFAULT_TOP_K: Optional[int] = None
+    DEFAULT_REPETITION_PENALTY: Optional[float] = None
+    DEFAULT_CACHE: Optional[bool] = True
+    DEFAULT_THINK: Optional[Literal[True, False]] = None
+    DEFAULT_PORTS: Optional[List[int]] = None
+    DEFAULT_USE_BETA: Optional[bool] = False
+    DEFAULT_ADD_JSON_SCHEMA_TO_INSTRUCTION: Optional[bool] = True
+    DEFAULT_COLLECT_DATA: Optional[bool] = None
+    DEFAULT_BASE_URL: Optional[str] = None
+    DEFAULT_API_KEY: Optional[str] = None
+
+    IS_DATA_COLLECTION: bool = False
+
+    def __init__(
+        self,
+        model: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        host: Optional[str] = None,
+        port: Optional[Union[int, str]] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        cache: Optional[bool] = None,
+        think: Optional[Literal[True, False]] = None,
+        add_json_schema_to_instruction: Optional[bool] = None,
+        use_beta: Optional[bool] = None,
+        ports: Optional[List[int]] = None,
+        top_p: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        top_k: Optional[int] = None,
+        repetition_penalty: Optional[float] = None,
+    ) -> None:
+        """
+        Initialize the AsyncLLMTask with language model configuration.
+
+        All arguments are optional; defaults are taken from class attributes if not provided.
+        """
+        self._config = LMConfiguration(
+            model=model if model is not None else self.DEFAULT_MODEL,
+            temperature=temperature
+            if temperature is not None
+            else self.DEFAULT_TEMPERATURE,
+            max_tokens=max_tokens
+            if max_tokens is not None
+            else self.DEFAULT_MAX_TOKENS,
+            host=host if host is not None else self.DEFAULT_HOST,
+            port=port if port is not None else self.DEFAULT_PORT,
+            base_url=base_url if base_url is not None else self.DEFAULT_BASE_URL,
+            api_key=api_key if api_key is not None else self.DEFAULT_API_KEY,
+            cache=cache if cache is not None else self.DEFAULT_CACHE,
+            think=think if think is not None else self.DEFAULT_THINK,
+            add_json_schema_to_instruction=add_json_schema_to_instruction
+            if add_json_schema_to_instruction is not None
+            else self.DEFAULT_ADD_JSON_SCHEMA_TO_INSTRUCTION,
+            use_beta=use_beta if use_beta is not None else self.DEFAULT_USE_BETA,
+            ports=ports if ports is not None else self.DEFAULT_PORTS,
+            top_p=top_p if top_p is not None else self.DEFAULT_TOP_P,
+            presence_penalty=presence_penalty
+            if presence_penalty is not None
+            else self.DEFAULT_PRESENCE_PENALTY,
+            top_k=top_k if top_k is not None else self.DEFAULT_TOP_K,
+            repetition_penalty=repetition_penalty
+            if repetition_penalty is not None
+            else self.DEFAULT_REPETITION_PENALTY,
+        )
+        self._lm: Optional[AsyncLM] = None
+
+    @property
+    def lm(self) -> AsyncLM:
+        """
+        Lazy-loaded AsyncLM instance with proper configuration.
+
+        Returns:
+            Configured AsyncLM instance for this task
+        """
+        if self._lm is None:
+            self._lm = AsyncLM(
+                **self._config.to_dict(),
+                response_model=self._get_output_model_type(),
+            )
+        return self._lm
+
+    def _get_output_model_type(self) -> type[OutputModelType]:
+        """
+        Extract the output model type from generic type arguments.
+
+        Returns:
+            The OutputModelType class
+
+        Raises:
+            TypeError: If output model type cannot be determined
+        """
+        # Try to get type from generic base classes
+        orig_bases = getattr(self.__class__, "__orig_bases__", None)
+        if (
+            orig_bases
+            and hasattr(orig_bases[0], "__args__")
+            and len(orig_bases[0].__args__) >= 2
+        ):
+            return orig_bases[0].__args__[1]
+
+        # Fallback to class attribute
+        if hasattr(self, "OutputModel"):
+            return self.OutputModel  # type: ignore
+
+        raise TypeError(
+            f"{self.__class__.__name__} must define OutputModel as a class attribute "
+            "or use proper generic typing with AsyncLLMTask[InputModel, OutputModel]"
+        )
+
+    def _get_input_model_type(self) -> type[InputModelType]:
+        """
+        Extract the input model type from generic type arguments.
+
+        Returns:
+            The InputModelType class
+
+        Raises:
+            TypeError: If input model type cannot be determined
+        """
+        # Try to get type from generic base classes
+        orig_bases = getattr(self.__class__, "__orig_bases__", None)
+        if (
+            orig_bases
+            and hasattr(orig_bases[0], "__args__")
+            and len(orig_bases[0].__args__) >= 2
+        ):
+            return orig_bases[0].__args__[0]
+
+        raise TypeError(
+            f"{self.__class__.__name__} must define InputModel as a class attribute "
+            "or use proper generic typing with AsyncLLMTask[InputModel, OutputModel]"
+        )
+
+    def _validate_and_convert_input(self, data: Union[BaseModel, dict]) -> BaseModel:
+        """
+        Validate and convert input data to the expected input model type.
+
+        Args:
+            data: Input data as BaseModel instance or dictionary
+
+        Returns:
+            Validated BaseModel instance
+
+        Raises:
+            TypeError: If input data cannot be converted to InputModel
+        """
+        if isinstance(data, BaseModel):
+            return data
+
+        input_model_type = self._get_input_model_type()
+        if isinstance(input_model_type, type) and issubclass(
+            input_model_type, BaseModel
+        ):
+            try:
+                return input_model_type(**data)
+            except Exception as e:
+                raise TypeError(
+                    f"Failed to convert input data to {input_model_type.__name__}: {e}"
+                ) from e
+
+        raise TypeError("InputModel must be a subclass of BaseModel")
+
+    def _validate_output_model(self) -> type[BaseModel]:
+        """
+        Validate that the output model is properly configured.
+
+        Returns:
+            The validated output model type
+
+        Raises:
+            TypeError: If output model is not a valid BaseModel subclass
+        """
+        output_model_type = self._get_output_model_type()
+        if not (
+            isinstance(output_model_type, type)
+            and issubclass(output_model_type, BaseModel)
+        ):
+            raise TypeError("OutputModel must be a subclass of BaseModel")
+        return output_model_type
+
+    async def _base_call(
+        self, data: Union[BaseModel, dict]
+    ) -> ParsedOutput[OutputModelType]:
+        """
+        Core method that handles language model interaction with type safety.
+
+        Args:
+            data: Input data as BaseModel instance or dictionary
+
+        Returns:
+            Parsed output from the language model
+
+        Raises:
+            TypeError: If input/output models are not properly configured
+        """
+        # Validate input and output models
+        validated_input = self._validate_and_convert_input(data)
+        self._validate_output_model()
+
+        # Execute the language model call
+        return cast(
+            ParsedOutput[OutputModelType],
+            await self.lm.parse(
+                instruction=self.__doc__ or "",
+                prompt=validated_input.model_dump_json(),
+            ),
+        )
+
+    def _create_no_think_messages(self, think_messages: Messages) -> Messages:
+        """
+        Convert thinking mode messages to non-thinking mode.
+
+        Args:
+            think_messages: Original messages with thinking mode enabled
+
+        Returns:
+            Messages converted to non-thinking mode
+        """
+        if not think_messages:
+            return think_messages
+
+        # Create deep copy to avoid modifying original
+        no_think_messages = copy.deepcopy(think_messages)
+
+        # Update system message
+        if no_think_messages and "content" in no_think_messages[0]:
+            system_content = no_think_messages[0]["content"]
+            if isinstance(system_content, str):
+                no_think_messages[0]["content"] = system_content.replace(
+                    "/think", "/no_think"
+                )
+
+        # Update assistant message (last message)
+        if len(no_think_messages) > 1 and "content" in no_think_messages[-1]:
+            assistant_content = no_think_messages[-1]["content"]
+            if isinstance(assistant_content, str) and "</think>" in assistant_content:
+                # Extract content after thinking block
+                post_think_content = assistant_content.split("</think>", 1)[1].strip()
+                no_think_messages[-1]["content"] = (
+                    f"<think>\n\n</think>\n\n{post_think_content}"
+                )
+
+        return no_think_messages
+
+    def _save_training_data(
+        self,
+        input_data: InputModelType,
+        think_messages: Messages,
+        no_think_messages: Messages,
+        model_kwargs: Dict[str, Any],
+        cache_dir: pathlib.Path,
+        expected_response: Optional[OutputModelType] = None,
+        label: Optional[str] = None,
+    ) -> None:
+        """
+        Save training data to cache directory.
+
+        Args:
+            input_data: Input data for the task
+            think_messages: Messages with thinking mode
+            no_think_messages: Messages without thinking mode
+            model_kwargs: Model configuration used
+            cache_dir: Directory to save training data
+            expected_response: Expected response for validation
+            label: Optional label for the training data
+        """
+        # Create unique identifier for this input
+        input_id = identify(input_data.model_dump())
+        class_cache_dir = cache_dir / self.__class__.__name__
+        class_cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Prepare combined training data
+        training_data = {
+            "think_messages": think_messages,
+            "no_think_messages": no_think_messages,
+            "model_kwargs": model_kwargs,
+            "input_data": input_data.model_dump(),
+            "label": label,
+        }
+
+        if expected_response is not None:
+            training_data["expected_response"] = expected_response.model_dump()
+
+        # Save to file
+        training_file = class_cache_dir / f"{input_id}.json"
+        dump_json_or_pickle(training_data, str(training_file))
+
+    async def _generate_training_data_with_thinking_mode(
+        self,
+        input_data: InputModelType,
+        expected_response: Optional[OutputModelType] = None,
+        label: Optional[str] = None,
+        cache_dir: pathlib.Path = DEFAULT_CACHE_DIR,
+    ) -> OutputModelType:
+        """
+        Generate training data for both thinking and non-thinking modes.
+
+        This method executes the task in thinking mode, then creates equivalent
+        non-thinking mode data for training purposes. Both versions are saved
+        to the cache directory for later use in model training.
+
+        Args:
+            input_data: Input data for the task
+            expected_response: Expected response for validation
+            label: Optional label for the training data
+            cache_dir: Directory to save training data
+
+        Returns:
+            Parsed output from the language model
+        """
+        # Execute the base call to get thinking mode data
+        output = await self._base_call(input_data)
+        parsed_result = output["parsed"]
+        think_messages = output["messages"]
+
+        # Create non-thinking mode equivalent
+        no_think_messages = self._create_no_think_messages(think_messages)
+
+        # Save training data
+        self._save_training_data(
+            input_data=input_data,
+            think_messages=think_messages,
+            no_think_messages=no_think_messages,
+            model_kwargs=output["model_kwargs"],
+            cache_dir=cache_dir,
+            expected_response=expected_response,
+            label=label,
+        )
+
+        return parsed_result
+
+    def _should_collect_data(self) -> bool:
+        """
+        Determine if training data should be collected for this call.
+
+        Returns:
+            True if data collection is enabled
+        """
+        return self.IS_DATA_COLLECTION
+
+    async def __call__(
+        self,
+        input_data: InputModelType,
+        expected_response: Optional[OutputModelType] = None,
+        label: Optional[str] = None,
+        **kwargs: Any,
+    ) -> OutputModelType:
+        """
+        Execute the LLM task with the provided input data.
+
+        This is the main entry point for task execution. If data collection
+        is enabled (either via instance configuration or environment variable),
+        training data will be automatically generated and saved.
+
+        Args:
+            input_data: Input data conforming to InputModelType
+            expected_response: Expected response for validation during data collection
+            label: Optional label for training data categorization
+            **kwargs: Additional keyword arguments (for future extensibility)
+
+        Returns:
+            Parsed output conforming to OutputModelType
+        """
+        if self._should_collect_data():
+            return await self._generate_training_data_with_thinking_mode(
+                input_data=input_data,
+                expected_response=expected_response,
+                label=label,
+            )
+        else:
+            output = await self._base_call(input_data)
+            return output["parsed"]
+
+    def generate_training_data(
+        self, input_json: str, output_json: str
+    ) -> Dict[str, Any]:
+        """
+        Generate training data in ShareGPT format for the given input/output pair.
+
+        This method is useful for creating training datasets from existing
+        input/output pairs without executing the language model.
+
+        Args:
+            input_dict: Input data as dictionary
+            output: Output data as dictionary
+
+        Returns:
+            Training data in ShareGPT message format
+
+        Raises:
+            AttributeError: If InputModel or OutputModel are not properly defined
+        """
+        # if not hasattr(self, "InputModel") or not hasattr(self, "OutputModel"):
+        #     raise AttributeError(
+        #         f"{self.__class__.__name__} must define InputModel and OutputModel "
+        #         "as class attributes to use generate_training_data"
+        #     )
+
+        system_prompt = self.__doc__ or ""
+        assert isinstance(input_json, str), "Input must be a JSON string"
+        assert isinstance(output_json, str), "Output must be a JSON string"
+        messages = get_conversation_one_turn(
+            system_msg=system_prompt,
+            user_msg=input_json,
+            assistant_msg=output_json,
+        )
+
+        return {"messages": messages}
+
+    # Compatibility alias for other LLMTask implementations
+    arun = __call__
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self._lm and hasattr(self._lm, "aclose"):  # Or self._lm.client
+            await self._lm._last_client._client.aclose()