speedy-utils 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_utils/__init__.py CHANGED
@@ -10,7 +10,6 @@ from .chat_format import (
      transform_messages_to_chatml,
  )
  from .lm.async_lm import AsyncLLMTask, AsyncLM
- from .lm.sync_lm import LM, LLMTask

  __all__ = [
      "transform_messages",
@@ -21,10 +20,7 @@ __all__ = [
      "display_conversations",
      "build_chatml_input",
      "format_msgs",
-     # "group_messages_by_len",
-     "LM",
-     "AsyncLM",
      "display_chat_messages_as_html",
-     "LLMTask",
+     "AsyncLM",
      "AsyncLLMTask",
  ]
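The practical effect of these two hunks: the synchronous `LM` and `LLMTask` classes are no longer re-exported from the package root, so root-level imports of them break on upgrade. A minimal migration sketch, assuming the async classes are the intended replacements (the diff does not show whether `llm_utils.lm.sync_lm` still exists at its old path):

# 1.1.5 (worked): sync and async classes both re-exported from the root
# from llm_utils import LM, LLMTask, AsyncLM, AsyncLLMTask

# 1.1.7: only the async pair remains in the root __all__
from llm_utils import AsyncLM, AsyncLLMTask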
@@ -1,7 +1,9 @@
  from __future__ import annotations
+
+ from difflib import SequenceMatcher
  from typing import Any, Optional
+
  from IPython.display import HTML, display
- from difflib import SequenceMatcher


  def show_chat(
@@ -19,6 +21,17 @@ def show_chat(
          isinstance(msg, dict) and "role" in msg and "content" in msg for msg in msgs
      ), "The input format is not recognized. Please specify the input format."

+     if isinstance(msgs[-1], dict) and "choices" in msgs[-1]:
+         message = msgs[-1]["choices"][0]["message"]
+         reasoning_content = message.get("reasoning_content")
+         content = message.get("content", "")
+         if reasoning_content:
+             content = reasoning_content + "\n" + content
+         msgs[-1] = {
+             "role": message["role"],
+             "content": content,
+         }
+
      themes: dict[str, dict[str, dict[str, str]]] = {
          "default": {
              "system": {"background": "#ffaaaa", "text": "#222222"},  # More red
@@ -156,9 +169,9 @@ def get_conversation_one_turn(
      if assistant_msg is not None:
          messages.append({"role": "assistant", "content": assistant_msg})
      if assistant_prefix is not None:
-         assert (
-             return_format != "chatml"
-         ), 'Change return_format to "text" if you want to use assistant_prefix'
+         assert return_format != "chatml", (
+             'Change return_format to "text" if you want to use assistant_prefix'
+         )
          assert messages[-1]["role"] == "user"
          from .transform import transform_messages

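For orientation, a hedged call sketch: the hunk implies `get_conversation_one_turn` takes `assistant_msg`, `assistant_prefix`, and `return_format`, and that a prefix is only legal with text output. The other parameter name below is a guess, since the full signature is not in this diff:

convo = get_conversation_one_turn(
    user_msg="Name three primes.",  # hypothetical parameter name
    assistant_prefix="Sure: ",      # seeds the assistant turn
    return_format="text",           # "chatml" would trip the assert above
)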
@@ -16,9 +16,9 @@ def identify_format(item):
  def _transform_sharegpt_to_chatml(
      item, default_system_message="You are a helpful assistant.", print_msg=False
  ):
-     assert isinstance(
-         item, dict
-     ), "The item is not in the correct format. Please check the format of the item."
+     assert isinstance(item, dict), (
+         "The item is not in the correct format. Please check the format of the item."
+     )

      messages = []
      system_msg = item.get("system", "")
@@ -116,16 +116,16 @@ def transform_messages_to_chatml(input_data, input_format="auto"):
      input_data = deepcopy(input_data)
      if isinstance(input_data, list):
          input_format = "chatlm"
-         assert (
-             input_data[0].get("role") is not None
-         ), "The input format is not recognized. Please specify the input format."
+         assert input_data[0].get("role") is not None, (
+             "The input format is not recognized. Please specify the input format."
+         )
      elif isinstance(input_data, dict):
          input_data = _transform_sharegpt_to_chatml(input_data)
          input_format = "sharegpt"
      elif isinstance(input_data, str):
-         assert (
-             "<|im_end|>" in input_data
-         ), "The input format is not recognized. Please specify the input format."
+         assert "<|im_end|>" in input_data, (
+             "The input format is not recognized. Please specify the input format."
+         )
          input_format = "chatlm"
          parts = input_data.split("<|im_end|>")
          input_data = []
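The dispatch above, restated as calls (note the literal format string is "chatlm" in the source, not "chatml"). The ShareGPT dict layout beyond the `system` key is not shown in this diff, so that example is schematic:

transform_messages_to_chatml([{"role": "user", "content": "hi"}])     # list -> role-keyed messages
transform_messages_to_chatml({"system": "...", "conversations": []})  # dict -> ShareGPT (keys assumed)
transform_messages_to_chatml("<|im_start|>user\nhi<|im_end|>")        # str -> split on "<|im_end|>"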
@@ -76,7 +76,7 @@ def group_messages_by_len(
      """
      if messages is None:
          raise ValueError("messages parameter cannot be None")
-     from transformers.models.auto.tokenization_auto import AutoTokenizer # type: ignore
+     from transformers.models.auto.tokenization_auto import AutoTokenizer  # type: ignore

      tokenizer = AutoTokenizer.from_pretrained(model_name)

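Only the comment spacing changed here, but the hunk does show how `group_messages_by_len` loads its tokenizer: lazily, keyed by `model_name`. A hedged call sketch (parameters other than `messages` and `model_name` are not visible in this diff):

groups = group_messages_by_len(
    messages=[{"role": "user", "content": "hello"}],
    model_name="Qwen/Qwen2.5-7B-Instruct",  # hypothetical model id
)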
@@ -0,0 +1,7 @@
+ from .async_llm_task import AsyncLLMTask
+ from .async_lm import AsyncLM
+
+ __all__ = [
+     "AsyncLM",
+     "AsyncLLMTask",
+ ]
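This added file's path is not shown in the diff, but its relative imports suggest it is `llm_utils/lm/__init__.py`; if so, the subpackage itself becomes a usable import target. A sketch under that assumption:

from llm_utils.lm import AsyncLM           # via the new __init__
from llm_utils.lm.async_lm import AsyncLM  # pre-existing module path, equivalent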
@@ -0,0 +1,201 @@
+ from functools import lru_cache
+ from typing import (
+     Any,
+     Dict,
+     Generic,
+     List,
+     TypeVar,
+     Union,
+ )
+
+ # from openai.pagination import AsyncSyncPage
+ from openai.types.chat import (
+     ChatCompletionMessageParam,
+ )
+ from pydantic import BaseModel
+ from typing_extensions import TypedDict
+
+ # --------------------------------------------------------------------------- #
+ # type helpers
+ # --------------------------------------------------------------------------- #
+ TModel = TypeVar("TModel", bound=BaseModel)
+ Messages = List[ChatCompletionMessageParam]
+ LegacyMsgs = List[Dict[str, str]]
+ RawMsgs = Union[Messages, LegacyMsgs]
+
+ # --------------------------------------------------------------------------- #
+ # color helpers (unchanged)
+ # --------------------------------------------------------------------------- #
+
+
+ def _color(code: int, text: str) -> str:
+     return f"\x1b[{code}m{text}\x1b[0m"
+
+
+ def _red(t):
+     return _color(31, t)
+
+
+ def _green(t):
+     return _color(32, t)
+
+
+ def _blue(t):
+     return _color(34, t)
+
+
+ def _yellow(t):
+     return _color(33, t)
+
+
+ # TParsed = TypeVar("TParsed", bound=BaseModel)
+
+ InputModelType = TypeVar("InputModelType", bound=BaseModel)
+ OutputModelType = TypeVar("OutputModelType", bound=BaseModel)
+
+
+ class ParsedOutput(TypedDict, Generic[OutputModelType]):
+     messages: List
+     completion: Any
+     parsed: OutputModelType
+     model_kwargs: Dict[str, Any]
+
+
+ # --------------------------------------------------------------------------- #
+ # Module-level utility functions (async versions)
+ # --------------------------------------------------------------------------- #
+
+
+ @lru_cache(maxsize=10)
+ def get_tokenizer(model_name: str) -> Any:
+     """Get tokenizer for the given model."""
+     from transformers import AutoTokenizer  # type: ignore
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     return tokenizer
+
+
+ async def inspect_word_probs_async(lm, tokenizer, messages):
+     """Async version of inspect_word_probs."""
+
+     import numpy as np
+
+     async def compute_word_log_probs(
+         tokenizer: Any,
+         lm_client: Any,
+     ) -> tuple[List[Dict[str, Any]], Any]:
+         # Build a prompt that preserves literal newlines
+         prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,  # Don't tokenize yet, we need raw text
+             add_generation_prompt=False,  # No generation prompt needed
+         )
+
+         # Request token logprobs
+         response = await lm_client.client.completions.create(
+             model=lm_client.model,  # type: ignore
+             prompt=prompt,
+             max_tokens=1,
+             logprobs=1,
+             extra_body={"prompt_logprobs": 0},
+         )
+         token_logprob_dicts = response.choices[0].prompt_logprobs  # type: ignore
+
+         # Override first token to known start marker
+         start_id = tokenizer.encode("<|im_start|>")[0]
+         token_logprob_dicts[0] = {
+             str(start_id): {
+                 "logprob": -1,
+                 "rank": 1,
+                 "decoded_token": "<|im_start|>",
+             }
+         }
+
+         # Flatten tokens
+         tokens: List[Dict[str, Any]] = [
+             {"id": int(tid), **tdata}
+             for td in token_logprob_dicts
+             for tid, tdata in td.items()
+         ]
+
+         # Validate tokenization
+         tokenized = tokenizer.tokenize(prompt)
+         if len(tokenized) != len(tokens):
+             raise ValueError(f"Token count mismatch: {len(tokenized)} vs {len(tokens)}")
+         for idx, tok in enumerate(tokens):
+             if tokenized[idx] != tok["decoded_token"]:
+                 raise AssertionError(
+                     f"Token mismatch at {idx}: "
+                     f"{tokenized[idx]} != {tok['decoded_token']}"
+                 )
+
+         # Split on newline sentinel
+         split_prompt = prompt.replace("\n", " <NL> ")
+         words = split_prompt.split()
+
+         word_log_probs: List[Dict[str, Any]] = []
+         token_idx = 0
+
+         for word in words:
+             # Map sentinel back to actual newline for encoding
+             target = "\n" if word == "<NL>" else word
+             sub_ids = tokenizer.encode(target, add_special_tokens=False)
+             count = len(sub_ids)
+             if count == 0:
+                 continue
+
+             subs = tokens[token_idx : token_idx + count]
+             avg_logprob = sum(s["logprob"] for s in subs) / count
+             prob = float(np.exp(avg_logprob))
+             word_log_probs.append({"word": target, "probability": prob})
+             token_idx += count
+
+         return word_log_probs, token_logprob_dicts  # type: ignore
+
+     def render_by_logprob(word_log_probs: List[Dict[str, Any]]) -> str:
+         """
+         Return an ANSI-colored string for word probabilities (red → green).
+         """
+         if not word_log_probs:
+             return ""
+
+         probs = [entry["probability"] for entry in word_log_probs]
+         min_p, max_p = min(probs), max(probs)
+         parts: List[str] = []
+
+         for entry in word_log_probs:
+             word = entry["word"]
+             # Preserve actual line breaks
+             if word == "\n":
+                 parts.append("\n")
+                 continue
+
+             p = entry["probability"]
+             norm = (p - min_p) / (max_p - min_p or 1.0)
+             r = int(255 * (1 - norm))  # red component (high when prob is low)
+             g = int(255 * norm)  # green component (high when prob is high)
+             b = 0  # no blue for red-green gradient
+             colored = f"\x1b[38;2;{r};{g};{b}m{word}\x1b[0m"
+             parts.append(colored + " ")
+
+         return "".join(parts).rstrip()
+
+     word_probs, token_logprob_dicts = await compute_word_log_probs(tokenizer, lm)
+     return word_probs, token_logprob_dicts, render_by_logprob(word_probs)
+
+
+ __all__ = [
+     "TModel",
+     "Messages",
+     "LegacyMsgs",
+     "RawMsgs",
+     "ParsedOutput",
+     "get_tokenizer",
+     "inspect_word_probs_async",
+     "_color",
+     "_red",
+     "_green",
+     "_blue",
+     "_yellow",
+ ]
+ # --------------------------------------------------------------------------- #
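A hedged usage sketch for the pair above. It assumes `lm` is an AsyncLM-style object exposing `.client` (an async OpenAI-compatible client) and `.model`, since the function body dereferences both, and that the backing server honors the vLLM `prompt_logprobs` extension passed via `extra_body`; the model id is a placeholder:

import asyncio

async def main(lm) -> None:  # lm: AsyncLM-like, constructed elsewhere
    tokenizer = get_tokenizer("Qwen/Qwen2.5-7B-Instruct")  # placeholder model id
    messages = [{"role": "user", "content": "2 + 2 = ?"}]
    word_probs, token_dicts, rendered = await inspect_word_probs_async(
        lm, tokenizer, messages
    )
    print(rendered)  # each word ANSI-colored red -> green by probability

# asyncio.run(main(AsyncLM(...)))  # constructor signature not shown in this diff

Note the per-word value is exp of the mean token logprob over the word's sub-tokens, i.e. the geometric mean of the sub-token probabilities, so multi-token words are smoothed rather than multiplied down.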