speedy-utils 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
  from __future__ import annotations
+
+ from difflib import SequenceMatcher
  from typing import Any, Optional
+
  from IPython.display import HTML, display
- from difflib import SequenceMatcher


  def show_chat(
@@ -19,6 +21,17 @@ def show_chat(
          isinstance(msg, dict) and "role" in msg and "content" in msg for msg in msgs
      ), "The input format is not recognized. Please specify the input format."

+     if isinstance(msgs[-1], dict) and "choices" in msgs[-1]:
+         message = msgs[-1]["choices"][0]["message"]
+         reasoning_content = message.get("reasoning_content")
+         content = message.get("content", "")
+         if reasoning_content:
+             content = reasoning_content + "\n" + content
+         msgs[-1] = {
+             "role": message["role"],
+             "content": content,
+         }
+
      themes: dict[str, dict[str, dict[str, str]]] = {
          "default": {
              "system": {"background": "#ffaaaa", "text": "#222222"},  # More red
@@ -156,9 +169,9 @@ def get_conversation_one_turn(
      if assistant_msg is not None:
          messages.append({"role": "assistant", "content": assistant_msg})
      if assistant_prefix is not None:
-         assert (
-             return_format != "chatml"
-         ), 'Change return_format to "text" if you want to use assistant_prefix'
+         assert return_format != "chatml", (
+             'Change return_format to "text" if you want to use assistant_prefix'
+         )
      assert messages[-1]["role"] == "user"
      from .transform import transform_messages

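For reference, the constraint behind the reformatted assertion: assistant_prefix is only allowed together with return_format="text". A hedged sketch (keyword names come from this file and from the call in the new async task module; the concrete strings are invented):

```python
# Hypothetical call illustrating the constraint enforced above.
prompt_text = get_conversation_one_turn(
    system_msg="You are a helpful translator.",
    user_msg="Translate to Vietnamese: good morning",
    assistant_prefix="Bản dịch:",  # only valid together with return_format="text"
    return_format="text",
)

# With return_format="chatml" the assertion above raises instead of silently
# dropping the prefix.
```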
@@ -0,0 +1,2 @@
+ from .async_lm import AsyncLM
+ from .async_llm_task import AsyncLLMTask
@@ -0,0 +1,198 @@
+ from functools import lru_cache
+ from typing import (
+     Any,
+     Dict,
+     Generic,
+     List,
+     TypeVar,
+     Union,
+ )
+
+ # from openai.pagination import AsyncSyncPage
+ from openai.types.chat import (
+     ChatCompletionMessageParam,
+ )
+ from pydantic import BaseModel
+ from typing_extensions import TypedDict
+
+ # --------------------------------------------------------------------------- #
+ # type helpers
+ # --------------------------------------------------------------------------- #
+ TModel = TypeVar("TModel", bound=BaseModel)
+ Messages = List[ChatCompletionMessageParam]
+ LegacyMsgs = List[Dict[str, str]]
+ RawMsgs = Union[Messages, LegacyMsgs]
+
+ # --------------------------------------------------------------------------- #
+ # color helpers (unchanged)
+ # --------------------------------------------------------------------------- #
+
+
+ def _color(code: int, text: str) -> str:
+     return f"\x1b[{code}m{text}\x1b[0m"
+
+
+ def _red(t):
+     return _color(31, t)
+
+
+ def _green(t):
+     return _color(32, t)
+
+
+ def _blue(t):
+     return _color(34, t)
+
+
+ def _yellow(t):
+     return _color(33, t)
+
+
+ TParsed = TypeVar("TParsed", bound=BaseModel)
+
+
+ class ParsedOutput(TypedDict, Generic[TParsed]):
+     messages: List
+     completion: Any
+     parsed: TParsed
+
+
+ # --------------------------------------------------------------------------- #
+ # Module-level utility functions (async versions)
+ # --------------------------------------------------------------------------- #
+
+
+ @lru_cache(maxsize=10)
+ def get_tokenizer(model_name: str) -> Any:
+     """Get tokenizer for the given model."""
+     from transformers import AutoTokenizer  # type: ignore
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     return tokenizer
+
+
+ async def inspect_word_probs_async(lm, tokenizer, messages):
+     """Async version of inspect_word_probs."""
+
+     import numpy as np
+
+     async def compute_word_log_probs(
+         tokenizer: Any,
+         lm_client: Any,
+     ) -> tuple[List[Dict[str, Any]], Any]:
+         # Build a prompt that preserves literal newlines
+         prompt = tokenizer.apply_chat_template(
+             messages,
+             tokenize=False,  # Don't tokenize yet, we need raw text
+             add_generation_prompt=False,  # No generation prompt needed
+         )
+
+         # Request token logprobs
+         response = await lm_client.client.completions.create(
+             model=lm_client.model,  # type: ignore
+             prompt=prompt,
+             max_tokens=1,
+             logprobs=1,
+             extra_body={"prompt_logprobs": 0},
+         )
+         token_logprob_dicts = response.choices[0].prompt_logprobs  # type: ignore
+
+         # Override first token to known start marker
+         start_id = tokenizer.encode("<|im_start|>")[0]
+         token_logprob_dicts[0] = {
+             str(start_id): {
+                 "logprob": -1,
+                 "rank": 1,
+                 "decoded_token": "<|im_start|>",
+             }
+         }
+
+         # Flatten tokens
+         tokens: List[Dict[str, Any]] = [
+             {"id": int(tid), **tdata}
+             for td in token_logprob_dicts
+             for tid, tdata in td.items()
+         ]
+
+         # Validate tokenization
+         tokenized = tokenizer.tokenize(prompt)
+         if len(tokenized) != len(tokens):
+             raise ValueError(f"Token count mismatch: {len(tokenized)} vs {len(tokens)}")
+         for idx, tok in enumerate(tokens):
+             if tokenized[idx] != tok["decoded_token"]:
+                 raise AssertionError(
+                     f"Token mismatch at {idx}: "
+                     f"{tokenized[idx]} != {tok['decoded_token']}"
+                 )
+
+         # Split on newline sentinel
+         split_prompt = prompt.replace("\n", " <NL> ")
+         words = split_prompt.split()
+
+         word_log_probs: List[Dict[str, Any]] = []
+         token_idx = 0
+
+         for word in words:
+             # Map sentinel back to actual newline for encoding
+             target = "\n" if word == "<NL>" else word
+             sub_ids = tokenizer.encode(target, add_special_tokens=False)
+             count = len(sub_ids)
+             if count == 0:
+                 continue
+
+             subs = tokens[token_idx : token_idx + count]
+             avg_logprob = sum(s["logprob"] for s in subs) / count
+             prob = float(np.exp(avg_logprob))
+             word_log_probs.append({"word": target, "probability": prob})
+             token_idx += count
+
+         return word_log_probs, token_logprob_dicts  # type: ignore
+
+     def render_by_logprob(word_log_probs: List[Dict[str, Any]]) -> str:
+         """
+         Return an ANSI-colored string for word probabilities (red → green).
+         """
+         if not word_log_probs:
+             return ""
+
+         probs = [entry["probability"] for entry in word_log_probs]
+         min_p, max_p = min(probs), max(probs)
+         parts: List[str] = []
+
+         for entry in word_log_probs:
+             word = entry["word"]
+             # Preserve actual line breaks
+             if word == "\n":
+                 parts.append("\n")
+                 continue
+
+             p = entry["probability"]
+             norm = (p - min_p) / (max_p - min_p or 1.0)
+             r = int(255 * (1 - norm))  # red component (high when prob is low)
+             g = int(255 * norm)  # green component (high when prob is high)
+             b = 0  # no blue for red-green gradient
+             colored = f"\x1b[38;2;{r};{g};{b}m{word}\x1b[0m"
+             parts.append(colored + " ")
+
+         return "".join(parts).rstrip()
+
+     word_probs, token_logprob_dicts = await compute_word_log_probs(tokenizer, lm)
+     return word_probs, token_logprob_dicts, render_by_logprob(word_probs)
+
+
+ __all__ = [
+     "TModel",
+     "Messages",
+     "LegacyMsgs",
+     "RawMsgs",
+     "TParsed",
+     "ParsedOutput",
+     "get_tokenizer",
+     "inspect_word_probs_async",
+     "_color",
+     "_red",
+     "_green",
+     "_blue",
+     "_yellow",
+ ]
+ # --------------------------------------------------------------------------- #
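A possible driver for the new helpers, assuming an OpenAI-compatible endpoint that supports vLLM-style `prompt_logprobs` and a chat template that begins with `<|im_start|>`. The import path and model name are guesses (this diff does not show where the new module lives), and the `AsyncLM` constructor arguments mirror the docstring example in the next file:

```python
import asyncio

# Import path is an assumption, not taken from this diff.
from llm_utils.lm import AsyncLM, get_tokenizer, inspect_word_probs_async


async def main() -> None:
    # Constructor arguments follow the AsyncLM example shown in the
    # AsyncLLMTask docstring; the model name is a placeholder.
    lm = AsyncLM(port=8130, cache=False, model="Qwen/Qwen2.5-7B-Instruct")
    tokenizer = get_tokenizer("Qwen/Qwen2.5-7B-Instruct")
    messages = [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "The capital of France is Paris."},
    ]
    word_probs, raw_token_logprobs, colored = await inspect_word_probs_async(
        lm, tokenizer, messages
    )
    print(colored)  # prompt words colored red→green by average token probability


asyncio.run(main())
```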
@@ -0,0 +1,154 @@
+ from abc import ABC
+ from typing import (
+     Any,
+     Dict,
+     Generic,
+     List,
+     Optional,
+     TypeVar,
+     Union,
+     cast,
+ )
+
+ # from openai.pagination import AsyncSyncPage
+ from openai.types.chat import (
+     ChatCompletionMessageParam,
+ )
+ from pydantic import BaseModel
+
+ from llm_utils.chat_format.display import get_conversation_one_turn
+
+ from .async_lm import AsyncLM
+
+ # --------------------------------------------------------------------------- #
+ # type helpers
+ # --------------------------------------------------------------------------- #
+ TModel = TypeVar("TModel", bound=BaseModel)
+ Messages = List[ChatCompletionMessageParam]
+ LegacyMsgs = List[Dict[str, str]]
+ RawMsgs = Union[Messages, LegacyMsgs]
+
+
+ # --------------------------------------------------------------------------- #
+ # Async LLMTask class
+ # --------------------------------------------------------------------------- #
+
+ InputModelType = TypeVar("InputModelType", bound=BaseModel)
+ OutputModelType = TypeVar("OutputModelType", bound=BaseModel)
+
+
+ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
+     """
+     Async callable wrapper around an AsyncLM endpoint.
+
+     Sub-classes must set:
+       • lm          – the async language-model instance
+       • InputModel  – a Pydantic input class
+       • OutputModel – a Pydantic output class
+
+     Optional flags:
+       • temperature     – float (default 0.6)
+       • think           – bool (if the backend supports "chain-of-thought")
+       • add_json_schema – bool (include schema in the instruction)
+
+     The **docstring** of each sub-class is sent as the LM instruction.
+     Example:
+     ```python
+     class DemoTask(AsyncLLMTask):
+         "TODO: SYSTEM_PROMPT_INSTRUCTION HERE"
+
+         lm = AsyncLM(port=8130, cache=False, model="gpt-3.5-turbo")
+
+         class InputModel(BaseModel):
+             text_to_translate: str
+
+         class OutputModel(BaseModel):
+             translation: str
+             glossary_use: str
+
+         temperature = 0.6
+         think = False
+
+     demo_task = DemoTask()
+     result = await demo_task({'text_to_translate': 'Translate from English to Vietnamese: Hello, how are you?'})
+     ```
+     """
+
+     lm: "AsyncLM"
+     InputModel: InputModelType
+     OutputModel: OutputModelType
+
+     temperature: float = 0.6
+     think: bool = False
+     add_json_schema: bool = False
+     cache: bool = False
+
+     async def __call__(
+         self,
+         data: BaseModel | dict,
+         temperature: float = 0.1,
+         cache: bool = False,
+         think: Optional[bool] = None,  # if not None, overrides self.think
+     ) -> tuple[OutputModelType, List[Dict[str, Any]]]:
+         # Get the input and output model types from the generic parameters
+         type_args = getattr(self.__class__, "__orig_bases__", None)
+         if (
+             type_args
+             and hasattr(type_args[0], "__args__")
+             and len(type_args[0].__args__) >= 2
+         ):
+             input_model = type_args[0].__args__[0]
+             output_model = type_args[0].__args__[1]
+         else:
+             # Fallback to the old way if type introspection fails
+             if (
+                 not hasattr(self, "InputModel")
+                 or not hasattr(self, "OutputModel")
+                 or not hasattr(self, "lm")
+             ):
+                 raise NotImplementedError(
+                     f"{self.__class__.__name__} must define lm, InputModel, and OutputModel as class attributes or use proper generic typing."
+                 )
+             input_model = self.InputModel
+             output_model = self.OutputModel
+
+         # Ensure input_model is a class before calling
+         if isinstance(data, BaseModel):
+             item = data
+         elif isinstance(input_model, type) and issubclass(input_model, BaseModel):
+             item = input_model(**data)
+         else:
+             raise TypeError("InputModel must be a subclass of BaseModel")
+
+         assert isinstance(output_model, type) and issubclass(output_model, BaseModel), (
+             "OutputModel must be a subclass of BaseModel"
+         )
+
+         result = await self.lm.parse(
+             prompt=item.model_dump_json(),
+             instruction=self.__doc__ or "",
+             response_model=output_model,
+             temperature=temperature or self.temperature,
+             think=think if think is not None else self.think,
+             add_json_schema_to_instruction=self.add_json_schema,
+             cache=self.cache or cache,
+         )
+
+         return (
+             cast(OutputModelType, result["parsed"]),  # type: ignore
+             cast(List[dict], result["messages"]),  # type: ignore
+         )
+
+     def generate_training_data(
+         self, input_dict: Dict[str, Any], output: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Return a ShareGPT-like training record."""
+         system_prompt = self.__doc__ or ""
+         user_msg = self.InputModel(**input_dict).model_dump_json()  # type: ignore[attr-defined]
+         assistant_msg = self.OutputModel(**output).model_dump_json()  # type: ignore[attr-defined]
+         messages = get_conversation_one_turn(
+             system_msg=system_prompt, user_msg=user_msg, assistant_msg=assistant_msg
+         )
+         return {"messages": messages}
+
+     arun = __call__  # alias for compatibility with other LLMTask implementations
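One way the training-data helper could be used, reusing the `DemoTask` subclass sketched in the docstring above; the field values are invented and the exact shape of the returned messages depends on `get_conversation_one_turn`'s default return format:

```python
# DemoTask is the hypothetical subclass from the AsyncLLMTask docstring.
demo_task = DemoTask()

record = demo_task.generate_training_data(
    input_dict={"text_to_translate": "Hello, how are you?"},
    output={"translation": "Xin chào, bạn khỏe không?", "glossary_use": "none"},
)

# Assuming the default return format is a chatml-style message list, record is
# {"messages": [system, user, assistant]} where the user and assistant contents
# are the JSON dumps of InputModel and OutputModel, i.e. a ShareGPT-style
# one-turn training sample.
print(record["messages"])
```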