speedy-utils 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +10 -15
- llm_utils/lm/alm.py +24 -3
- llm_utils/lm/chat_html.py +244 -0
- llm_utils/lm/lm.py +390 -74
- llm_utils/lm/lm_json.py +72 -0
- llm_utils/scripts/README.md +48 -0
- llm_utils/scripts/example_vllm_client.py +269 -0
- llm_utils/scripts/requirements_example.txt +3 -0
- llm_utils/scripts/serve_script.sh +2 -0
- speedy_utils/__init__.py +96 -5
- speedy_utils/common/notebook_utils.py +63 -0
- speedy_utils/common/utils_cache.py +3 -3
- speedy_utils/common/utils_print.py +2 -65
- speedy_utils/multi_worker/process.py +7 -0
- speedy_utils/scripts/__init__.py +0 -0
- speedy_utils/scripts/mpython.py +3 -2
- speedy_utils/scripts/openapi_client_codegen.py +258 -0
- {speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/METADATA +1 -1
- {speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/RECORD +21 -12
- {speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/entry_points.txt +1 -0
- {speedy_utils-1.0.13.dist-info → speedy_utils-1.0.15.dist-info}/WHEEL +0 -0
llm_utils/__init__.py
CHANGED

@@ -1,20 +1,16 @@
 from .chat_format import (
-    transform_messages,
-    transform_messages_to_chatml,
-    show_chat,
-    get_conversation_one_turn,
-    show_string_diff,
-    display_conversations,
     build_chatml_input,
-    format_msgs,
     display_chat_messages_as_html,
+    display_conversations,
+    format_msgs,
+    get_conversation_one_turn,
+    show_chat,
+    show_string_diff,
+    transform_messages,
+    transform_messages_to_chatml,
 )
-from .lm.lm import LM,
+from .lm.lm import LM, LLMTask
 from .lm.alm import AsyncLM
-from .group_messages import (
-    split_indices_by_length,
-    group_messages_by_len,
-)
 
 __all__ = [
     "transform_messages",
@@ -25,10 +21,9 @@ __all__ = [
     "display_conversations",
     "build_chatml_input",
     "format_msgs",
-    "
-    "group_messages_by_len",
+    # "group_messages_by_len",
     "LM",
-    "LMReasoner",
     "AsyncLM",
     "display_chat_messages_as_html",
+    "LLMTask",
 ]
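
For orientation, a minimal sketch of the import surface implied by the diff above; only the imported names are taken from the updated __init__.py, everything else is an assumption:

# Names re-exported by llm_utils after this change (per the diff above).
from llm_utils import LM, LLMTask, AsyncLM, show_chat, format_msgs

# group_messages helpers are no longer re-exported from the package root;
# presumably they would be imported from the submodule directly, e.g.:
# from llm_utils.group_messages import group_messages_by_len  # assumption
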
llm_utils/lm/alm.py
CHANGED

@@ -34,7 +34,7 @@ from typing import (
 )
 
 from httpx import URL
-from openai import AsyncOpenAI, AuthenticationError, RateLimitError
+from openai import AsyncOpenAI, AuthenticationError, BadRequestError, RateLimitError
 
 # from openai.pagination import AsyncSyncPage
 from openai.types.chat import (
@@ -108,6 +108,7 @@ class AsyncLM:
         # if have multiple ports
         if self.ports:
             import random
+
             port = random.choice(self.ports)
             api_base = f"http://{self.host}:{port}/v1"
             logger.debug(f"Using port: {port}")
@@ -213,6 +214,13 @@ class AsyncLM:
             self._cache_key(messages, kw, response_format) if use_cache else None
         )
         if cache_key and (hit := self._load_cache(cache_key)) is not None:
+            # Check if cached value is an error
+            if isinstance(hit, dict) and hit.get("error"):
+                error_type = hit.get("error_type", "Unknown")
+                error_msg = hit.get("error_message", "Cached error")
+                logger.warning(f"Found cached error ({error_type}): {error_msg}")
+                # Re-raise as a ValueError with meaningful message
+                raise ValueError(f"Cached {error_type}: {error_msg}")
             return hit
 
         try:
@@ -230,8 +238,21 @@
                 **kw,
             )
 
-        except (AuthenticationError, RateLimitError) as exc:
-
+        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
+            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
+            logger.error(error_msg)
+
+            # Cache the error if it's a BadRequestError to avoid repeated calls
+            if isinstance(exc, BadRequestError) and cache_key:
+                error_response = {
+                    "error": True,
+                    "error_type": "BadRequestError",
+                    "error_message": str(exc),
+                    "choices": [],
+                }
+                self._dump_cache(cache_key, error_response)
+                logger.debug(f"Cached BadRequestError for key: {cache_key}")
+
             raise
 
         if cache_key:
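
The practical effect of the change above is that a BadRequestError is written to the cache and surfaces as a ValueError on later cache hits instead of triggering another API call. A rough caller-side sketch; the constructor arguments and call style below are assumptions, not taken from the diff:

import asyncio
from llm_utils import AsyncLM

async def main():
    lm = AsyncLM(model="my-model", host="localhost", ports=[8000])  # assumed signature

    try:
        await lm(messages=[{"role": "user", "content": "x" * 10_000_000}])  # assumed call style
    except Exception as exc:
        print("first call:", exc)   # BadRequestError from the API, now cached

    try:
        await lm(messages=[{"role": "user", "content": "x" * 10_000_000}])
    except ValueError as exc:
        print("second call:", exc)  # "Cached BadRequestError: ..." raised from the cache entry

asyncio.run(main())
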

llm_utils/lm/chat_html.py
ADDED

@@ -0,0 +1,244 @@
+from .lm import *
+import sys
+
+# Configuration
+DEFAULT_FONT_SIZE = 1  # Base font size in pixels
+DEFAULT_CODE_FONT_SIZE = 1  # Code font size in pixels
+DEFAULT_PADDING = [1] * 4  # Padding [top, right, bottom, left] in pixels
+DEFAULT_INNER_PADDING = [1] * 4  # Inner padding [top, right, bottom, left]
+thinking_tag = "think"
+# Jupyter notebook detection and imports
+try:
+    from IPython.display import display, HTML
+    from IPython import get_ipython
+
+    JUPYTER_AVAILABLE = True
+except ImportError:
+    JUPYTER_AVAILABLE = False
+
+
+def _is_jupyter_notebook() -> bool:
+    """Check if running in Jupyter notebook environment."""
+    if not JUPYTER_AVAILABLE:
+        return False
+    try:
+        shell = get_ipython().__class__.__name__
+        return shell == "ZMQInteractiveShell"
+    except Exception:
+        return False
+
+
+def _parse_thinking_content(content: str) -> tuple[str, str]:
+    """Parse content to separate thinking and answer sections during streaming."""
+    import re
+
+    # For streaming: detect if we're currently in thinking mode
+    think_start_match = re.search(r"<think[^>]*>", content, re.IGNORECASE)
+    if not think_start_match:
+        return "", content
+
+    think_start_pos = think_start_match.end()
+
+    # Look for closing tag
+    think_end_match = re.search(
+        r"</think[^>]*>", content[think_start_pos:], re.IGNORECASE
+    )
+
+    if think_end_match:
+        # We have complete thinking section
+        thinking_content = content[
+            think_start_pos : think_start_pos + think_end_match.start()
+        ].strip()
+        # Everything after </think> is answer content
+        answer_start = think_start_pos + think_end_match.end()
+        answer_content = content[answer_start:].strip()
+        return thinking_content, answer_content
+    else:
+        # Still in thinking mode (streaming), no closing tag yet
+        thinking_content = content[think_start_pos:].strip()
+        return thinking_content, ""
+
+
+def _get_chat_html_template(
+    content: str,
+    font_size: int = DEFAULT_FONT_SIZE,
+    padding: list[int] = DEFAULT_PADDING,
+    inner_padding: list[int] = DEFAULT_INNER_PADDING,
+) -> str:
+    """Generate HTML template with improved styling for chat display."""
+    code_font_size = max(font_size - 1, 10)  # Code slightly smaller, min 10px
+
+    # Parse thinking and answer content
+    thinking_content, answer_content = _parse_thinking_content(content)
+
+    # Format padding as CSS value - reduce outer padding more
+    outer_padding_css = f"2px {padding[1]}px 2px {padding[3]}px"
+    inner_padding_css = f"2px {inner_padding[1]}px 2px {inner_padding[3]}px"
+
+    # Build thinking section HTML if present
+    thinking_html = ""
+    if thinking_content:
+        # Show as open during streaming, closed when complete
+        is_complete = "</think" in content.lower()
+        open_attr = "" if is_complete else "open"
+
+        thinking_html = f"""
+        <details {open_attr} style="
+            margin-bottom: 4px;
+            border: 1px solid #d1d9e0;
+            border-radius: 4px;
+            background-color: #f8f9fa;
+        ">
+            <summary style="
+                padding: 3px 8px;
+                background-color: #e9ecef;
+                border-radius: 3px 3px 0 0;
+                cursor: pointer;
+                font-weight: 500;
+                color: #495057;
+                user-select: none;
+                border-bottom: 1px solid #d1d9e0;
+                font-size: {font_size - 1}px;
+            ">
+                🤔 Thinking{'...' if not is_complete else ''}
+            </summary>
+            <div style="
+                padding: 4px 8px;
+                background-color: #f8f9fa;
+                border-radius: 0 0 3px 3px;
+            ">
+                <pre style="
+                    margin: 0;
+                    padding: 0;
+                    white-space: pre-wrap;
+                    word-wrap: break-word;
+                    font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
+                    font-size: {code_font_size - 1}px;
+                    line-height: 1.3;
+                    background: transparent;
+                    border: none;
+                    color: #6c757d;
+                ">{thinking_content}</pre>
+            </div>
+        </details>
+        """
+
+    return f"""
+    <div style="
+        border: 1px solid #d0d7de;
+        border-radius: 6px;
+        padding: {outer_padding_css};
+        margin: 2px 0;
+        background-color: #f6f8fa;
+        color: #24292f;
+        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
+        font-size: {font_size}px;
+        line-height: 1.4;
+        white-space: pre-wrap;
+        word-wrap: break-word;
+        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+    ">
+        <div style="
+            background-color: #fff;
+            border: 1px solid #d0d7de;
+            border-radius: 4px;
+            padding: {inner_padding_css};
+            color: #24292f;
+        ">
+            <strong style="color: #0969da;">Assistant:</strong><br>
+            {thinking_html}
+            <pre style="
+                margin: 2px 0 0 0;
+                padding: 0;
+                white-space: pre-wrap;
+                word-wrap: break-word;
+                font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
+                font-size: {code_font_size}px;
+                line-height: 1.4;
+                background: transparent;
+                border: none;
+            ">{answer_content}</pre>
+        </div>
+    </div>
+    """
+
+
+class LMChatHtml(LM):
+    def __init__(
+        self,
+        *args,
+        font_size: int = DEFAULT_FONT_SIZE,
+        padding: list[int] = DEFAULT_PADDING,
+        inner_padding: list[int] = DEFAULT_INNER_PADDING,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.font_size = font_size
+        self.padding = padding
+        self.inner_padding = inner_padding
+
+    def chat_stream(
+        self,
+        prompt: Optional[str] = None,
+        messages: Optional[RawMsgs] = None,
+        html_mode: bool = False,
+        font_size: Optional[int] = None,
+        padding: Optional[list[int]] = None,
+        inner_padding: Optional[list[int]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Stream responses from the model with HTML support in Jupyter.
+        """
+        if prompt is not None:
+            messages = [{"role": "user", "content": prompt}]
+
+        assert messages is not None  # for type-checker
+
+        openai_msgs: Messages = (
+            self._convert_messages(cast(LegacyMsgs, messages))
+            if isinstance(messages[0], dict)  # legacy style
+            else cast(Messages, messages)  # already typed
+        )
+        assert self.model is not None, "Model must be set before streaming."
+
+        stream = self.client.chat.completions.create(
+            model=self.model,
+            messages=openai_msgs,
+            stream=True,
+            **kwargs,
+        )  # type: ignore
+
+        output_text = ""
+        is_jupyter = _is_jupyter_notebook()
+        display_font_size = font_size or self.font_size
+        display_padding = padding or self.padding
+        display_inner_padding = inner_padding or self.inner_padding
+
+        if html_mode and is_jupyter:
+            # Create initial display handle
+            display_handle = display(HTML(""), display_id=True)
+
+            for chunk in stream:
+                if chunk.choices[0].delta.content is not None:
+                    chunk_content = chunk.choices[0].delta.content
+                    output_text += chunk_content
+
+                    # Update HTML display progressively using improved template
+                    html_content = _get_chat_html_template(
+                        output_text,
+                        font_size=display_font_size,
+                        padding=display_padding,
+                        inner_padding=display_inner_padding,
+                    )
+                    display_handle.update(HTML(html_content))
+        else:
+            # Console streaming mode (original behavior)
+            for chunk in stream:
+                if chunk.choices[0].delta.content is not None:
+                    chunk_content = chunk.choices[0].delta.content
+                    print(chunk_content, end="")
+                    sys.stdout.flush()
+                    output_text += chunk_content
+
+        return output_text
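
Finally, a rough sketch of how the new LMChatHtml class might be used from a Jupyter notebook; chat_stream, html_mode, and the styling keywords appear in the new file above, while the constructor arguments are assumed base-class kwargs:

# Hypothetical notebook usage; the model/base-class kwargs are assumptions.
from llm_utils.lm.chat_html import LMChatHtml

lm = LMChatHtml(model="my-model", font_size=13)  # assumed LM constructor kwargs
text = lm.chat_stream(
    prompt="Explain the <think> tag handling in one paragraph.",
    html_mode=True,  # render a live-updating HTML block when running in Jupyter
    font_size=13,
)
print(text)  # full streamed response; any <think>...</think> section is still embedded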