speedy-utils 1.1.6__py3-none-any.whl → 1.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +1 -5
- llm_utils/chat_format/transform.py +9 -9
- llm_utils/group_messages.py +1 -1
- llm_utils/lm/async_lm/__init__.py +6 -1
- llm_utils/lm/async_lm/_utils.py +7 -4
- llm_utils/lm/async_lm/async_llm_task.py +465 -110
- llm_utils/lm/async_lm/async_lm.py +273 -665
- llm_utils/lm/async_lm/async_lm_base.py +405 -0
- llm_utils/lm/async_lm/lm_specific.py +136 -0
- llm_utils/lm/utils.py +1 -3
- llm_utils/scripts/vllm_load_balancer.py +49 -37
- speedy_utils/__init__.py +3 -1
- speedy_utils/common/notebook_utils.py +4 -4
- speedy_utils/common/report_manager.py +2 -3
- speedy_utils/common/utils_cache.py +233 -3
- speedy_utils/common/utils_io.py +2 -0
- speedy_utils/scripts/mpython.py +1 -3
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.7.dist-info}/METADATA +1 -1
- speedy_utils-1.1.7.dist-info/RECORD +39 -0
- llm_utils/lm/chat_html.py +0 -246
- llm_utils/lm/lm_json.py +0 -68
- llm_utils/lm/sync_lm.py +0 -943
- speedy_utils-1.1.6.dist-info/RECORD +0 -40
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.7.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.6.dist-info → speedy_utils-1.1.7.dist-info}/entry_points.txt +0 -0
llm_utils/lm/chat_html.py
DELETED
@@ -1,246 +0,0 @@
-from typing import Any, Optional, cast
-from .sync_lm import LM, Messages, LegacyMsgs, RawMsgs
-import sys
-
-# Configuration
-DEFAULT_FONT_SIZE = 1  # Base font size in pixels
-DEFAULT_CODE_FONT_SIZE = 1  # Code font size in pixels
-DEFAULT_PADDING = [1] * 4  # Padding [top, right, bottom, left] in pixels
-DEFAULT_INNER_PADDING = [1] * 4  # Inner padding [top, right, bottom, left]
-thinking_tag = "think"
-# Jupyter notebook detection and imports
-try:
-    from IPython.display import display, HTML
-    from IPython import get_ipython
-
-    JUPYTER_AVAILABLE = True
-except ImportError:
-    JUPYTER_AVAILABLE = False
-
-
-def _is_jupyter_notebook() -> bool:
-    """Check if running in Jupyter notebook environment."""
-    if not JUPYTER_AVAILABLE:
-        return False
-    try:
-        shell = get_ipython().__class__.__name__
-        return shell == "ZMQInteractiveShell"
-    except Exception:
-        return False
-
-
-def _parse_thinking_content(content: str) -> tuple[str, str]:
-    """Parse content to separate thinking and answer sections during streaming."""
-    import re
-
-    # For streaming: detect if we're currently in thinking mode
-    think_start_match = re.search(r"<think[^>]*>", content, re.IGNORECASE)
-    if not think_start_match:
-        return "", content
-
-    think_start_pos = think_start_match.end()
-
-    # Look for closing tag
-    think_end_match = re.search(
-        r"</think[^>]*>", content[think_start_pos:], re.IGNORECASE
-    )
-
-    if think_end_match:
-        # We have complete thinking section
-        thinking_content = content[
-            think_start_pos : think_start_pos + think_end_match.start()
-        ].strip()
-        # Everything after </think> is answer content
-        answer_start = think_start_pos + think_end_match.end()
-        answer_content = content[answer_start:].strip()
-        return thinking_content, answer_content
-    else:
-        # Still in thinking mode (streaming), no closing tag yet
-        thinking_content = content[think_start_pos:].strip()
-        return thinking_content, ""
-
-
-def _get_chat_html_template(
-    content: str,
-    font_size: int = DEFAULT_FONT_SIZE,
-    padding: list[int] = DEFAULT_PADDING,
-    inner_padding: list[int] = DEFAULT_INNER_PADDING,
-) -> str:
-    """Generate HTML template with improved styling for chat display."""
-    code_font_size = max(font_size - 1, 10)  # Code slightly smaller, min 10px
-
-    # Parse thinking and answer content
-    thinking_content, answer_content = _parse_thinking_content(content)
-
-    # Format padding as CSS value - reduce outer padding more
-    outer_padding_css = f"2px {padding[1]}px 2px {padding[3]}px"
-    inner_padding_css = f"2px {inner_padding[1]}px 2px {inner_padding[3]}px"
-
-    # Build thinking section HTML if present
-    thinking_html = ""
-    if thinking_content:
-        # Show as open during streaming, closed when complete
-        is_complete = "</think" in content.lower()
-        open_attr = "" if is_complete else "open"
-
-        thinking_html = f"""
-        <details {open_attr} style="
-            margin-bottom: 4px;
-            border: 1px solid #d1d9e0;
-            border-radius: 4px;
-            background-color: #f8f9fa;
-        ">
-            <summary style="
-                padding: 3px 8px;
-                background-color: #e9ecef;
-                border-radius: 3px 3px 0 0;
-                cursor: pointer;
-                font-weight: 500;
-                color: #495057;
-                user-select: none;
-                border-bottom: 1px solid #d1d9e0;
-                font-size: {font_size - 1}px;
-            ">
-                🤔 Thinking{'...' if not is_complete else ''}
-            </summary>
-            <div style="
-                padding: 4px 8px;
-                background-color: #f8f9fa;
-                border-radius: 0 0 3px 3px;
-            ">
-                <pre style="
-                    margin: 0;
-                    padding: 0;
-                    white-space: pre-wrap;
-                    word-wrap: break-word;
-                    font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
-                    font-size: {code_font_size - 1}px;
-                    line-height: 1.3;
-                    background: transparent;
-                    border: none;
-                    color: #6c757d;
-                ">{thinking_content}</pre>
-            </div>
-        </details>
-        """
-
-    return f"""
-    <div style="
-        border: 1px solid #d0d7de;
-        border-radius: 6px;
-        padding: {outer_padding_css};
-        margin: 2px 0;
-        background-color: #f6f8fa;
-        color: #24292f;
-        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
-        font-size: {font_size}px;
-        line-height: 1.4;
-        white-space: pre-wrap;
-        word-wrap: break-word;
-        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-    ">
-        <div style="
-            background-color: #fff;
-            border: 1px solid #d0d7de;
-            border-radius: 4px;
-            padding: {inner_padding_css};
-            color: #24292f;
-        ">
-            <strong style="color: #0969da;">Assistant:</strong><br>
-            {thinking_html}
-            <pre style="
-                margin: 2px 0 0 0;
-                padding: 0;
-                white-space: pre-wrap;
-                word-wrap: break-word;
-                font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
-                font-size: {code_font_size}px;
-                line-height: 1.4;
-                background: transparent;
-                border: none;
-            ">{answer_content}</pre>
-        </div>
-    </div>
-    """
-
-
-class LMChatHtml(LM):
-    def __init__(
-        self,
-        *args,
-        font_size: int = DEFAULT_FONT_SIZE,
-        padding: list[int] = DEFAULT_PADDING,
-        inner_padding: list[int] = DEFAULT_INNER_PADDING,
-        **kwargs,
-    ):
-        super().__init__(*args, **kwargs)
-        self.font_size = font_size
-        self.padding = padding
-        self.inner_padding = inner_padding
-
-    def chat_stream(
-        self,
-        prompt: Optional[str] = None,
-        messages: Optional[RawMsgs] = None,
-        html_mode: bool = False,
-        font_size: Optional[int] = None,
-        padding: Optional[list[int]] = None,
-        inner_padding: Optional[list[int]] = None,
-        **kwargs: Any,
-    ) -> str:
-        """
-        Stream responses from the model with HTML support in Jupyter.
-        """
-        if prompt is not None:
-            messages = [{"role": "user", "content": prompt}]
-
-        assert messages is not None  # for type-checker
-
-        openai_msgs: Messages = (
-            self._convert_messages(cast(LegacyMsgs, messages))
-            if isinstance(messages[0], dict)  # legacy style
-            else cast(Messages, messages)  # already typed
-        )
-        assert self.model is not None, "Model must be set before streaming."
-
-        stream = self.client.chat.completions.create(
-            model=self.model,
-            messages=openai_msgs,
-            stream=True,
-            **kwargs,
-        )  # type: ignore
-
-        output_text = ""
-        is_jupyter = _is_jupyter_notebook()
-        display_font_size = font_size or self.font_size
-        display_padding = padding or self.padding
-        display_inner_padding = inner_padding or self.inner_padding
-
-        if html_mode and is_jupyter:
-            # Create initial display handle
-            display_handle = display(HTML(""), display_id=True)
-
-            for chunk in stream:
-                if chunk.choices[0].delta.content is not None:
-                    chunk_content = chunk.choices[0].delta.content
-                    output_text += chunk_content
-
-                    # Update HTML display progressively using improved template
-                    html_content = _get_chat_html_template(
-                        output_text,
-                        font_size=display_font_size,
-                        padding=display_padding,
-                        inner_padding=display_inner_padding,
-                    )
-                    if display_handle is not None:
-                        display_handle.update(HTML(html_content))
-        else:
-            # Console streaming mode (original behavior)
-            for chunk in stream:
-                if chunk.choices[0].delta.content is not None:
-                    chunk_content = chunk.choices[0].delta.content
-                    print(chunk_content, end="")
-                    sys.stdout.flush()
-                    output_text += chunk_content
-
-        return output_text
llm_utils/lm/lm_json.py
DELETED
@@ -1,68 +0,0 @@
-from typing import Any, Optional
-
-
-from llm_utils.lm.sync_lm import LM, RawMsgs
-
-
-class LMJson(LM):
-    "Regex-based reasoning wrapper for LM."
-
-    def __init__(
-        self,
-        model: str | None = None,
-        *,
-        temperature: float = 0.0,
-        max_tokens: int = 2_000,
-        host: str = "localhost",
-        port: Optional[int | str] = None,
-        base_url: Optional[str] = None,
-        api_key: Optional[str] = None,
-        cache: bool = True,
-        **openai_kwargs: Any,
-    ) -> None:
-        """
-        Initialize the LMJson instance.
-
-        Args:
-            model (str | None): The model name to use.
-            temperature (float): Sampling temperature.
-            max_tokens (int): Maximum number of tokens to generate.
-            host (str): Host for the API.
-            port (int | str, optional): Port for the API.
-            base_url (str, optional): Base URL for the API.
-            api_key (str, optional): API key for authentication.
-            cache (bool): Whether to cache responses.
-            **openai_kwargs: Additional OpenAI parameters.
-        """
-        super().__init__(
-            model=model,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            host=host,
-            port=port,
-            base_url=base_url,
-            api_key=api_key,
-            cache=cache,
-            **openai_kwargs,
-        )
-
-    def __call__(
-        self,
-        prompt: Optional[str] = None,
-        messages: Optional[RawMsgs] = None,
-        cache: Optional[bool] = None,
-        max_tokens: Optional[int] = None,
-        return_openai_response: bool = False,
-        **kwargs: Any,
-    ):
-
-        output = super().__call__(
-            prompt=prompt,
-            messages=messages,
-            response_format=str,
-            cache=cache,
-            max_tokens=max_tokens,
-            return_openai_response=return_openai_response,
-            **kwargs,
-        )
-        return output