speedy-utils 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_utils/__init__.py CHANGED
@@ -1,20 +1,16 @@
 from .chat_format import (
-    transform_messages,
-    transform_messages_to_chatml,
-    show_chat,
-    get_conversation_one_turn,
-    show_string_diff,
-    display_conversations,
     build_chatml_input,
-    format_msgs,
     display_chat_messages_as_html,
+    display_conversations,
+    format_msgs,
+    get_conversation_one_turn,
+    show_chat,
+    show_string_diff,
+    transform_messages,
+    transform_messages_to_chatml,
 )
-from .lm.lm import LM, LMReasoner
+from .lm.lm import LM, LLMTask
 from .lm.alm import AsyncLM
-from .group_messages import (
-    split_indices_by_length,
-    group_messages_by_len,
-)
 
 __all__ = [
     "transform_messages",
@@ -25,10 +21,9 @@ __all__ = [
     "display_conversations",
     "build_chatml_input",
     "format_msgs",
-    "split_indices_by_length",
-    "group_messages_by_len",
+    # "group_messages_by_len",
     "LM",
-    "LMReasoner",
     "AsyncLM",
     "display_chat_messages_as_html",
+    "LLMTask",
 ]
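
Net effect on the public API: `LLMTask` replaces `LMReasoner` in the exports, the `chat_format` imports are re-sorted alphabetically, and the `group_messages` helpers (`split_indices_by_length`, `group_messages_by_len`) are removed outright, one surviving only as a commented-out `__all__` entry. A minimal before/after sketch, based solely on the export lists above (illustrative, not taken from the package's docs):

    # Imports that worked against 1.0.13:
    from llm_utils import LM, LMReasoner, group_messages_by_len

    # Equivalent imports against 1.0.15: LLMTask replaces LMReasoner,
    # and the grouping helpers have no replacement.
    from llm_utils import LM, LLMTask, AsyncLM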
llm_utils/lm/alm.py CHANGED
@@ -34,7 +34,7 @@ from typing import (
 )
 
 from httpx import URL
-from openai import AsyncOpenAI, AuthenticationError, RateLimitError
+from openai import AsyncOpenAI, AuthenticationError, BadRequestError, RateLimitError
 
 # from openai.pagination import AsyncSyncPage
 from openai.types.chat import (
@@ -108,6 +108,7 @@ class AsyncLM:
         # if have multiple ports
         if self.ports:
             import random
+
             port = random.choice(self.ports)
             api_base = f"http://{self.host}:{port}/v1"
             logger.debug(f"Using port: {port}")
@@ -213,6 +214,13 @@ class AsyncLM:
             self._cache_key(messages, kw, response_format) if use_cache else None
         )
         if cache_key and (hit := self._load_cache(cache_key)) is not None:
+            # Check if cached value is an error
+            if isinstance(hit, dict) and hit.get("error"):
+                error_type = hit.get("error_type", "Unknown")
+                error_msg = hit.get("error_message", "Cached error")
+                logger.warning(f"Found cached error ({error_type}): {error_msg}")
+                # Re-raise as a ValueError with meaningful message
+                raise ValueError(f"Cached {error_type}: {error_msg}")
             return hit
 
         try:
@@ -230,8 +238,21 @@
                 **kw,
             )
 
-        except (AuthenticationError, RateLimitError) as exc:
-            logger.error(exc)
+        except (AuthenticationError, RateLimitError, BadRequestError) as exc:
+            error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
+            logger.error(error_msg)
+
+            # Cache the error if it's a BadRequestError to avoid repeated calls
+            if isinstance(exc, BadRequestError) and cache_key:
+                error_response = {
+                    "error": True,
+                    "error_type": "BadRequestError",
+                    "error_message": str(exc),
+                    "choices": [],
+                }
+                self._dump_cache(cache_key, error_response)
+                logger.debug(f"Cached BadRequestError for key: {cache_key}")
+
             raise
 
         if cache_key:
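
These two hunks work together: a `BadRequestError` (for example, a prompt over the model's context limit) is now written to the cache as an error record, and a later cache hit for the same request re-raises a `ValueError` instead of returning the record as if it were a completion, so deterministic failures stop hitting the API. A standalone sketch of the guard; `resolve_cache_hit` is a hypothetical name for illustration, mirroring the inline logic above:

    from typing import Any

    def resolve_cache_hit(hit: Any) -> Any:
        # Mirrors AsyncLM's new cache-hit guard: error records raise,
        # ordinary cached completions are returned unchanged.
        if isinstance(hit, dict) and hit.get("error"):
            error_type = hit.get("error_type", "Unknown")
            error_msg = hit.get("error_message", "Cached error")
            raise ValueError(f"Cached {error_type}: {error_msg}")
        return hit

    # A cached BadRequestError record round-trips as a ValueError:
    record = {
        "error": True,
        "error_type": "BadRequestError",
        "error_message": "maximum context length exceeded",
        "choices": [],
    }
    try:
        resolve_cache_hit(record)
    except ValueError as exc:
        print(exc)  # Cached BadRequestError: maximum context length exceeded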
@@ -0,0 +1,244 @@
+from .lm import *
+import sys
+
+# Configuration
+DEFAULT_FONT_SIZE = 1  # Base font size in pixels
+DEFAULT_CODE_FONT_SIZE = 1  # Code font size in pixels
+DEFAULT_PADDING = [1] * 4  # Padding [top, right, bottom, left] in pixels
+DEFAULT_INNER_PADDING = [1] * 4  # Inner padding [top, right, bottom, left]
+thinking_tag = "think"
+# Jupyter notebook detection and imports
+try:
+    from IPython.display import display, HTML
+    from IPython import get_ipython
+
+    JUPYTER_AVAILABLE = True
+except ImportError:
+    JUPYTER_AVAILABLE = False
+
+
+def _is_jupyter_notebook() -> bool:
+    """Check if running in Jupyter notebook environment."""
+    if not JUPYTER_AVAILABLE:
+        return False
+    try:
+        shell = get_ipython().__class__.__name__
+        return shell == "ZMQInteractiveShell"
+    except Exception:
+        return False
+
+
+def _parse_thinking_content(content: str) -> tuple[str, str]:
+    """Parse content to separate thinking and answer sections during streaming."""
+    import re
+
+    # For streaming: detect if we're currently in thinking mode
+    think_start_match = re.search(r"<think[^>]*>", content, re.IGNORECASE)
+    if not think_start_match:
+        return "", content
+
+    think_start_pos = think_start_match.end()
+
+    # Look for closing tag
+    think_end_match = re.search(
+        r"</think[^>]*>", content[think_start_pos:], re.IGNORECASE
+    )
+
+    if think_end_match:
+        # We have complete thinking section
+        thinking_content = content[
+            think_start_pos : think_start_pos + think_end_match.start()
+        ].strip()
+        # Everything after </think> is answer content
+        answer_start = think_start_pos + think_end_match.end()
+        answer_content = content[answer_start:].strip()
+        return thinking_content, answer_content
+    else:
+        # Still in thinking mode (streaming), no closing tag yet
+        thinking_content = content[think_start_pos:].strip()
+        return thinking_content, ""
+
+
+def _get_chat_html_template(
+    content: str,
+    font_size: int = DEFAULT_FONT_SIZE,
+    padding: list[int] = DEFAULT_PADDING,
+    inner_padding: list[int] = DEFAULT_INNER_PADDING,
+) -> str:
+    """Generate HTML template with improved styling for chat display."""
+    code_font_size = max(font_size - 1, 10)  # Code slightly smaller, min 10px
+
+    # Parse thinking and answer content
+    thinking_content, answer_content = _parse_thinking_content(content)
+
+    # Format padding as CSS value - reduce outer padding more
+    outer_padding_css = f"2px {padding[1]}px 2px {padding[3]}px"
+    inner_padding_css = f"2px {inner_padding[1]}px 2px {inner_padding[3]}px"
+
+    # Build thinking section HTML if present
+    thinking_html = ""
+    if thinking_content:
+        # Show as open during streaming, closed when complete
+        is_complete = "</think" in content.lower()
+        open_attr = "" if is_complete else "open"
+
+        thinking_html = f"""
+        <details {open_attr} style="
+            margin-bottom: 4px;
+            border: 1px solid #d1d9e0;
+            border-radius: 4px;
+            background-color: #f8f9fa;
+        ">
+            <summary style="
+                padding: 3px 8px;
+                background-color: #e9ecef;
+                border-radius: 3px 3px 0 0;
+                cursor: pointer;
+                font-weight: 500;
+                color: #495057;
+                user-select: none;
+                border-bottom: 1px solid #d1d9e0;
+                font-size: {font_size - 1}px;
+            ">
+                🤔 Thinking{'...' if not is_complete else ''}
+            </summary>
+            <div style="
+                padding: 4px 8px;
+                background-color: #f8f9fa;
+                border-radius: 0 0 3px 3px;
+            ">
+                <pre style="
+                    margin: 0;
+                    padding: 0;
+                    white-space: pre-wrap;
+                    word-wrap: break-word;
+                    font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
+                    font-size: {code_font_size - 1}px;
+                    line-height: 1.3;
+                    background: transparent;
+                    border: none;
+                    color: #6c757d;
+                ">{thinking_content}</pre>
+            </div>
+        </details>
+        """
+
+    return f"""
+    <div style="
+        border: 1px solid #d0d7de;
+        border-radius: 6px;
+        padding: {outer_padding_css};
+        margin: 2px 0;
+        background-color: #f6f8fa;
+        color: #24292f;
+        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
+        font-size: {font_size}px;
+        line-height: 1.4;
+        white-space: pre-wrap;
+        word-wrap: break-word;
+        box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+    ">
+        <div style="
+            background-color: #fff;
+            border: 1px solid #d0d7de;
+            border-radius: 4px;
+            padding: {inner_padding_css};
+            color: #24292f;
+        ">
+            <strong style="color: #0969da;">Assistant:</strong><br>
+            {thinking_html}
+            <pre style="
+                margin: 2px 0 0 0;
+                padding: 0;
+                white-space: pre-wrap;
+                word-wrap: break-word;
+                font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
+                font-size: {code_font_size}px;
+                line-height: 1.4;
+                background: transparent;
+                border: none;
+            ">{answer_content}</pre>
+        </div>
+    </div>
+    """
+
+
+class LMChatHtml(LM):
+    def __init__(
+        self,
+        *args,
+        font_size: int = DEFAULT_FONT_SIZE,
+        padding: list[int] = DEFAULT_PADDING,
+        inner_padding: list[int] = DEFAULT_INNER_PADDING,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        self.font_size = font_size
+        self.padding = padding
+        self.inner_padding = inner_padding
+
+    def chat_stream(
+        self,
+        prompt: Optional[str] = None,
+        messages: Optional[RawMsgs] = None,
+        html_mode: bool = False,
+        font_size: Optional[int] = None,
+        padding: Optional[list[int]] = None,
+        inner_padding: Optional[list[int]] = None,
+        **kwargs: Any,
+    ) -> str:
+        """
+        Stream responses from the model with HTML support in Jupyter.
+        """
+        if prompt is not None:
+            messages = [{"role": "user", "content": prompt}]
+
+        assert messages is not None  # for type-checker
+
+        openai_msgs: Messages = (
+            self._convert_messages(cast(LegacyMsgs, messages))
+            if isinstance(messages[0], dict)  # legacy style
+            else cast(Messages, messages)  # already typed
+        )
+        assert self.model is not None, "Model must be set before streaming."
+
+        stream = self.client.chat.completions.create(
+            model=self.model,
+            messages=openai_msgs,
+            stream=True,
+            **kwargs,
+        )  # type: ignore
+
+        output_text = ""
+        is_jupyter = _is_jupyter_notebook()
+        display_font_size = font_size or self.font_size
+        display_padding = padding or self.padding
+        display_inner_padding = inner_padding or self.inner_padding
+
+        if html_mode and is_jupyter:
+            # Create initial display handle
+            display_handle = display(HTML(""), display_id=True)
+
+            for chunk in stream:
+                if chunk.choices[0].delta.content is not None:
+                    chunk_content = chunk.choices[0].delta.content
+                    output_text += chunk_content
+
+                    # Update HTML display progressively using improved template
+                    html_content = _get_chat_html_template(
+                        output_text,
+                        font_size=display_font_size,
+                        padding=display_padding,
+                        inner_padding=display_inner_padding,
+                    )
+                    display_handle.update(HTML(html_content))
+        else:
+            # Console streaming mode (original behavior)
+            for chunk in stream:
+                if chunk.choices[0].delta.content is not None:
+                    chunk_content = chunk.choices[0].delta.content
+                    print(chunk_content, end="")
+                    sys.stdout.flush()
+                    output_text += chunk_content
+
+        return output_text
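
The file added by this hunk (its path is not shown in the diff, though `from .lm import *` places it alongside `lm.py`) introduces `LMChatHtml`, an `LM` subclass whose `chat_stream` renders streamed output as a live-updating HTML card in Jupyter, folding any `<think>...</think>` span into a collapsible "Thinking" section that stays open while the closing tag has not yet arrived. A hedged usage sketch; the import path, model name, and constructor arguments are assumptions, since `LMChatHtml` forwards everything but the styling options to `LM` and is not exported in the `__init__.py` shown above:

    # The module name is not visible in this diff; adjust the import to match.
    from llm_utils.lm import LMChatHtml  # hypothetical path

    lm = LMChatHtml(model="my-model", font_size=13, padding=[4, 8, 4, 8])

    # In a Jupyter notebook this renders a progressively updated card;
    # a <think>...</think> prefix streams into the collapsible section first.
    reply = lm.chat_stream(prompt="Why is the sky blue?", html_mode=True)

    # Outside Jupyter, or with html_mode=False, it falls back to plain
    # console streaming; either way the full text is returned.
    print(len(reply))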