speedy-utils 1.1.6__py3-none-any.whl → 1.1.8__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
llm_utils/lm/chat_html.py DELETED
@@ -1,246 +0,0 @@
- from typing import Any, Optional, cast
- from .sync_lm import LM, Messages, LegacyMsgs, RawMsgs
- import sys
-
- # Configuration
- DEFAULT_FONT_SIZE = 1 # Base font size in pixels
- DEFAULT_CODE_FONT_SIZE = 1 # Code font size in pixels
- DEFAULT_PADDING = [1] * 4 # Padding [top, right, bottom, left] in pixels
- DEFAULT_INNER_PADDING = [1] * 4 # Inner padding [top, right, bottom, left]
- thinking_tag = "think"
- # Jupyter notebook detection and imports
- try:
-     from IPython.display import display, HTML
-     from IPython import get_ipython
-
-     JUPYTER_AVAILABLE = True
- except ImportError:
-     JUPYTER_AVAILABLE = False
-
-
- def _is_jupyter_notebook() -> bool:
-     """Check if running in Jupyter notebook environment."""
-     if not JUPYTER_AVAILABLE:
-         return False
-     try:
-         shell = get_ipython().__class__.__name__
-         return shell == "ZMQInteractiveShell"
-     except Exception:
-         return False
-
-
- def _parse_thinking_content(content: str) -> tuple[str, str]:
-     """Parse content to separate thinking and answer sections during streaming."""
-     import re
-
-     # For streaming: detect if we're currently in thinking mode
-     think_start_match = re.search(r"<think[^>]*>", content, re.IGNORECASE)
-     if not think_start_match:
-         return "", content
-
-     think_start_pos = think_start_match.end()
-
-     # Look for closing tag
-     think_end_match = re.search(
-         r"</think[^>]*>", content[think_start_pos:], re.IGNORECASE
-     )
-
-     if think_end_match:
-         # We have complete thinking section
-         thinking_content = content[
-             think_start_pos : think_start_pos + think_end_match.start()
-         ].strip()
-         # Everything after </think> is answer content
-         answer_start = think_start_pos + think_end_match.end()
-         answer_content = content[answer_start:].strip()
-         return thinking_content, answer_content
-     else:
-         # Still in thinking mode (streaming), no closing tag yet
-         thinking_content = content[think_start_pos:].strip()
-         return thinking_content, ""
-
-
- def _get_chat_html_template(
-     content: str,
-     font_size: int = DEFAULT_FONT_SIZE,
-     padding: list[int] = DEFAULT_PADDING,
-     inner_padding: list[int] = DEFAULT_INNER_PADDING,
- ) -> str:
-     """Generate HTML template with improved styling for chat display."""
-     code_font_size = max(font_size - 1, 10) # Code slightly smaller, min 10px
-
-     # Parse thinking and answer content
-     thinking_content, answer_content = _parse_thinking_content(content)
-
-     # Format padding as CSS value - reduce outer padding more
-     outer_padding_css = f"2px {padding[1]}px 2px {padding[3]}px"
-     inner_padding_css = f"2px {inner_padding[1]}px 2px {inner_padding[3]}px"
-
-     # Build thinking section HTML if present
-     thinking_html = ""
-     if thinking_content:
-         # Show as open during streaming, closed when complete
-         is_complete = "</think" in content.lower()
-         open_attr = "" if is_complete else "open"
-
-         thinking_html = f"""
-         <details {open_attr} style="
-             margin-bottom: 4px;
-             border: 1px solid #d1d9e0;
-             border-radius: 4px;
-             background-color: #f8f9fa;
-         ">
-             <summary style="
-                 padding: 3px 8px;
-                 background-color: #e9ecef;
-                 border-radius: 3px 3px 0 0;
-                 cursor: pointer;
-                 font-weight: 500;
-                 color: #495057;
-                 user-select: none;
-                 border-bottom: 1px solid #d1d9e0;
-                 font-size: {font_size - 1}px;
-             ">
-                 🤔 Thinking{'...' if not is_complete else ''}
-             </summary>
-             <div style="
-                 padding: 4px 8px;
-                 background-color: #f8f9fa;
-                 border-radius: 0 0 3px 3px;
-             ">
-                 <pre style="
-                     margin: 0;
-                     padding: 0;
-                     white-space: pre-wrap;
-                     word-wrap: break-word;
-                     font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
-                     font-size: {code_font_size - 1}px;
-                     line-height: 1.3;
-                     background: transparent;
-                     border: none;
-                     color: #6c757d;
-                 ">{thinking_content}</pre>
-             </div>
-         </details>
-         """
-
-     return f"""
-     <div style="
-         border: 1px solid #d0d7de;
-         border-radius: 6px;
-         padding: {outer_padding_css};
-         margin: 2px 0;
-         background-color: #f6f8fa;
-         color: #24292f;
-         font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Noto Sans', Helvetica, Arial, sans-serif;
-         font-size: {font_size}px;
-         line-height: 1.4;
-         white-space: pre-wrap;
-         word-wrap: break-word;
-         box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
-     ">
-         <div style="
-             background-color: #fff;
-             border: 1px solid #d0d7de;
-             border-radius: 4px;
-             padding: {inner_padding_css};
-             color: #24292f;
-         ">
-             <strong style="color: #0969da;">Assistant:</strong><br>
-             {thinking_html}
-             <pre style="
-                 margin: 2px 0 0 0;
-                 padding: 0;
-                 white-space: pre-wrap;
-                 word-wrap: break-word;
-                 font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace;
-                 font-size: {code_font_size}px;
-                 line-height: 1.4;
-                 background: transparent;
-                 border: none;
-             ">{answer_content}</pre>
-         </div>
-     </div>
-     """
-
-
- class LMChatHtml(LM):
-     def __init__(
-         self,
-         *args,
-         font_size: int = DEFAULT_FONT_SIZE,
-         padding: list[int] = DEFAULT_PADDING,
-         inner_padding: list[int] = DEFAULT_INNER_PADDING,
-         **kwargs,
-     ):
-         super().__init__(*args, **kwargs)
-         self.font_size = font_size
-         self.padding = padding
-         self.inner_padding = inner_padding
-
-     def chat_stream(
-         self,
-         prompt: Optional[str] = None,
-         messages: Optional[RawMsgs] = None,
-         html_mode: bool = False,
-         font_size: Optional[int] = None,
-         padding: Optional[list[int]] = None,
-         inner_padding: Optional[list[int]] = None,
-         **kwargs: Any,
-     ) -> str:
-         """
-         Stream responses from the model with HTML support in Jupyter.
-         """
-         if prompt is not None:
-             messages = [{"role": "user", "content": prompt}]
-
-         assert messages is not None # for type-checker
-
-         openai_msgs: Messages = (
-             self._convert_messages(cast(LegacyMsgs, messages))
-             if isinstance(messages[0], dict) # legacy style
-             else cast(Messages, messages) # already typed
-         )
-         assert self.model is not None, "Model must be set before streaming."
-
-         stream = self.client.chat.completions.create(
-             model=self.model,
-             messages=openai_msgs,
-             stream=True,
-             **kwargs,
-         ) # type: ignore
-
-         output_text = ""
-         is_jupyter = _is_jupyter_notebook()
-         display_font_size = font_size or self.font_size
-         display_padding = padding or self.padding
-         display_inner_padding = inner_padding or self.inner_padding
-
-         if html_mode and is_jupyter:
-             # Create initial display handle
-             display_handle = display(HTML(""), display_id=True)
-
-             for chunk in stream:
-                 if chunk.choices[0].delta.content is not None:
-                     chunk_content = chunk.choices[0].delta.content
-                     output_text += chunk_content
-
-                     # Update HTML display progressively using improved template
-                     html_content = _get_chat_html_template(
-                         output_text,
-                         font_size=display_font_size,
-                         padding=display_padding,
-                         inner_padding=display_inner_padding,
-                     )
-                     if display_handle is not None:
-                         display_handle.update(HTML(html_content))
-         else:
-             # Console streaming mode (original behavior)
-             for chunk in stream:
-                 if chunk.choices[0].delta.content is not None:
-                     chunk_content = chunk.choices[0].delta.content
-                     print(chunk_content, end="")
-                     sys.stdout.flush()
-                     output_text += chunk_content
-
-         return output_text
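
For reference, a minimal usage sketch of the removed LMChatHtml wrapper (the model name and prompt are illustrative assumptions; the OpenAI-compatible client and message conversion are assumed to come from the LM base class in sync_lm):

# Hypothetical usage of the removed LMChatHtml class.
# "gpt-4o-mini" and the prompt text are placeholders, not part of the package.
from llm_utils.lm.chat_html import LMChatHtml

lm = LMChatHtml(model="gpt-4o-mini", font_size=13)

# In a Jupyter notebook, html_mode=True renders the streamed reply
# (including any <think>...</think> section) as a live-updating HTML block;
# elsewhere it falls back to plain console streaming.
reply = lm.chat_stream(
    prompt="Explain Python list comprehensions in one paragraph.",
    html_mode=True,
)
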
llm_utils/lm/lm_json.py DELETED
@@ -1,68 +0,0 @@
- from typing import Any, Optional
-
-
- from llm_utils.lm.sync_lm import LM, RawMsgs
-
-
- class LMJson(LM):
-     "Regex-based reasoning wrapper for LM."
-
-     def __init__(
-         self,
-         model: str | None = None,
-         *,
-         temperature: float = 0.0,
-         max_tokens: int = 2_000,
-         host: str = "localhost",
-         port: Optional[int | str] = None,
-         base_url: Optional[str] = None,
-         api_key: Optional[str] = None,
-         cache: bool = True,
-         **openai_kwargs: Any,
-     ) -> None:
-         """
-         Initialize the LMJson instance.
-
-         Args:
-             model (str | None): The model name to use.
-             temperature (float): Sampling temperature.
-             max_tokens (int): Maximum number of tokens to generate.
-             host (str): Host for the API.
-             port (int | str, optional): Port for the API.
-             base_url (str, optional): Base URL for the API.
-             api_key (str, optional): API key for authentication.
-             cache (bool): Whether to cache responses.
-             **openai_kwargs: Additional OpenAI parameters.
-         """
-         super().__init__(
-             model=model,
-             temperature=temperature,
-             max_tokens=max_tokens,
-             host=host,
-             port=port,
-             base_url=base_url,
-             api_key=api_key,
-             cache=cache,
-             **openai_kwargs,
-         )
-
-     def __call__(
-         self,
-         prompt: Optional[str] = None,
-         messages: Optional[RawMsgs] = None,
-         cache: Optional[bool] = None,
-         max_tokens: Optional[int] = None,
-         return_openai_response: bool = False,
-         **kwargs: Any,
-     ):
-
-         output = super().__call__(
-             prompt=prompt,
-             messages=messages,
-             response_format=str,
-             cache=cache,
-             max_tokens=max_tokens,
-             return_openai_response=return_openai_response,
-             **kwargs,
-         )
-         return output
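
Similarly, a minimal sketch of how the removed LMJson wrapper could be called (the model name and prompt are illustrative assumptions; the class simply forwards to LM.__call__ with response_format=str):

# Hypothetical usage of the removed LMJson class.
# "gpt-4o-mini" and the prompt are placeholders; host, port, and base_url
# default to the values shown in the constructor above.
from llm_utils.lm.lm_json import LMJson

lm = LMJson(model="gpt-4o-mini", temperature=0.0, cache=True)
answer = lm(prompt="List three prime numbers as a JSON array.")
print(answer)
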