speedy-utils 1.1.46__py3-none-any.whl → 1.1.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +1 -3
- llm_utils/chat_format/__init__.py +0 -2
- llm_utils/chat_format/display.py +283 -364
- llm_utils/lm/llm.py +62 -22
- speedy_utils/__init__.py +4 -0
- speedy_utils/multi_worker/__init__.py +4 -0
- speedy_utils/multi_worker/_multi_process.py +425 -0
- speedy_utils/multi_worker/_multi_process_ray.py +308 -0
- speedy_utils/multi_worker/common.py +879 -0
- speedy_utils/multi_worker/dataset_sharding.py +203 -0
- speedy_utils/multi_worker/process.py +53 -1234
- speedy_utils/multi_worker/progress.py +71 -1
- speedy_utils/multi_worker/thread.py +45 -0
- speedy_utils/scripts/mpython.py +19 -12
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.48.dist-info}/METADATA +1 -1
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.48.dist-info}/RECORD +18 -14
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.48.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.46.dist-info → speedy_utils-1.1.48.dist-info}/entry_points.txt +0 -0
llm_utils/chat_format/display.py
CHANGED
|
@@ -2,248 +2,279 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
from difflib import SequenceMatcher
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
7
|
from IPython.display import HTML, display
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _preprocess_as_json(content: str) -> str:
|
|
11
|
-
"""
|
|
12
|
-
Preprocess content as JSON with proper formatting and syntax highlighting.
|
|
13
|
-
"""
|
|
11
|
+
"""Preprocess content as JSON with proper formatting."""
|
|
14
12
|
try:
|
|
15
|
-
# Try to parse and reformat JSON
|
|
16
13
|
parsed = json.loads(content)
|
|
17
14
|
return json.dumps(parsed, indent=2, ensure_ascii=False)
|
|
18
15
|
except (json.JSONDecodeError, TypeError):
|
|
19
|
-
# If not valid JSON, return as-is
|
|
20
16
|
return content
|
|
21
17
|
|
|
22
18
|
|
|
23
19
|
def _preprocess_as_markdown(content: str) -> str:
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
"""
|
|
27
|
-
# Basic markdown preprocessing - convert common patterns
|
|
28
|
-
lines = content.split("\n")
|
|
20
|
+
"""Preprocess content as markdown with proper formatting."""
|
|
21
|
+
lines = content.split('\n')
|
|
29
22
|
processed_lines = []
|
|
30
23
|
|
|
31
24
|
for line in lines:
|
|
32
25
|
# Convert **bold** to span with bold styling
|
|
33
|
-
while
|
|
34
|
-
first_pos = line.find(
|
|
35
|
-
if first_pos
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
bold_text = line[first_pos + 2 : second_pos]
|
|
40
|
-
after = line[second_pos + 2 :]
|
|
41
|
-
line = f'{before}<span style="font-weight: bold;">{bold_text}</span>{after}'
|
|
42
|
-
else:
|
|
43
|
-
break
|
|
44
|
-
else:
|
|
26
|
+
while '**' in line:
|
|
27
|
+
first_pos = line.find('**')
|
|
28
|
+
if first_pos == -1:
|
|
29
|
+
break
|
|
30
|
+
second_pos = line.find('**', first_pos + 2)
|
|
31
|
+
if second_pos == -1:
|
|
45
32
|
break
|
|
33
|
+
before = line[:first_pos]
|
|
34
|
+
bold_text = line[first_pos + 2 : second_pos]
|
|
35
|
+
after = line[second_pos + 2 :]
|
|
36
|
+
line = f'{before}<span style="font-weight: bold;">{bold_text}</span>{after}'
|
|
46
37
|
|
|
47
38
|
# Convert *italic* to span with italic styling
|
|
48
|
-
while
|
|
49
|
-
first_pos = line.find(
|
|
50
|
-
if first_pos
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
italic_text = line[first_pos + 1 : second_pos]
|
|
55
|
-
after = line[second_pos + 1 :]
|
|
56
|
-
line = f'{before}<span style="font-style: italic;">{italic_text}</span>{after}'
|
|
57
|
-
else:
|
|
58
|
-
break
|
|
59
|
-
else:
|
|
39
|
+
while '*' in line and line.count('*') >= 2:
|
|
40
|
+
first_pos = line.find('*')
|
|
41
|
+
if first_pos == -1:
|
|
42
|
+
break
|
|
43
|
+
second_pos = line.find('*', first_pos + 1)
|
|
44
|
+
if second_pos == -1:
|
|
60
45
|
break
|
|
46
|
+
before = line[:first_pos]
|
|
47
|
+
italic_text = line[first_pos + 1 : second_pos]
|
|
48
|
+
after = line[second_pos + 1 :]
|
|
49
|
+
line = (
|
|
50
|
+
f'{before}<span style="font-style: italic;">{italic_text}</span>{after}'
|
|
51
|
+
)
|
|
61
52
|
|
|
62
53
|
# Convert # headers to bold headers
|
|
63
|
-
if line.strip().startswith(
|
|
64
|
-
level = len(line) - len(line.lstrip(
|
|
65
|
-
header_text = line.lstrip(
|
|
54
|
+
if line.strip().startswith('#'):
|
|
55
|
+
level = len(line) - len(line.lstrip('#'))
|
|
56
|
+
header_text = line.lstrip('# ').strip()
|
|
66
57
|
line = f'<span style="font-weight: bold; font-size: 1.{min(4, level)}em;">{header_text}</span>'
|
|
67
58
|
|
|
68
59
|
processed_lines.append(line)
|
|
69
60
|
|
|
70
|
-
return
|
|
61
|
+
return '\n'.join(processed_lines)
|
|
71
62
|
|
|
72
63
|
|
|
73
|
-
def
|
|
74
|
-
msgs: Any,
|
|
75
|
-
return_html: bool = False,
|
|
76
|
-
file: str = "/tmp/conversation.html",
|
|
77
|
-
theme: str = "default",
|
|
78
|
-
as_markdown: bool = False,
|
|
79
|
-
as_json: bool = False,
|
|
80
|
-
) -> str | None:
|
|
64
|
+
def _truncate_text(text: str, max_length: int, head_ratio: float = 0.3) -> str:
|
|
81
65
|
"""
|
|
82
|
-
|
|
66
|
+
Truncate text if it exceeds max_length, showing head and tail with skip indicator.
|
|
83
67
|
|
|
84
68
|
Args:
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
69
|
+
text: Text to truncate
|
|
70
|
+
max_length: Maximum length before truncation
|
|
71
|
+
head_ratio: Ratio of max_length to show at the head (default 0.3)
|
|
72
|
+
|
|
73
|
+
Returns:
|
|
74
|
+
Original text if within limit, otherwise truncated with [SKIP n chars] indicator
|
|
91
75
|
"""
|
|
92
|
-
if
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
76
|
+
if len(text) <= max_length:
|
|
77
|
+
return text
|
|
78
|
+
|
|
79
|
+
head_len = int(max_length * head_ratio)
|
|
80
|
+
tail_len = max_length - head_len
|
|
81
|
+
skip_len = len(text) - head_len - tail_len
|
|
82
|
+
|
|
83
|
+
return f'{text[:head_len]}\n...[SKIP {skip_len} chars]...\n{text[-tail_len:]}'
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _format_reasoning_content(
|
|
87
|
+
reasoning: str, max_reasoning_length: int | None = None
|
|
88
|
+
) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Format reasoning content with <think> tags.
|
|
91
|
+
|
|
92
|
+
Args:
|
|
93
|
+
reasoning: The reasoning content
|
|
94
|
+
max_reasoning_length: Max length before truncation (None = no truncation)
|
|
95
|
+
|
|
96
|
+
Returns:
|
|
97
|
+
Formatted reasoning with <think> tags
|
|
98
|
+
"""
|
|
99
|
+
if max_reasoning_length is not None:
|
|
100
|
+
reasoning = _truncate_text(reasoning, max_reasoning_length)
|
|
101
|
+
return f'<think>\n{reasoning}\n</think>'
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _escape_html(content: str) -> str:
|
|
105
|
+
"""Escape HTML special characters and convert whitespace for display."""
|
|
106
|
+
return (
|
|
107
|
+
content.replace('&', '&amp;')
|
|
108
|
+
.replace('<', '&lt;')
|
|
109
|
+
.replace('>', '&gt;')
|
|
110
|
+
.replace('\n', '<br>')
|
|
111
|
+
.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
|
|
112
|
+
.replace(' ', '&nbsp;')
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _is_notebook() -> bool:
|
|
117
|
+
"""Detect if running in a notebook environment."""
|
|
118
|
+
try:
|
|
119
|
+
from IPython.core.getipython import get_ipython
|
|
120
|
+
|
|
121
|
+
ipython = get_ipython()
|
|
122
|
+
return ipython is not None and 'IPKernelApp' in ipython.config
|
|
123
|
+
except (ImportError, AttributeError):
|
|
124
|
+
return False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Color configurations
|
|
128
|
+
ROLE_COLORS_HTML = {
|
|
129
|
+
'system': 'red',
|
|
130
|
+
'user': 'darkorange',
|
|
131
|
+
'assistant': 'green',
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
ROLE_COLORS_TERMINAL = {
|
|
135
|
+
'system': '\033[91m', # Red
|
|
136
|
+
'user': '\033[38;5;208m', # Orange
|
|
137
|
+
'assistant': '\033[92m', # Green
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
ROLE_LABELS = {
|
|
141
|
+
'system': 'System Instruction:',
|
|
142
|
+
'user': 'User:',
|
|
143
|
+
'assistant': 'Assistant:',
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
TERMINAL_RESET = '\033[0m'
|
|
147
|
+
TERMINAL_BOLD = '\033[1m'
|
|
148
|
+
TERMINAL_GRAY = '\033[90m'
|
|
149
|
+
TERMINAL_DIM = '\033[2m' # Dim text for reasoning
|
|
150
|
+
|
|
151
|
+
# HTML colors
|
|
152
|
+
HTML_REASONING_COLOR = '#AAAAAA' # Lighter gray for better readability
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _build_assistant_content_parts(
|
|
156
|
+
msg: dict[str, Any], max_reasoning_length: int | None
|
|
157
|
+
) -> tuple[str | None, str]:
|
|
158
|
+
"""
|
|
159
|
+
Build display content parts for assistant message.
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
Tuple of (reasoning_formatted, answer_content)
|
|
163
|
+
reasoning_formatted is None if no reasoning present
|
|
164
|
+
"""
|
|
165
|
+
content = msg.get('content', '')
|
|
166
|
+
reasoning = msg.get('reasoning_content')
|
|
167
|
+
|
|
168
|
+
if reasoning:
|
|
169
|
+
formatted_reasoning = _format_reasoning_content(reasoning, max_reasoning_length)
|
|
170
|
+
return formatted_reasoning, content
|
|
171
|
+
|
|
172
|
+
return None, content
|
|
182
173
|
|
|
183
|
-
# Choose container based on whether we have markdown formatting
|
|
184
|
-
content_container = "div" if as_markdown else "pre"
|
|
185
|
-
container_style = 'style="white-space: pre-wrap;"' if as_markdown else ""
|
|
186
174
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
175
|
+
def _show_chat_html(
|
|
176
|
+
messages: list[dict[str, Any]], max_reasoning_length: int | None
|
|
177
|
+
) -> None:
|
|
178
|
+
"""Display chat messages as HTML in notebook."""
|
|
179
|
+
html_parts = [
|
|
180
|
+
"<div style='font-family:monospace; line-height:1.6em; white-space:pre-wrap;'>"
|
|
181
|
+
]
|
|
182
|
+
separator = "<div style='color:#888; margin:0.5em 0;'>───────────────────────────────────────────────────</div>"
|
|
183
|
+
|
|
184
|
+
for i, msg in enumerate(messages):
|
|
185
|
+
role = msg.get('role', 'unknown').lower()
|
|
186
|
+
color = ROLE_COLORS_HTML.get(role, 'black')
|
|
187
|
+
label = ROLE_LABELS.get(role, f'{role.capitalize()}:')
|
|
188
|
+
|
|
189
|
+
if role == 'assistant':
|
|
190
|
+
reasoning, answer = _build_assistant_content_parts(
|
|
191
|
+
msg, max_reasoning_length
|
|
191
192
|
)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
f'<div style="background-color: {background_color}; color: {text_color}; padding: 10px; margin-bottom: 10px;">'
|
|
195
|
-
f'<strong>User:</strong><br><{content_container} id="user-{i}" {container_style}>{content}</{content_container}></div>'
|
|
193
|
+
html_parts.append(
|
|
194
|
+
f"<div><strong style='color:{color}'>{label}</strong><br>"
|
|
196
195
|
)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
if reasoning:
|
|
197
|
+
escaped_reasoning = _escape_html(reasoning)
|
|
198
|
+
html_parts.append(
|
|
199
|
+
f"<span style='color:{HTML_REASONING_COLOR}'>{escaped_reasoning}</span><br><br>"
|
|
200
|
+
)
|
|
201
|
+
if answer:
|
|
202
|
+
escaped_answer = _escape_html(answer)
|
|
203
|
+
html_parts.append(
|
|
204
|
+
f"<span style='color:{color}'>{escaped_answer}</span>"
|
|
205
|
+
)
|
|
206
|
+
html_parts.append('</div>')
|
|
207
|
+
else:
|
|
208
|
+
content = msg.get('content', '')
|
|
209
|
+
escaped_content = _escape_html(content)
|
|
210
|
+
html_parts.append(
|
|
211
|
+
f"<div style='color:{color}'><strong>{label}</strong><br>{escaped_content}</div>"
|
|
201
212
|
)
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
213
|
+
|
|
214
|
+
if i < len(messages) - 1:
|
|
215
|
+
html_parts.append(separator)
|
|
216
|
+
|
|
217
|
+
html_parts.append('</div>')
|
|
218
|
+
display(HTML(''.join(html_parts)))
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _show_chat_terminal(
|
|
222
|
+
messages: list[dict[str, Any]], max_reasoning_length: int | None
|
|
223
|
+
) -> None:
|
|
224
|
+
"""Display chat messages with ANSI colors in terminal."""
|
|
225
|
+
separator = f'{TERMINAL_GRAY}─────────────────────────────────────────────────────────{TERMINAL_RESET}'
|
|
226
|
+
|
|
227
|
+
for i, msg in enumerate(messages):
|
|
228
|
+
role = msg.get('role', 'unknown').lower()
|
|
229
|
+
color = ROLE_COLORS_TERMINAL.get(role, '')
|
|
230
|
+
label = ROLE_LABELS.get(role, f'{role.capitalize()}:')
|
|
231
|
+
|
|
232
|
+
print(f'{color}{TERMINAL_BOLD}{label}{TERMINAL_RESET}')
|
|
233
|
+
|
|
234
|
+
if role == 'assistant':
|
|
235
|
+
reasoning, answer = _build_assistant_content_parts(
|
|
236
|
+
msg, max_reasoning_length
|
|
206
237
|
)
|
|
238
|
+
if reasoning:
|
|
239
|
+
# Use lighter gray without dim for better readability
|
|
240
|
+
print(f'\033[38;5;246m{reasoning.strip()}{TERMINAL_RESET}')
|
|
241
|
+
if answer:
|
|
242
|
+
print() # Blank line between reasoning and answer
|
|
243
|
+
if answer:
|
|
244
|
+
print(f'{color}{answer.strip()}{TERMINAL_RESET}')
|
|
207
245
|
else:
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
<
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
}}
|
|
236
|
-
</script>
|
|
237
|
-
</body>
|
|
238
|
-
</html>
|
|
246
|
+
content = msg.get('content', '')
|
|
247
|
+
print(f'{color}{content}{TERMINAL_RESET}')
|
|
248
|
+
|
|
249
|
+
if i < len(messages) - 1:
|
|
250
|
+
print(separator)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def show_chat(
|
|
254
|
+
messages: list[dict[str, Any]], max_reasoning_length: int | None = 2000
|
|
255
|
+
) -> None:
|
|
256
|
+
"""
|
|
257
|
+
Display chat messages with colored formatting.
|
|
258
|
+
|
|
259
|
+
Automatically detects notebook vs terminal environment and formats accordingly.
|
|
260
|
+
Handles reasoning_content in assistant messages, formatting it with <think> tags.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
messages: List of message dicts with 'role', 'content', and optionally 'reasoning_content'
|
|
264
|
+
max_reasoning_length: Max chars for reasoning before truncation (None = no limit)
|
|
265
|
+
|
|
266
|
+
Example:
|
|
267
|
+
>>> messages = [
|
|
268
|
+
... {"role": "system", "content": "You are helpful."},
|
|
269
|
+
... {"role": "user", "content": "Hello!"},
|
|
270
|
+
... {"role": "assistant", "content": "Hi!", "reasoning_content": "User greeted me..."},
|
|
271
|
+
... ]
|
|
272
|
+
>>> show_chat(messages)
|
|
239
273
|
"""
|
|
240
|
-
if
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
return html
|
|
245
|
-
display(HTML(html))
|
|
246
|
-
return None
|
|
274
|
+
if _is_notebook():
|
|
275
|
+
_show_chat_html(messages, max_reasoning_length)
|
|
276
|
+
else:
|
|
277
|
+
_show_chat_terminal(messages, max_reasoning_length)
|
|
247
278
|
|
|
248
279
|
|
|
249
280
|
def get_conversation_one_turn(
|
|
@@ -251,204 +282,92 @@ def get_conversation_one_turn(
|
|
|
251
282
|
user_msg: str | None = None,
|
|
252
283
|
assistant_msg: str | None = None,
|
|
253
284
|
assistant_prefix: str | None = None,
|
|
254
|
-
return_format: str =
|
|
285
|
+
return_format: str = 'chatml',
|
|
255
286
|
) -> Any:
|
|
256
|
-
"""
|
|
257
|
-
Build a one-turn conversation.
|
|
258
|
-
"""
|
|
287
|
+
"""Build a one-turn conversation."""
|
|
259
288
|
messages: list[dict[str, str]] = []
|
|
289
|
+
|
|
260
290
|
if system_msg is not None:
|
|
261
|
-
messages.append({
|
|
291
|
+
messages.append({'role': 'system', 'content': system_msg})
|
|
262
292
|
if user_msg is not None:
|
|
263
|
-
messages.append({
|
|
293
|
+
messages.append({'role': 'user', 'content': user_msg})
|
|
264
294
|
if assistant_msg is not None:
|
|
265
|
-
messages.append({
|
|
295
|
+
messages.append({'role': 'assistant', 'content': assistant_msg})
|
|
296
|
+
|
|
266
297
|
if assistant_prefix is not None:
|
|
267
|
-
|
|
268
|
-
return_format
|
|
269
|
-
|
|
270
|
-
|
|
298
|
+
if return_format == 'chatml':
|
|
299
|
+
raise ValueError('Change return_format to "text" to use assistant_prefix')
|
|
300
|
+
if not messages or messages[-1]['role'] != 'user':
|
|
301
|
+
raise ValueError(
|
|
302
|
+
'Last message must be from user when using assistant_prefix'
|
|
303
|
+
)
|
|
304
|
+
|
|
271
305
|
from .transform import transform_messages
|
|
272
306
|
|
|
273
|
-
msg = transform_messages(messages,
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
307
|
+
msg = transform_messages(messages, 'chatml', 'text', add_generation_prompt=True)
|
|
308
|
+
return str(msg) + assistant_prefix
|
|
309
|
+
|
|
310
|
+
if return_format != 'chatml':
|
|
311
|
+
raise ValueError(f'Unsupported return_format: {return_format}')
|
|
312
|
+
|
|
279
313
|
return messages
|
|
280
314
|
|
|
281
315
|
|
|
282
316
|
def highlight_diff_chars(text1: str, text2: str) -> str:
|
|
283
|
-
"""
|
|
284
|
-
Return a string with deletions in red and additions in green.
|
|
285
|
-
"""
|
|
317
|
+
"""Return a string with deletions in red and additions in green."""
|
|
286
318
|
matcher = SequenceMatcher(None, text1, text2)
|
|
287
|
-
|
|
319
|
+
html_parts: list[str] = []
|
|
320
|
+
|
|
288
321
|
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
|
289
|
-
if tag ==
|
|
290
|
-
|
|
291
|
-
elif tag ==
|
|
322
|
+
if tag == 'equal':
|
|
323
|
+
html_parts.append(text1[i1:i2])
|
|
324
|
+
elif tag == 'replace':
|
|
292
325
|
if i1 != i2:
|
|
293
|
-
|
|
326
|
+
html_parts.append(
|
|
294
327
|
f'<span style="background-color:#ffd6d6; color:#b20000;">{text1[i1:i2]}</span>'
|
|
295
328
|
)
|
|
296
329
|
if j1 != j2:
|
|
297
|
-
|
|
330
|
+
html_parts.append(
|
|
298
331
|
f'<span style="background-color:#d6ffd6; color:#006600;">{text2[j1:j2]}</span>'
|
|
299
332
|
)
|
|
300
|
-
elif tag ==
|
|
301
|
-
|
|
333
|
+
elif tag == 'delete':
|
|
334
|
+
html_parts.append(
|
|
302
335
|
f'<span style="background-color:#ffd6d6; color:#b20000;">{text1[i1:i2]}</span>'
|
|
303
336
|
)
|
|
304
|
-
elif tag ==
|
|
305
|
-
|
|
337
|
+
elif tag == 'insert':
|
|
338
|
+
html_parts.append(
|
|
306
339
|
f'<span style="background-color:#d6ffd6; color:#006600;">{text2[j1:j2]}</span>'
|
|
307
340
|
)
|
|
308
|
-
return "".join(html)
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
def show_string_diff(old: str, new: str) -> None:
|
|
312
|
-
"""
|
|
313
|
-
Display a one-line visual diff between two strings (old -> new).
|
|
314
|
-
"""
|
|
315
|
-
html1 = highlight_diff_chars(old, new)
|
|
316
|
-
display(HTML(html1))
|
|
317
341
|
|
|
342
|
+
return ''.join(html_parts)
|
|
318
343
|
|
|
319
|
-
def show_chat_v2(messages: list[dict[str, str]]):
|
|
320
|
-
"""
|
|
321
|
-
Print only content of messages in different colors:
|
|
322
|
-
system -> red, user -> orange, assistant -> green.
|
|
323
|
-
Automatically detects notebook environment and uses appropriate display.
|
|
324
|
-
"""
|
|
325
|
-
# Detect if running in a notebook environment
|
|
326
|
-
try:
|
|
327
|
-
from IPython.core.getipython import get_ipython
|
|
328
344
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
is_notebook = False
|
|
333
|
-
|
|
334
|
-
if is_notebook:
|
|
335
|
-
# Use HTML display in notebook
|
|
336
|
-
from IPython.display import HTML, display
|
|
337
|
-
|
|
338
|
-
role_colors = {
|
|
339
|
-
"system": "red",
|
|
340
|
-
"user": "darkorange",
|
|
341
|
-
"assistant": "green",
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
role_labels = {
|
|
345
|
-
"system": "System Instruction:",
|
|
346
|
-
"user": "User:",
|
|
347
|
-
"assistant": "Assistant:",
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
html = "<div style='font-family:monospace; line-height:1.6em; white-space:pre-wrap;'>"
|
|
351
|
-
for i, msg in enumerate(messages):
|
|
352
|
-
role = msg.get("role", "unknown").lower()
|
|
353
|
-
content = msg.get("content", "")
|
|
354
|
-
# Escape HTML characters
|
|
355
|
-
content = (
|
|
356
|
-
content.replace("&", "&amp;")
|
|
357
|
-
.replace("<", "&lt;")
|
|
358
|
-
.replace(">", "&gt;")
|
|
359
|
-
.replace("\n", "<br>")
|
|
360
|
-
.replace("\t", "&nbsp;&nbsp;&nbsp;&nbsp;")
|
|
361
|
-
.replace(" ", "&nbsp;")
|
|
362
|
-
)
|
|
363
|
-
color = role_colors.get(role, "black")
|
|
364
|
-
label = role_labels.get(role, f"{role.capitalize()}:")
|
|
365
|
-
html += f"<div style='color:{color}'><strong>{label}</strong><br>{content}</div>"
|
|
366
|
-
# Add separator except after last message
|
|
367
|
-
if i < len(messages) - 1:
|
|
368
|
-
html += "<div style='color:#888; margin:0.5em 0;'>───────────────────────────────────────────────────</div>"
|
|
369
|
-
html += "</div>"
|
|
370
|
-
|
|
371
|
-
display(HTML(html))
|
|
372
|
-
else:
|
|
373
|
-
# Use normal terminal printing with ANSI colors
|
|
374
|
-
role_colors = {
|
|
375
|
-
"system": "\033[91m", # Red
|
|
376
|
-
"user": "\033[38;5;208m", # Orange
|
|
377
|
-
"assistant": "\033[92m", # Green
|
|
378
|
-
}
|
|
379
|
-
reset = "\033[0m"
|
|
380
|
-
separator_color = "\033[90m" # Gray
|
|
381
|
-
bold = "\033[1m"
|
|
382
|
-
|
|
383
|
-
role_labels = {
|
|
384
|
-
"system": "System Instruction:",
|
|
385
|
-
"user": "User:",
|
|
386
|
-
"assistant": "Assistant:",
|
|
387
|
-
}
|
|
388
|
-
|
|
389
|
-
for i, msg in enumerate(messages):
|
|
390
|
-
role = msg.get("role", "unknown").lower()
|
|
391
|
-
content = msg.get("content", "")
|
|
392
|
-
color = role_colors.get(role, "")
|
|
393
|
-
label = role_labels.get(role, f"{role.capitalize()}:")
|
|
394
|
-
print(f"{color}{bold}{label}{reset}")
|
|
395
|
-
print(f"{color}{content}{reset}")
|
|
396
|
-
# Add separator except after last message
|
|
397
|
-
if i < len(messages) - 1:
|
|
398
|
-
print(
|
|
399
|
-
f"{separator_color}─────────────────────────────────────────────────────────{reset}"
|
|
400
|
-
)
|
|
345
|
+
def show_string_diff(old: str, new: str) -> None:
|
|
346
|
+
"""Display a visual diff between two strings (old -> new)."""
|
|
347
|
+
display(HTML(highlight_diff_chars(old, new)))
|
|
401
348
|
|
|
402
349
|
|
|
403
|
-
def display_conversations(data1: Any, data2: Any
|
|
404
|
-
"""
|
|
405
|
-
Display two conversations side by side.
|
|
406
|
-
"""
|
|
350
|
+
def display_conversations(data1: Any, data2: Any) -> None:
|
|
351
|
+
"""Display two conversations side by side. Deprecated."""
|
|
407
352
|
import warnings
|
|
408
353
|
|
|
409
354
|
warnings.warn(
|
|
410
|
-
|
|
355
|
+
'display_conversations is deprecated and will be removed.',
|
|
411
356
|
DeprecationWarning,
|
|
412
357
|
stacklevel=2,
|
|
413
358
|
)
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
<head>
|
|
419
|
-
<style>
|
|
420
|
-
table {{
|
|
421
|
-
width: 100%;
|
|
422
|
-
border-collapse: collapse;
|
|
423
|
-
}}
|
|
424
|
-
td {{
|
|
425
|
-
width: 50%;
|
|
426
|
-
vertical-align: top;
|
|
427
|
-
padding: 10px;
|
|
428
|
-
}}
|
|
429
|
-
</style>
|
|
430
|
-
</head>
|
|
431
|
-
<body>
|
|
432
|
-
<table>
|
|
433
|
-
<tr>
|
|
434
|
-
<td>{html1}</td>
|
|
435
|
-
<td>{html2}</td>
|
|
436
|
-
</tr>
|
|
437
|
-
</table>
|
|
438
|
-
</body>
|
|
439
|
-
</html>
|
|
440
|
-
"""
|
|
441
|
-
display(HTML(html))
|
|
359
|
+
print('=== Conversation 1 ===')
|
|
360
|
+
show_chat(data1)
|
|
361
|
+
print('\n=== Conversation 2 ===')
|
|
362
|
+
show_chat(data2)
|
|
442
363
|
|
|
443
364
|
|
|
444
365
|
def display_chat_messages_as_html(*args, **kwargs):
|
|
445
|
-
"""
|
|
446
|
-
Use as show_chat and warn about the deprecated function.
|
|
447
|
-
"""
|
|
366
|
+
"""Deprecated alias for show_chat."""
|
|
448
367
|
import warnings
|
|
449
368
|
|
|
450
369
|
warnings.warn(
|
|
451
|
-
|
|
370
|
+
'display_chat_messages_as_html is deprecated, use show_chat instead.',
|
|
452
371
|
DeprecationWarning,
|
|
453
372
|
stacklevel=2,
|
|
454
373
|
)
|
|
@@ -456,10 +375,10 @@ def display_chat_messages_as_html(*args, **kwargs):
|
|
|
456
375
|
|
|
457
376
|
|
|
458
377
|
__all__ = [
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
378
|
+
'show_chat',
|
|
379
|
+
'get_conversation_one_turn',
|
|
380
|
+
'highlight_diff_chars',
|
|
381
|
+
'show_string_diff',
|
|
382
|
+
'display_conversations',
|
|
383
|
+
'display_chat_messages_as_html',
|
|
465
384
|
]
|