speedy-utils 1.0.9__tar.gz → 1.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/PKG-INFO +1 -1
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/pyproject.toml +2 -2
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/__init__.py +4 -1
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/lm/__init__.py +2 -1
- speedy_utils-1.0.12/src/llm_utils/lm/alm.py +447 -0
- speedy_utils-1.0.12/src/llm_utils/lm/lm.py +558 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/scripts/vllm_load_balancer.py +7 -6
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/scripts/vllm_serve.py +66 -136
- speedy_utils-1.0.9/src/llm_utils/lm/lm.py +0 -304
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/README.md +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_cache.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/scripts/mpython.py +0 -0
--- speedy_utils-1.0.9/pyproject.toml
+++ speedy_utils-1.0.12/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "speedy-utils"
-version = "1.0.9"
+version = "1.0.12"
 description = "Fast and easy-to-use package for data science"
 authors = ["AnhVTH <anhvth.226@gmail.com>"]
 readme = "README.md"
@@ -11,7 +11,7 @@ packages = [
 ]
 
 [build-system]
-requires = ["poetry-core>=1.0.
+requires = ["poetry-core>=1.0.12"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.black]
--- speedy_utils-1.0.9/src/llm_utils/__init__.py
+++ speedy_utils-1.0.12/src/llm_utils/__init__.py
@@ -9,7 +9,8 @@ from .chat_format import (
     format_msgs,
     display_chat_messages_as_html,
 )
-from .lm import LM
+from .lm.lm import LM, LMReasoner
+from .lm.alm import AsyncLM
 from .group_messages import (
     split_indices_by_length,
     group_messages_by_len,
@@ -27,5 +28,7 @@ __all__ = [
     "split_indices_by_length",
     "group_messages_by_len",
     "LM",
+    "LMReasoner",
+    "AsyncLM",
     "display_chat_messages_as_html",
 ]
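The re-export above makes the async client importable from the package root alongside `LM` and `LMReasoner`. A minimal sketch of the intended use after upgrading, mirroring the usage example in the `alm.py` docstring below (model name and prompt are illustrative, not defaults):

import asyncio

from llm_utils import AsyncLM  # new top-level export in 1.0.12

async def main() -> None:
    # api_key falls back to $OPENAI_API_KEY; pass host/port or base_url to target a local OpenAI-compatible server
    lm = AsyncLM(model="gpt-4o-mini")
    reply = await lm(prompt="Hello, world!")
    print(reply)

asyncio.run(main())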
--- /dev/null
+++ speedy_utils-1.0.12/src/llm_utils/lm/alm.py
@@ -0,0 +1,447 @@
+from __future__ import annotations
+
+"""An **asynchronous** drop‑in replacement for the original `LM` class.
+
+Usage example (Python ≥3.8):
+
+    from async_lm import AsyncLM
+    import asyncio
+
+    async def main():
+        lm = AsyncLM(model="gpt-4o-mini")
+        reply: str = await lm(prompt="Hello, world!")
+        print(reply)
+
+    asyncio.run(main())
+"""
+
+import asyncio
+import base64
+import hashlib
+import json
+import os
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    cast,
+)
+
+from httpx import URL
+from openai import AsyncOpenAI, AuthenticationError, RateLimitError
+
+# from openai.pagination import AsyncSyncPage
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+)
+from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
+from openai.types.model import Model
+from pydantic import BaseModel
+from loguru import logger
+from openai.pagination import AsyncPage as AsyncSyncPage
+
+# --------------------------------------------------------------------------- #
+# type helpers
+# --------------------------------------------------------------------------- #
+TModel = TypeVar("TModel", bound=BaseModel)
+Messages = List[ChatCompletionMessageParam]
+LegacyMsgs = List[Dict[str, str]]
+RawMsgs = Union[Messages, LegacyMsgs]
+
+# --------------------------------------------------------------------------- #
+# color helpers (unchanged)
+# --------------------------------------------------------------------------- #
+
+
+def _color(code: int, text: str) -> str:
+    return f"\x1b[{code}m{text}\x1b[0m"
+
+
+_red = lambda t: _color(31, t)
+_green = lambda t: _color(32, t)
+_blue = lambda t: _color(34, t)
+_yellow = lambda t: _color(33, t)
+
+
+class AsyncLM:
+    """Unified **async** language‑model wrapper with optional JSON parsing."""
+
+    def __init__(
+        self,
+        model: str | None = None,
+        *,
+        temperature: float = 0.0,
+        max_tokens: int = 2_000,
+        host: str = "localhost",
+        port: Optional[int | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        cache: bool = True,
+        ports: Optional[List[int]] = None,
+        **openai_kwargs: Any,
+    ) -> None:
+        self.model = model
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.port = port
+        self.host = host
+        self.base_url = base_url or (f"http://{host}:{port}/v1" if port else None)
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY", "abc")
+        self.openai_kwargs = openai_kwargs
+        self.do_cache = cache
+        self.ports = ports
+
+    # Async client
+
+    @property
+    def client(self) -> AsyncOpenAI:
+        # if have multiple ports
+        if self.ports:
+            import random
+            port = random.choice(self.ports)
+            api_base = f"http://{self.host}:{port}/v1"
+            logger.debug(f"Using port: {port}")
+        else:
+            api_base = self.base_url or f"http://{self.host}:{self.port}/v1"
+        client = AsyncOpenAI(
+            api_key=self.api_key, base_url=api_base, **self.openai_kwargs
+        )
+        return client
+
+    # ------------------------------------------------------------------ #
+    # Public API – typed overloads
+    # ------------------------------------------------------------------ #
+    @overload
+    async def __call__(
+        self,
+        *,
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
+        response_format: type[str] = str,
+        return_openai_response: bool = ...,
+        **kwargs: Any,
+    ) -> str: ...
+
+    @overload
+    async def __call__(
+        self,
+        *,
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
+        response_format: Type[TModel],
+        return_openai_response: bool = ...,
+        **kwargs: Any,
+    ) -> TModel: ...
+
+    async def __call__(
+        self,
+        prompt: Optional[str] = None,
+        messages: Optional[RawMsgs] = None,
+        response_format: Union[type[str], Type[BaseModel]] = str,
+        cache: Optional[bool] = None,
+        max_tokens: Optional[int] = None,
+        return_openai_response: bool = False,
+        **kwargs: Any,
+    ):
+        if (prompt is None) == (messages is None):
+            raise ValueError("Provide *either* `prompt` or `messages` (but not both).")
+
+        if prompt is not None:
+            messages = [{"role": "user", "content": prompt}]
+
+        assert messages is not None
+        # assert self.model is not None, "Model must be set before calling."
+        if not self.model:
+            models = await self.list_models(port=self.port, host=self.host)
+            self.model = models[0] if models else None
+            logger.info(
+                f"No model specified. Using the first available model. {self.model}"
+            )
+        openai_msgs: Messages = (
+            self._convert_messages(cast(LegacyMsgs, messages))
+            if isinstance(messages[0], dict)
+            else cast(Messages, messages)
+        )
+
+        kw = dict(
+            self.openai_kwargs,
+            temperature=self.temperature,
+            max_tokens=max_tokens or self.max_tokens,
+        )
+        kw.update(kwargs)
+        use_cache = self.do_cache if cache is None else cache
+
+        raw_response = await self._call_raw(
+            openai_msgs,
+            response_format=response_format,
+            use_cache=use_cache,
+            **kw,
+        )
+
+        if return_openai_response:
+            response = raw_response
+        else:
+            response = self._parse_output(raw_response, response_format)
+
+        self.last_log = [prompt, messages, raw_response]
+        return response
+
+    # ------------------------------------------------------------------ #
+    # Model invocation (async)
+    # ------------------------------------------------------------------ #
+    async def _call_raw(
+        self,
+        messages: Sequence[ChatCompletionMessageParam],
+        response_format: Union[type[str], Type[BaseModel]],
+        use_cache: bool,
+        **kw: Any,
+    ):
+        assert self.model is not None, "Model must be set before making a call."
+        model: str = self.model
+
+        cache_key = (
+            self._cache_key(messages, kw, response_format) if use_cache else None
+        )
+        if cache_key and (hit := self._load_cache(cache_key)) is not None:
+            return hit
+
+        try:
+            if response_format is not str and issubclass(response_format, BaseModel):
+                openai_response = await self.client.beta.chat.completions.parse(
+                    model=model,
+                    messages=list(messages),
+                    response_format=response_format,  # type: ignore[arg-type]
+                    **kw,
+                )
+            else:
+                openai_response = await self.client.chat.completions.create(
+                    model=model,
+                    messages=list(messages),
+                    **kw,
+                )
+
+        except (AuthenticationError, RateLimitError) as exc:
+            logger.error(exc)
+            raise
+
+        if cache_key:
+            self._dump_cache(cache_key, openai_response)
+
+        return openai_response
+
+    # ------------------------------------------------------------------ #
+    # Utilities below are unchanged (sync I/O is acceptable)
+    # ------------------------------------------------------------------ #
+    @staticmethod
+    def _convert_messages(msgs: LegacyMsgs) -> Messages:
+        converted: Messages = []
+        for msg in msgs:
+            role = msg["role"]
+            content = msg["content"]
+            if role == "user":
+                converted.append(
+                    ChatCompletionUserMessageParam(role="user", content=content)
+                )
+            elif role == "assistant":
+                converted.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant", content=content
+                    )
+                )
+            elif role == "system":
+                converted.append(
+                    ChatCompletionSystemMessageParam(role="system", content=content)
+                )
+            elif role == "tool":
+                converted.append(
+                    ChatCompletionToolMessageParam(
+                        role="tool",
+                        content=content,
+                        tool_call_id=msg.get("tool_call_id") or "",
+                    )
+                )
+            else:
+                converted.append({"role": role, "content": content})  # type: ignore[arg-type]
+        return converted
+
+    @staticmethod
+    def _parse_output(
+        raw_response: Any, response_format: Union[type[str], Type[BaseModel]]
+    ) -> str | BaseModel:
+        if hasattr(raw_response, "model_dump"):
+            raw_response = raw_response.model_dump()
+
+        if response_format is str:
+            if isinstance(raw_response, dict) and "choices" in raw_response:
+                message = raw_response["choices"][0]["message"]
+                return message.get("content", "") or ""
+            return cast(str, raw_response)
+
+        model_cls = cast(Type[BaseModel], response_format)
+
+        if isinstance(raw_response, dict) and "choices" in raw_response:
+            message = raw_response["choices"][0]["message"]
+            if "parsed" in message:
+                return model_cls.model_validate(message["parsed"])
+            content = message.get("content")
+            if content is None:
+                raise ValueError("Model returned empty content")
+            try:
+                data = json.loads(content)
+                return model_cls.model_validate(data)
+            except Exception as exc:
+                raise ValueError(
+                    f"Failed to parse model output as JSON:\n{content}"
+                ) from exc
+
+        if isinstance(raw_response, model_cls):
+            return raw_response
+        if isinstance(raw_response, dict):
+            return model_cls.model_validate(raw_response)
+
+        try:
+            data = json.loads(raw_response)
+            return model_cls.model_validate(data)
+        except Exception as exc:
+            raise ValueError(
+                f"Model did not return valid JSON:\n---\n{raw_response}"
+            ) from exc
+
+    # ------------------------------------------------------------------ #
+    # Simple disk cache (sync)
+    # ------------------------------------------------------------------ #
+    @staticmethod
+    def _cache_key(
+        messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
+    ) -> str:
+        tag = response_format.__name__ if response_format is not str else "text"
+        blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
+        return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
+
+    @staticmethod
+    def _cache_path(key: str) -> str:
+        return os.path.expanduser(f"~/.cache/lm/{key}.json")
+
+    def _dump_cache(self, key: str, val: Any) -> None:
+        try:
+            path = self._cache_path(key)
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "w") as fh:
+                if isinstance(val, BaseModel):
+                    json.dump(val.model_dump(mode="json"), fh)
+                else:
+                    json.dump(val, fh)
+        except Exception as exc:
+            logger.debug(f"cache write skipped: {exc}")
+
+    def _load_cache(self, key: str) -> Any | None:
+        path = self._cache_path(key)
+        if not os.path.exists(path):
+            return None
+        try:
+            with open(path) as fh:
+                return json.load(fh)
+        except Exception:
+            return None
+
+    # ------------------------------------------------------------------ #
+    # Utility helpers
+    # ------------------------------------------------------------------ #
+    async def inspect_history(self) -> None:
+        if not hasattr(self, "last_log"):
+            raise ValueError("No history available. Please call the model first.")
+
+        prompt, messages, response = self.last_log
+        if hasattr(response, "model_dump"):
+            response = response.model_dump()
+        if not messages:
+            messages = [{"role": "user", "content": prompt}]
+
+        print("\n\n")
+        print(_blue("[Conversation History]") + "\n")
+
+        for msg in messages:
+            role = msg["role"]
+            content = msg["content"]
+            print(_red(f"{role.capitalize()}:"))
+            if isinstance(content, str):
+                print(content.strip())
+            elif isinstance(content, list):
+                for item in content:
+                    if item.get("type") == "text":
+                        print(item["text"].strip())
+                    elif item.get("type") == "image_url":
+                        image_url = item["image_url"]["url"]
+                        if "base64" in image_url:
+                            len_base64 = len(image_url.split("base64,")[1])
+                            print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
+                        else:
+                            print(_blue(f"<image_url: {image_url}>"))
+            print("\n")
+
+        print(_red("Response:"))
+        if isinstance(response, dict) and response.get("choices"):
+            message = response["choices"][0].get("message", {})
+            reasoning = message.get("reasoning_content")
+            parsed = message.get("parsed")
+            content = message.get("content")
+            if reasoning:
+                print(_yellow("<think>"))
+                print(reasoning.strip())
+                print(_yellow("</think>\n"))
+            if parsed:
+                print(
+                    json.dumps(
+                        (
+                            parsed.model_dump()
+                            if hasattr(parsed, "model_dump")
+                            else parsed
+                        ),
+                        indent=2,
+                    )
+                    + "\n"
+                )
+            elif content:
+                print(content.strip())
+            else:
+                print(_green("[No content]"))
+            if len(response["choices"]) > 1:
+                print(
+                    _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
+                )
+        else:
+            print(_yellow("Warning: Not a standard OpenAI response object"))
+            if isinstance(response, str):
+                print(_green(response.strip()))
+            elif isinstance(response, dict):
+                print(_green(json.dumps(response, indent=2)))
+            else:
+                print(_green(str(response)))
+
+    # ------------------------------------------------------------------ #
+    # Misc helpers
+    # ------------------------------------------------------------------ #
+    def set_model(self, model: str) -> None:
+        self.model = model
+
+    @staticmethod
+    async def list_models(port=None, host="localhost") -> List[str]:
+        try:
+            client: AsyncOpenAI = AsyncLM(port=port, host=host).client  # type: ignore[arg-type]
+            base_url: URL = client.base_url
+            logger.debug(f"Base URL: {base_url}")
+            models: AsyncSyncPage[Model] = await client.models.list()  # type: ignore[assignment]
+            return [model.id for model in models.data]
+        except Exception as exc:
+            logger.error(f"Failed to list models: {exc}")
+            return []