speedy-utils 1.0.9__tar.gz → 1.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/PKG-INFO +1 -1
  2. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/pyproject.toml +2 -2
  3. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/__init__.py +4 -1
  4. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/lm/__init__.py +2 -1
  5. speedy_utils-1.0.12/src/llm_utils/lm/alm.py +447 -0
  6. speedy_utils-1.0.12/src/llm_utils/lm/lm.py +558 -0
  7. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/scripts/vllm_load_balancer.py +7 -6
  8. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/scripts/vllm_serve.py +66 -136
  9. speedy_utils-1.0.9/src/llm_utils/lm/lm.py +0 -304
  10. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/README.md +0 -0
  11. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/__init__.py +0 -0
  12. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/display.py +0 -0
  13. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/transform.py +0 -0
  14. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/chat_format/utils.py +0 -0
  15. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/group_messages.py +0 -0
  16. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/llm_utils/lm/utils.py +0 -0
  17. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/__init__.py +0 -0
  18. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/all.py +0 -0
  19. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/__init__.py +0 -0
  20. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/clock.py +0 -0
  21. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/function_decorator.py +0 -0
  22. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/logger.py +0 -0
  23. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/report_manager.py +0 -0
  24. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_cache.py +0 -0
  25. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_io.py +0 -0
  26. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_misc.py +0 -0
  27. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/common/utils_print.py +0 -0
  28. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/__init__.py +0 -0
  29. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/process.py +0 -0
  30. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/multi_worker/thread.py +0 -0
  31. {speedy_utils-1.0.9 → speedy_utils-1.0.12}/src/speedy_utils/scripts/mpython.py +0 -0
--- speedy_utils-1.0.9/PKG-INFO
+++ speedy_utils-1.0.12/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: speedy-utils
-Version: 1.0.9
+Version: 1.0.12
 Summary: Fast and easy-to-use package for data science
 Author: AnhVTH
 Author-email: anhvth.226@gmail.com
--- speedy_utils-1.0.9/pyproject.toml
+++ speedy_utils-1.0.12/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "speedy-utils"
-version = "1.0.9"
+version = "1.0.12"
 description = "Fast and easy-to-use package for data science"
 authors = ["AnhVTH <anhvth.226@gmail.com>"]
 readme = "README.md"
@@ -11,7 +11,7 @@ packages = [
 ]
 
 [build-system]
-requires = ["poetry-core>=1.0.9"]
+requires = ["poetry-core>=1.0.12"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.black]
--- speedy_utils-1.0.9/src/llm_utils/__init__.py
+++ speedy_utils-1.0.12/src/llm_utils/__init__.py
@@ -9,7 +9,8 @@ from .chat_format (
     format_msgs,
     display_chat_messages_as_html,
 )
-from .lm import LM
+from .lm.lm import LM, LMReasoner
+from .lm.alm import AsyncLM
 from .group_messages import (
     split_indices_by_length,
     group_messages_by_len,
@@ -27,5 +28,7 @@ __all__ = [
     "split_indices_by_length",
     "group_messages_by_len",
     "LM",
+    "LMReasoner",
+    "AsyncLM",
     "display_chat_messages_as_html",
 ]
--- speedy_utils-1.0.9/src/llm_utils/lm/__init__.py
+++ speedy_utils-1.0.12/src/llm_utils/lm/__init__.py
@@ -1,8 +1,9 @@
 from .lm import LM
-
+from .alm import AsyncLM
 OAI_LM = LM
 
 __all__ = [
     "LM",
     "OAI_LM",
+    "AsyncLM",
 ]
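
Taken together, the two `__init__.py` hunks above widen the public API for 1.0.12: the top-level `llm_utils` package now re-exports `LM`, `LMReasoner`, and `AsyncLM`, while `llm_utils.lm` keeps `OAI_LM` as an alias for `LM`. A minimal import sketch based only on the exports shown above (illustrative, not part of the packaged diff):

```python
# Names taken from the __all__ lists in the hunks above.
from llm_utils import LM, LMReasoner, AsyncLM
from llm_utils.lm import OAI_LM

assert OAI_LM is LM  # OAI_LM remains a plain alias for the synchronous wrapper
```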
--- /dev/null
+++ speedy_utils-1.0.12/src/llm_utils/lm/alm.py
@@ -0,0 +1,447 @@
+from __future__ import annotations
+
+"""An **asynchronous** drop‑in replacement for the original `LM` class.
+
+Usage example (Python ≥3.8):
+
+    from async_lm import AsyncLM
+    import asyncio
+
+    async def main():
+        lm = AsyncLM(model="gpt-4o-mini")
+        reply: str = await lm(prompt="Hello, world!")
+        print(reply)
+
+    asyncio.run(main())
+"""
+
+import asyncio
+import base64
+import hashlib
+import json
+import os
+from typing import (
+    Any,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    TypeVar,
+    Union,
+    overload,
+    cast,
+)
+
+from httpx import URL
+from openai import AsyncOpenAI, AuthenticationError, RateLimitError
+
+# from openai.pagination import AsyncSyncPage
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionToolMessageParam,
+    ChatCompletionUserMessageParam,
+)
+from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
+from openai.types.model import Model
+from pydantic import BaseModel
+from loguru import logger
+from openai.pagination import AsyncPage as AsyncSyncPage
+
+# --------------------------------------------------------------------------- #
+# type helpers
+# --------------------------------------------------------------------------- #
+TModel = TypeVar("TModel", bound=BaseModel)
+Messages = List[ChatCompletionMessageParam]
+LegacyMsgs = List[Dict[str, str]]
+RawMsgs = Union[Messages, LegacyMsgs]
+
+# --------------------------------------------------------------------------- #
+# color helpers (unchanged)
+# --------------------------------------------------------------------------- #
+
+
+def _color(code: int, text: str) -> str:
+    return f"\x1b[{code}m{text}\x1b[0m"
+
+
+_red = lambda t: _color(31, t)
+_green = lambda t: _color(32, t)
+_blue = lambda t: _color(34, t)
+_yellow = lambda t: _color(33, t)
+
+
+class AsyncLM:
+    """Unified **async** language‑model wrapper with optional JSON parsing."""
+
+    def __init__(
+        self,
+        model: str | None = None,
+        *,
+        temperature: float = 0.0,
+        max_tokens: int = 2_000,
+        host: str = "localhost",
+        port: Optional[int | str] = None,
+        base_url: Optional[str] = None,
+        api_key: Optional[str] = None,
+        cache: bool = True,
+        ports: Optional[List[int]] = None,
+        **openai_kwargs: Any,
+    ) -> None:
+        self.model = model
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.port = port
+        self.host = host
+        self.base_url = base_url or (f"http://{host}:{port}/v1" if port else None)
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY", "abc")
+        self.openai_kwargs = openai_kwargs
+        self.do_cache = cache
+        self.ports = ports
+
+    # Async client
+
+    @property
+    def client(self) -> AsyncOpenAI:
+        # if have multiple ports
+        if self.ports:
+            import random
+            port = random.choice(self.ports)
+            api_base = f"http://{self.host}:{port}/v1"
+            logger.debug(f"Using port: {port}")
+        else:
+            api_base = self.base_url or f"http://{self.host}:{self.port}/v1"
+        client = AsyncOpenAI(
+            api_key=self.api_key, base_url=api_base, **self.openai_kwargs
+        )
+        return client
+
+    # ------------------------------------------------------------------ #
+    # Public API – typed overloads
+    # ------------------------------------------------------------------ #
+    @overload
+    async def __call__(
+        self,
+        *,
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
+        response_format: type[str] = str,
+        return_openai_response: bool = ...,
+        **kwargs: Any,
+    ) -> str: ...
+
+    @overload
+    async def __call__(
+        self,
+        *,
+        prompt: str | None = ...,
+        messages: RawMsgs | None = ...,
+        response_format: Type[TModel],
+        return_openai_response: bool = ...,
+        **kwargs: Any,
+    ) -> TModel: ...
+
+    async def __call__(
+        self,
+        prompt: Optional[str] = None,
+        messages: Optional[RawMsgs] = None,
+        response_format: Union[type[str], Type[BaseModel]] = str,
+        cache: Optional[bool] = None,
+        max_tokens: Optional[int] = None,
+        return_openai_response: bool = False,
+        **kwargs: Any,
+    ):
+        if (prompt is None) == (messages is None):
+            raise ValueError("Provide *either* `prompt` or `messages` (but not both).")
+
+        if prompt is not None:
+            messages = [{"role": "user", "content": prompt}]
+
+        assert messages is not None
+        # assert self.model is not None, "Model must be set before calling."
+        if not self.model:
+            models = await self.list_models(port=self.port, host=self.host)
+            self.model = models[0] if models else None
+            logger.info(
+                f"No model specified. Using the first available model. {self.model}"
+            )
+        openai_msgs: Messages = (
+            self._convert_messages(cast(LegacyMsgs, messages))
+            if isinstance(messages[0], dict)
+            else cast(Messages, messages)
+        )
+
+        kw = dict(
+            self.openai_kwargs,
+            temperature=self.temperature,
+            max_tokens=max_tokens or self.max_tokens,
+        )
+        kw.update(kwargs)
+        use_cache = self.do_cache if cache is None else cache
+
+        raw_response = await self._call_raw(
+            openai_msgs,
+            response_format=response_format,
+            use_cache=use_cache,
+            **kw,
+        )
+
+        if return_openai_response:
+            response = raw_response
+        else:
+            response = self._parse_output(raw_response, response_format)
+
+        self.last_log = [prompt, messages, raw_response]
+        return response
+
+    # ------------------------------------------------------------------ #
+    # Model invocation (async)
+    # ------------------------------------------------------------------ #
+    async def _call_raw(
+        self,
+        messages: Sequence[ChatCompletionMessageParam],
+        response_format: Union[type[str], Type[BaseModel]],
+        use_cache: bool,
+        **kw: Any,
+    ):
+        assert self.model is not None, "Model must be set before making a call."
+        model: str = self.model
+
+        cache_key = (
+            self._cache_key(messages, kw, response_format) if use_cache else None
+        )
+        if cache_key and (hit := self._load_cache(cache_key)) is not None:
+            return hit
+
+        try:
+            if response_format is not str and issubclass(response_format, BaseModel):
+                openai_response = await self.client.beta.chat.completions.parse(
+                    model=model,
+                    messages=list(messages),
+                    response_format=response_format,  # type: ignore[arg-type]
+                    **kw,
+                )
+            else:
+                openai_response = await self.client.chat.completions.create(
+                    model=model,
+                    messages=list(messages),
+                    **kw,
+                )
+
+        except (AuthenticationError, RateLimitError) as exc:
+            logger.error(exc)
+            raise
+
+        if cache_key:
+            self._dump_cache(cache_key, openai_response)
+
+        return openai_response
+
+    # ------------------------------------------------------------------ #
+    # Utilities below are unchanged (sync I/O is acceptable)
+    # ------------------------------------------------------------------ #
+    @staticmethod
+    def _convert_messages(msgs: LegacyMsgs) -> Messages:
+        converted: Messages = []
+        for msg in msgs:
+            role = msg["role"]
+            content = msg["content"]
+            if role == "user":
+                converted.append(
+                    ChatCompletionUserMessageParam(role="user", content=content)
+                )
+            elif role == "assistant":
+                converted.append(
+                    ChatCompletionAssistantMessageParam(
+                        role="assistant", content=content
+                    )
+                )
+            elif role == "system":
+                converted.append(
+                    ChatCompletionSystemMessageParam(role="system", content=content)
+                )
+            elif role == "tool":
+                converted.append(
+                    ChatCompletionToolMessageParam(
+                        role="tool",
+                        content=content,
+                        tool_call_id=msg.get("tool_call_id") or "",
+                    )
+                )
+            else:
+                converted.append({"role": role, "content": content})  # type: ignore[arg-type]
+        return converted
+
+    @staticmethod
+    def _parse_output(
+        raw_response: Any, response_format: Union[type[str], Type[BaseModel]]
+    ) -> str | BaseModel:
+        if hasattr(raw_response, "model_dump"):
+            raw_response = raw_response.model_dump()
+
+        if response_format is str:
+            if isinstance(raw_response, dict) and "choices" in raw_response:
+                message = raw_response["choices"][0]["message"]
+                return message.get("content", "") or ""
+            return cast(str, raw_response)
+
+        model_cls = cast(Type[BaseModel], response_format)
+
+        if isinstance(raw_response, dict) and "choices" in raw_response:
+            message = raw_response["choices"][0]["message"]
+            if "parsed" in message:
+                return model_cls.model_validate(message["parsed"])
+            content = message.get("content")
+            if content is None:
+                raise ValueError("Model returned empty content")
+            try:
+                data = json.loads(content)
+                return model_cls.model_validate(data)
+            except Exception as exc:
+                raise ValueError(
+                    f"Failed to parse model output as JSON:\n{content}"
+                ) from exc
+
+        if isinstance(raw_response, model_cls):
+            return raw_response
+        if isinstance(raw_response, dict):
+            return model_cls.model_validate(raw_response)
+
+        try:
+            data = json.loads(raw_response)
+            return model_cls.model_validate(data)
+        except Exception as exc:
+            raise ValueError(
+                f"Model did not return valid JSON:\n---\n{raw_response}"
+            ) from exc
+
+    # ------------------------------------------------------------------ #
+    # Simple disk cache (sync)
+    # ------------------------------------------------------------------ #
+    @staticmethod
+    def _cache_key(
+        messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
+    ) -> str:
+        tag = response_format.__name__ if response_format is not str else "text"
+        blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
+        return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
+
+    @staticmethod
+    def _cache_path(key: str) -> str:
+        return os.path.expanduser(f"~/.cache/lm/{key}.json")
+
+    def _dump_cache(self, key: str, val: Any) -> None:
+        try:
+            path = self._cache_path(key)
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "w") as fh:
+                if isinstance(val, BaseModel):
+                    json.dump(val.model_dump(mode="json"), fh)
+                else:
+                    json.dump(val, fh)
+        except Exception as exc:
+            logger.debug(f"cache write skipped: {exc}")
+
+    def _load_cache(self, key: str) -> Any | None:
+        path = self._cache_path(key)
+        if not os.path.exists(path):
+            return None
+        try:
+            with open(path) as fh:
+                return json.load(fh)
+        except Exception:
+            return None
+
+    # ------------------------------------------------------------------ #
+    # Utility helpers
+    # ------------------------------------------------------------------ #
+    async def inspect_history(self) -> None:
+        if not hasattr(self, "last_log"):
+            raise ValueError("No history available. Please call the model first.")
+
+        prompt, messages, response = self.last_log
+        if hasattr(response, "model_dump"):
+            response = response.model_dump()
+        if not messages:
+            messages = [{"role": "user", "content": prompt}]
+
+        print("\n\n")
+        print(_blue("[Conversation History]") + "\n")
+
+        for msg in messages:
+            role = msg["role"]
+            content = msg["content"]
+            print(_red(f"{role.capitalize()}:"))
+            if isinstance(content, str):
+                print(content.strip())
+            elif isinstance(content, list):
+                for item in content:
+                    if item.get("type") == "text":
+                        print(item["text"].strip())
+                    elif item.get("type") == "image_url":
+                        image_url = item["image_url"]["url"]
+                        if "base64" in image_url:
+                            len_base64 = len(image_url.split("base64,")[1])
+                            print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
+                        else:
+                            print(_blue(f"<image_url: {image_url}>"))
+            print("\n")
+
+        print(_red("Response:"))
+        if isinstance(response, dict) and response.get("choices"):
+            message = response["choices"][0].get("message", {})
+            reasoning = message.get("reasoning_content")
+            parsed = message.get("parsed")
+            content = message.get("content")
+            if reasoning:
+                print(_yellow("<think>"))
+                print(reasoning.strip())
+                print(_yellow("</think>\n"))
+            if parsed:
+                print(
+                    json.dumps(
+                        (
+                            parsed.model_dump()
+                            if hasattr(parsed, "model_dump")
+                            else parsed
+                        ),
+                        indent=2,
+                    )
+                    + "\n"
+                )
+            elif content:
+                print(content.strip())
+            else:
+                print(_green("[No content]"))
+            if len(response["choices"]) > 1:
+                print(
+                    _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
+                )
+        else:
+            print(_yellow("Warning: Not a standard OpenAI response object"))
+            if isinstance(response, str):
+                print(_green(response.strip()))
+            elif isinstance(response, dict):
+                print(_green(json.dumps(response, indent=2)))
+            else:
+                print(_green(str(response)))
+
+    # ------------------------------------------------------------------ #
+    # Misc helpers
+    # ------------------------------------------------------------------ #
+    def set_model(self, model: str) -> None:
+        self.model = model
+
+    @staticmethod
+    async def list_models(port=None, host="localhost") -> List[str]:
+        try:
+            client: AsyncOpenAI = AsyncLM(port=port, host=host).client  # type: ignore[arg-type]
+            base_url: URL = client.base_url
+            logger.debug(f"Base URL: {base_url}")
+            models: AsyncSyncPage[Model] = await client.models.list()  # type: ignore[assignment]
+            return [model.id for model in models.data]
+        except Exception as exc:
+            logger.error(f"Failed to list models: {exc}")
+            return []
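
The new `alm.py` above is the bulk of this release. A hedged usage sketch of `AsyncLM`, following the module docstring and the `__call__` signature shown in the diff; the model name and the `Answer` schema are illustrative placeholders, and a reachable OpenAI-compatible endpoint (or `OPENAI_API_KEY`) is assumed:

```python
import asyncio

from pydantic import BaseModel

from llm_utils import AsyncLM


class Answer(BaseModel):
    # Hypothetical schema used only to demonstrate structured output.
    text: str
    confidence: float


async def main() -> None:
    lm = AsyncLM(model="gpt-4o-mini", cache=False)  # placeholder model name

    # Default response_format=str returns the assistant message content.
    reply = await lm(prompt="Hello, world!")
    print(reply)

    # Passing a Pydantic class routes through beta.chat.completions.parse
    # and returns a validated Answer instance.
    parsed = await lm(
        prompt="Answer with a short text and a confidence between 0 and 1.",
        response_format=Answer,
    )
    print(parsed.text, parsed.confidence)


asyncio.run(main())
```

Note that the disk cache under `~/.cache/lm/` is on by default (`cache=True`), which is why the sketch disables it for a live call.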