speedy-utils 1.1.10__tar.gz → 1.1.12__tar.gz
This diff shows the changes between these publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/PKG-INFO +1 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/pyproject.toml +1 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/__init__.py +2 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py +26 -54
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py +5 -173
- speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py +72 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py +2 -1
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py +23 -7
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py +14 -2
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/README.md +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py

@@ -9,7 +9,7 @@ from typing import (
 )

 from loguru import logger
-from openai import AuthenticationError, BadRequestError, RateLimitError
+from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
 from pydantic import BaseModel
 from speedy_utils import jloads

@@ -43,8 +43,8 @@ class AsyncLM(AsyncLMBase):

     def __init__(
         self,
-        model: str,
         *,
+        model: Optional[str] = None,
         response_model: Optional[type[BaseModel]] = None,
         temperature: float = 0.0,
         max_tokens: int = 2_000,
@@ -63,6 +63,13 @@ class AsyncLM(AsyncLMBase):
         repetition_penalty: float = 1.0,
         frequency_penalty: Optional[float] = None,
     ) -> None:
+
+        if model is None:
+            models = OpenAI(base_url=f'http://{host}:{port}/v1', api_key='abc').models.list().data
+            assert len(models) == 1, f"Found {len(models)} models, please specify one."
+            model = models[0].id
+            print(f"Using model: {model}")
+
         super().__init__(
             host=host,
             port=port,
@@ -98,69 +105,35 @@ class AsyncLM(AsyncLMBase):
         self,
         messages: RawMsgs,
         extra_body: Optional[dict] = None,
-
+        max_tokens: Optional[int] = None,
     ) -> dict:
-        """Unified method for all client interactions
+        """Unified method for all client interactions (caching handled by MAsyncOpenAI)."""
         converted_messages: Messages = (
             self._convert_messages(cast(LegacyMsgs, messages))
             if messages and isinstance(messages[0], dict)
             else cast(Messages, messages)
         )
-
-
+        # override max_tokens if provided
+        if max_tokens is not None:
+            self.model_kwargs["max_tokens"] = max_tokens

-
-
-
+        try:
+            # Get completion from API (caching handled by MAsyncOpenAI)
+            call_kwargs = {
                 "messages": converted_messages,
-
-                "extra_body": extra_body or {},
-                "cache_suffix": cache_suffix,
+                **self.model_kwargs,
             }
-
-
-
-        # Check for cached error responses
-        if (
-            completion
-            and isinstance(completion, dict)
-            and "error" in completion
-            and completion["error"]
-        ):
-            error_type = completion.get("error_type", "Unknown")
-            error_message = completion.get("error_message", "Cached error")
-            logger.warning(f"Found cached error ({error_type}): {error_message}")
-            raise ValueError(f"Cached {error_type}: {error_message}")
+            if extra_body:
+                call_kwargs["extra_body"] = extra_body

-
-
-        if
-
-            "messages": converted_messages,
-            **self.model_kwargs,
-        }
-        if extra_body:
-            call_kwargs["extra_body"] = extra_body
-
-            completion = await self.client.chat.completions.create(**call_kwargs)
-
-            if hasattr(completion, "model_dump"):
-                completion = completion.model_dump()
-            if cache_key:
-                self._dump_cache(cache_key, completion)
+            completion = await self.client.chat.completions.create(**call_kwargs)
+
+            if hasattr(completion, "model_dump"):
+                completion = completion.model_dump()

         except (AuthenticationError, RateLimitError, BadRequestError) as exc:
             error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
             logger.error(error_msg)
-            if isinstance(exc, BadRequestError) and cache_key:
-                error_response = {
-                    "error": True,
-                    "error_type": "BadRequestError",
-                    "error_message": str(exc),
-                    "choices": [],
-                }
-                self._dump_cache(cache_key, error_response)
-                logger.debug(f"Cached BadRequestError for key: {cache_key}")
             raise

         return completion
@@ -183,7 +156,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={**self.extra_body},
-            cache_suffix=f"_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -238,7 +210,6 @@ class AsyncLM(AsyncLMBase):
         completion = await self._unified_client_call(
             messages,
             extra_body={"guided_json": json_schema, **self.extra_body},
-            cache_suffix=f"_beta_parse_{response_model.__name__}",
         )

         # Parse the response
@@ -281,6 +252,7 @@ class AsyncLM(AsyncLMBase):
         self,
         prompt: Optional[str] = None,
         messages: Optional[RawMsgs] = None,
+        max_tokens: Optional[int] = None,
     ):  # -> tuple[Any | dict[Any, Any], list[ChatCompletionMessagePar...:
         """Unified async call for language model, returns (assistant_message.model_dump(), messages)."""
         if (prompt is None) == (messages is None):
@@ -303,7 +275,7 @@ class AsyncLM(AsyncLMBase):

         # Use unified client call
         raw_response = await self._unified_client_call(
-            list(openai_msgs),
+            list(openai_msgs), max_tokens=max_tokens
         )

         if hasattr(raw_response, "model_dump"):
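Note: the change above makes the `model` argument optional; when it is omitted, `AsyncLM.__init__` queries the OpenAI-compatible server for its model list and uses the single served model. A minimal standalone sketch of that lookup, with host, port, and the dummy API key as illustrative assumptions:

from openai import OpenAI

def detect_single_model(host: str = "localhost", port: int = 8000) -> str:
    # Ask the OpenAI-compatible server (e.g. vLLM) which models it serves.
    client = OpenAI(base_url=f"http://{host}:{port}/v1", api_key="abc")
    models = client.models.list().data
    # Mirror the new behaviour: refuse to guess when several models are served.
    assert len(models) == 1, f"Found {len(models)} models, please specify one."
    return models[0].id

# model = detect_single_model()  # e.g. the only model served by a local vLLM instance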
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py

@@ -1,6 +1,4 @@
 # from ._utils import *
-import base64
-import hashlib
 import json
 import os
 from typing import (
@@ -26,6 +24,8 @@ from openai.types.chat import (
 from openai.types.model import Model
 from pydantic import BaseModel

+from llm_utils.lm.openai_memoize import MAsyncOpenAI
+
 from ._utils import (
     LegacyMsgs,
     Messages,
@@ -56,7 +56,7 @@ class AsyncLMBase:
         self._init_port = port  # <-- store the port provided at init

     @property
-    def client(self) ->
+    def client(self) -> MAsyncOpenAI:
         # if have multiple ports
         if self.ports:
             import random
@@ -66,9 +66,10 @@ class AsyncLMBase:
             logger.debug(f"Using port: {port}")
         else:
             api_base = self.base_url or f"http://{self._host}:{self._port}/v1"
-        client =
+        client = MAsyncOpenAI(
             api_key=self.api_key,
             base_url=api_base,
+            cache=self._cache,
         )
         self._last_client = client
         return client
@@ -176,175 +177,6 @@ class AsyncLMBase:
                 f"Model did not return valid JSON:\n---\n{raw_response}"
             ) from exc

-    # ------------------------------------------------------------------ #
-    # Simple disk cache (sync)
-    # ------------------------------------------------------------------ #
-    @staticmethod
-    def _cache_key(
-        messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
-    ) -> str:
-        tag = response_format.__name__ if response_format is not str else "text"
-        blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
-        return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
-
-    @staticmethod
-    def _cache_path(key: str) -> str:
-        return os.path.expanduser(f"~/.cache/lm/{key}.json")
-
-    def _dump_cache(self, key: str, val: Any) -> None:
-        try:
-            path = self._cache_path(key)
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            with open(path, "w") as fh:
-                if isinstance(val, BaseModel):
-                    json.dump(val.model_dump(mode="json"), fh)
-                else:
-                    json.dump(val, fh)
-        except Exception as exc:
-            logger.debug(f"cache write skipped: {exc}")
-
-    def _load_cache(self, key: str) -> Any | None:
-        path = self._cache_path(key)
-        if not os.path.exists(path):
-            return None
-        try:
-            with open(path) as fh:
-                return json.load(fh)
-        except Exception:
-            return None
-
-    # async def inspect_word_probs(
-    #     self,
-    #     messages: Optional[List[Dict[str, Any]]] = None,
-    #     tokenizer: Optional[Any] = None,
-    #     do_print=True,
-    #     add_think: bool = True,
-    # ) -> tuple[List[Dict[str, Any]], Any, str]:
-    #     """
-    #     Inspect word probabilities in a language model response.
-
-    #     Args:
-    #         tokenizer: Tokenizer instance to encode words.
-    #         messages: List of messages to analyze.
-
-    #     Returns:
-    #         A tuple containing:
-    #         - List of word probabilities with their log probabilities.
-    #         - Token log probability dictionaries.
-    #         - Rendered string with colored word probabilities.
-    #     """
-    #     if messages is None:
-    #         messages = await self.last_messages(add_think=add_think)
-    #         if messages is None:
-    #             raise ValueError("No messages provided and no last messages available.")
-
-    #     if tokenizer is None:
-    #         tokenizer = get_tokenizer(self.model)
-
-    #     ret = await inspect_word_probs_async(self, tokenizer, messages)
-    #     if do_print:
-    #         print(ret[-1])
-    #     return ret
-
-    # async def last_messages(
-    #     self, add_think: bool = True
-    # ) -> Optional[List[Dict[str, str]]]:
-    #     """Get the last conversation messages including assistant response."""
-    #     if not hasattr(self, "last_log"):
-    #         return None
-
-    #     last_conv = self._last_log
-    #     messages = last_conv[1] if len(last_conv) > 1 else None
-    #     last_msg = last_conv[2]
-    #     if not isinstance(last_msg, dict):
-    #         last_conv[2] = last_conv[2].model_dump()  # type: ignore
-    #     msg = last_conv[2]
-    #     # Ensure msg is a dict
-    #     if hasattr(msg, "model_dump"):
-    #         msg = msg.model_dump()
-    #     message = msg["choices"][0]["message"]
-    #     reasoning = message.get("reasoning_content")
-    #     answer = message.get("content")
-    #     if reasoning and add_think:
-    #         final_answer = f"<think>{reasoning}</think>\n{answer}"
-    #     else:
-    #         final_answer = f"<think>\n\n</think>\n{answer}"
-    #     assistant = {"role": "assistant", "content": final_answer}
-    #     messages = messages + [assistant]  # type: ignore
-    #     return messages if messages else None
-
-    # async def inspect_history(self) -> None:
-    #     """Inspect the conversation history with proper formatting."""
-    #     if not hasattr(self, "last_log"):
-    #         raise ValueError("No history available. Please call the model first.")
-
-    #     prompt, messages, response = self._last_log
-    #     if hasattr(response, "model_dump"):
-    #         response = response.model_dump()
-    #     if not messages:
-    #         messages = [{"role": "user", "content": prompt}]
-
-    #     print("\n\n")
-    #     print(_blue("[Conversation History]") + "\n")
-
-    #     for msg in messages:
-    #         role = msg["role"]
-    #         content = msg["content"]
-    #         print(_red(f"{role.capitalize()}:"))
-    #         if isinstance(content, str):
-    #             print(content.strip())
-    #         elif isinstance(content, list):
-    #             for item in content:
-    #                 if item.get("type") == "text":
-    #                     print(item["text"].strip())
-    #                 elif item.get("type") == "image_url":
-    #                     image_url = item["image_url"]["url"]
-    #                     if "base64" in image_url:
-    #                         len_base64 = len(image_url.split("base64,")[1])
-    #                         print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
-    #                     else:
-    #                         print(_blue(f"<image_url: {image_url}>"))
-    #         print("\n")
-
-    #     print(_red("Response:"))
-    #     if isinstance(response, dict) and response.get("choices"):
-    #         message = response["choices"][0].get("message", {})
-    #         reasoning = message.get("reasoning_content")
-    #         parsed = message.get("parsed")
-    #         content = message.get("content")
-    #         if reasoning:
-    #             print(_yellow("<think>"))
-    #             print(reasoning.strip())
-    #             print(_yellow("</think>\n"))
-    #         if parsed:
-    #             print(
-    #                 json.dumps(
-    #                     (
-    #                         parsed.model_dump()
-    #                         if hasattr(parsed, "model_dump")
-    #                         else parsed
-    #                     ),
-    #                     indent=2,
-    #                 )
-    #                 + "\n"
-    #             )
-    #         elif content:
-    #             print(content.strip())
-    #         else:
-    #             print(_green("[No content]"))
-    #         if len(response["choices"]) > 1:
-    #             print(
-    #                 _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
-    #             )
-    #     else:
-    #         print(_yellow("Warning: Not a standard OpenAI response object"))
-    #         if isinstance(response, str):
-    #             print(_green(response.strip()))
-    #         elif isinstance(response, dict):
-    #             print(_green(json.dumps(response, indent=2)))
-    #         else:
-    #             print(_green(str(response)))
-
     # ------------------------------------------------------------------ #
     # Misc helpers
     # ------------------------------------------------------------------ #
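Note: with the hand-rolled ~/.cache/lm JSON cache removed, `AsyncLMBase.client` delegates caching to `MAsyncOpenAI`. A rough sketch of the construction path, assuming `self._cache` is simply a truthy flag forwarded to the `cache` parameter:

from llm_utils.lm.openai_memoize import MAsyncOpenAI

def build_client(api_key: str, host: str, port: int, cache: bool = True) -> MAsyncOpenAI:
    # Mirrors the new `client` property: response caching now lives in the
    # memoized client rather than in AsyncLMBase itself.
    api_base = f"http://{host}:{port}/v1"
    return MAsyncOpenAI(api_key=api_key, base_url=api_base, cache=cache)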
speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py (new file)

@@ -0,0 +1,72 @@
+from openai import OpenAI, AsyncOpenAI
+
+from speedy_utils.common.utils_cache import memoize
+
+
+class MOpenAI(OpenAI):
+    """
+    MOpenAI(*args, **kwargs)
+
+    Subclass of OpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the OpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to OpenAI.__init__.
+
+    Notes and cautions
+    - The exact semantics of caching (cache key construction, expiry, max size,
+      persistence) depend on the implementation of `memoize`. Ensure that the
+      provided `memoize` supports the desired behavior (e.g., hashing of mutable
+      inputs, thread-safety, TTL, cache invalidation).
+    - If the original `post` method has important side effects or relies on
+      non-deterministic behavior, memoization may change program behavior.
+    - If you need a shared cache across instances, or more advanced cache controls,
+      modify `memoize` or wrap at a class/static level instead of assigning to the
+      bound method.
+
+    Example
+        m = MOpenAI(api_key="...", model="gpt-4")
+        r1 = m.post("Hello")  # executes API call and caches result
+        r2 = m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
+
+
+class MAsyncOpenAI(AsyncOpenAI):
+    """
+    MAsyncOpenAI(*args, **kwargs)
+
+    Async subclass of AsyncOpenAI that transparently memoizes the instance's `post` method.
+
+    This class forwards all constructor arguments to the AsyncOpenAI base class and then
+    replaces the instance's `post` method with a memoized wrapper:
+
+    Behavior
+    - The memoized `post` caches responses based on the arguments with which it is
+      invoked, preventing repeated identical requests from invoking the underlying
+      OpenAI API repeatedly.
+    - Because `post` is replaced on the instance, the cache is by-default tied to
+      the MAsyncOpenAI instance (per-instance cache).
+    - Any initialization arguments are passed unchanged to AsyncOpenAI.__init__.
+
+    Example
+        m = MAsyncOpenAI(api_key="...", model="gpt-4")
+        r1 = await m.post("Hello")  # executes API call and caches result
+        r2 = await m.post("Hello")  # returns cached result (no API call)
+    """
+
+    def __init__(self, *args, cache=True, **kwargs):
+        super().__init__(*args, **kwargs)
+        if cache:
+            self.post = memoize(self.post)
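Note: the new module wraps the OpenAI clients so that each instance's low-level `post` method is memoized. A usage sketch through the regular chat-completions interface; the base URL, API key, and model name are placeholders, a reachable OpenAI-compatible server is assumed, and the second call is served from the cache only if `memoize` can key the underlying request arguments:

from llm_utils.lm.openai_memoize import MOpenAI

client = MOpenAI(base_url="http://localhost:8000/v1", api_key="abc")
msgs = [{"role": "user", "content": "Hello"}]

first = client.chat.completions.create(model="my-model", messages=msgs)
second = client.chat.completions.create(model="my-model", messages=msgs)  # ideally answered from cache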
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py

@@ -72,6 +72,7 @@ import openai
 import requests
 from loguru import logger

+from llm_utils.lm.openai_memoize import MOpenAI
 from speedy_utils.common.utils_io import load_by_ext

 LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
@@ -82,7 +83,7 @@ logger.info(f"LORA_DIR: {LORA_DIR}")

 def model_list(host_port: str, api_key: str = "abc") -> None:
     """List models from the vLLM server."""
-    client =
+    client = MOpenAI(base_url=f"http://{host_port}/v1", api_key=api_key)
     models = client.models.list()
     for model in models:
         print(f"Model ID: {model.id}")
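Note: `model_list` now builds its client with `MOpenAI` instead of a bare OpenAI client. An illustrative call, assuming a vLLM server listening on localhost:8000:

from llm_utils.scripts.vllm_serve import model_list

# Prints "Model ID: ..." for each model served by the OpenAI-compatible endpoint.
model_list("localhost:8000")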
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py

@@ -459,7 +459,12 @@ def both_memoize(
         disk_result: Optional[R] = None
         with disk_lock:
             if osp.exists(cache_path):
-
+                try:
+                    disk_result = load_json_or_pickle(cache_path)
+                except Exception:
+                    if osp.exists(cache_path):
+                        os.remove(cache_path)
+                    disk_result = None

         if disk_result is not None:
             with mem_lock:
@@ -555,6 +560,7 @@ def _async_both_memoize(
# Public decorator (only export memoize)
# --------------------------------------------------------------------------------------

+
@overload
def memoize(
    _func: Callable[P, R],
@@ -619,24 +625,34 @@ def memoize(
     """
     if "~/" in cache_dir:
         cache_dir = osp.expanduser(cache_dir)
+    from speedy_utils import timef

     def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
         is_async = inspect.iscoroutinefunction(func)

+        # Apply timing decorator if verbose=True
+        target_func = timef(func) if verbose else func
+
         if cache_type == "memory":
             if is_async:
-                return _async_memory_memoize(
-            return _memory_memoize(
+                return _async_memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]
+            return _memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]

         if cache_type == "disk":
             if is_async:
-                return _async_disk_memoize(
-
+                return _async_disk_memoize(
+                    target_func, keys, cache_dir, ignore_self, verbose, key
+                )  # type: ignore[return-value]
+            return _disk_memoize(
+                target_func, keys, cache_dir, ignore_self, verbose, key
+            )  # type: ignore[return-value]

         # cache_type == "both"
         if is_async:
-            return _async_both_memoize(
-
+            return _async_both_memoize(
+                target_func, keys, cache_dir, ignore_self, size, key
+            )  # type: ignore[return-value]
+        return both_memoize(target_func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]

     # Support both @memoize and @memoize(...)
     if _func is None:
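Note: two behavioural changes to `memoize` are visible above: a corrupted disk-cache entry is now deleted and recomputed instead of propagating the load error, and `verbose=True` wraps the target function with `timef` before caching. A hedged usage sketch; parameter names follow the diff and the argument values are illustrative:

from speedy_utils.common.utils_cache import memoize

@memoize(cache_type="both", verbose=True)
def slow_square(x: int) -> int:
    return x * x

slow_square(4)  # computed, timed via timef, and written to the memory + disk caches
slow_square(4)  # served from cache on subsequent identical calls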
{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py

@@ -10,6 +10,7 @@ from pathlib import Path
 from typing import Any

 from json_repair import loads as jloads
+from pydantic import BaseModel

 from .utils_misc import mkdir_or_exist

@@ -46,8 +47,19 @@ def dump_json_or_pickle(
     elif fname.endswith(".jsonl"):
         dump_jsonl(obj, fname)
     elif fname.endswith(".pkl"):
-
-
+        try:
+            with open(fname, "wb") as f:
+                pickle.dump(obj, f)
+        except Exception as e:
+            if isinstance(obj, BaseModel):
+                data = obj.model_dump()
+                from fastcore.all import obj2dict, dict2obj
+                obj2 = dict2obj(data)
+                with open(fname, "wb") as f:
+                    pickle.dump(obj2, f)
+            else:
+                raise ValueError(f"Error {e} while dumping {fname}") from e
+
     else:
         raise NotImplementedError(f"File type {fname} not supported")
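Note: `dump_json_or_pickle` now retries a failed `.pkl` dump for pydantic models by pickling `dict2obj(obj.model_dump())` from fastcore instead of raising immediately. A small sketch of the call; the file path and model are illustrative:

from pydantic import BaseModel
from speedy_utils.common.utils_io import dump_json_or_pickle

class Point(BaseModel):
    x: int
    y: int

# Pickled directly; if pickling raised, the new fallback would pickle
# dict2obj(Point(...).model_dump()) instead.
dump_json_or_pickle(Point(x=1, y=2), "/tmp/point.pkl")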