speedy-utils 1.1.10__tar.gz → 1.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/PKG-INFO +1 -1
  2. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/pyproject.toml +1 -1
  3. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/__init__.py +2 -0
  4. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py +26 -54
  5. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py +5 -173
  6. speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py +72 -0
  7. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py +2 -1
  8. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py +23 -7
  9. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py +14 -2
  10. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/README.md +0 -0
  11. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/__init__.py +0 -0
  12. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/display.py +0 -0
  13. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/transform.py +0 -0
  14. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/chat_format/utils.py +0 -0
  15. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/group_messages.py +0 -0
  16. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/__init__.py +0 -0
  17. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  18. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  19. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_llm_task.py +0 -0
  20. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  21. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/utils.py +0 -0
  22. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/README.md +0 -0
  23. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  24. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/__init__.py +0 -0
  25. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/all.py +0 -0
  26. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/__init__.py +0 -0
  27. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/clock.py +0 -0
  28. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/function_decorator.py +0 -0
  29. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/logger.py +0 -0
  30. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/notebook_utils.py +0 -0
  31. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/report_manager.py +0 -0
  32. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_misc.py +0 -0
  33. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_print.py +0 -0
  34. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/__init__.py +0 -0
  35. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/process.py +0 -0
  36. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/multi_worker/thread.py +0 -0
  37. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/__init__.py +0 -0
  38. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/mpython.py +0 -0
  39. {speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: speedy-utils
- Version: 1.1.10
+ Version: 1.1.12
  Summary: Fast and easy-to-use package for data science
  Author: AnhVTH
  Author-email: anhvth.226@gmail.com

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "speedy-utils"
- version = "1.1.10"
+ version = "1.1.12"
  description = "Fast and easy-to-use package for data science"
  authors = ["AnhVTH <anhvth.226@gmail.com>"]
  readme = "README.md"

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/__init__.py
@@ -1,3 +1,4 @@
+ from llm_utils.lm.openai_memoize import MOpenAI
  from .chat_format import (
      build_chatml_input,
      display_chat_messages_as_html,
@@ -23,4 +24,5 @@ __all__ = [
      "display_chat_messages_as_html",
      "AsyncLM",
      "AsyncLLMTask",
+     "MOpenAI"
  ]
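
The two hunks above re-export the new memoized client at the package root, so callers no longer need the deep module path. A minimal sketch of what the new export enables (the base_url and api_key below are placeholders, not values from the diff):

# Hypothetical usage of the new top-level export; assumes an OpenAI-compatible
# endpoint is reachable at the given base_url.
from llm_utils import MOpenAI

client = MOpenAI(base_url="http://localhost:8000/v1", api_key="abc")
print(client.models.list())
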

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm.py
@@ -9,7 +9,7 @@ from typing import (
  )

  from loguru import logger
- from openai import AuthenticationError, BadRequestError, RateLimitError
+ from openai import AuthenticationError, BadRequestError, OpenAI, RateLimitError
  from pydantic import BaseModel
  from speedy_utils import jloads

@@ -43,8 +43,8 @@ class AsyncLM(AsyncLMBase):

      def __init__(
          self,
-         model: str,
          *,
+         model: Optional[str] = None,
          response_model: Optional[type[BaseModel]] = None,
          temperature: float = 0.0,
          max_tokens: int = 2_000,
@@ -63,6 +63,13 @@
          repetition_penalty: float = 1.0,
          frequency_penalty: Optional[float] = None,
      ) -> None:
+
+         if model is None:
+             models = OpenAI(base_url=f'http://{host}:{port}/v1', api_key='abc').models.list().data
+             assert len(models) == 1, f"Found {len(models)} models, please specify one."
+             model = models[0].id
+             print(f"Using model: {model}")
+
          super().__init__(
              host=host,
              port=port,
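
The new `model=None` default lets AsyncLM discover the served model instead of requiring it up front: it lists the server's models and, if exactly one is available, uses it. A standalone sketch of that probe, mirroring the added lines (host, port, and api_key are illustrative):

# Sketch of the auto-detection step added above, assuming a single-model
# OpenAI-compatible server (e.g. vLLM); it refuses to guess when several
# models are served.
from openai import OpenAI

host, port = "localhost", 8000
models = OpenAI(base_url=f"http://{host}:{port}/v1", api_key="abc").models.list().data
assert len(models) == 1, f"Found {len(models)} models, please specify one."
model = models[0].id
print(f"Using model: {model}")
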
@@ -98,69 +105,35 @@
          self,
          messages: RawMsgs,
          extra_body: Optional[dict] = None,
-         cache_suffix: str = "",
+         max_tokens: Optional[int] = None,
      ) -> dict:
-         """Unified method for all client interactions with caching and error handling."""
+         """Unified method for all client interactions (caching handled by MAsyncOpenAI)."""
          converted_messages: Messages = (
              self._convert_messages(cast(LegacyMsgs, messages))
              if messages and isinstance(messages[0], dict)
              else cast(Messages, messages)
          )
-         cache_key = None
-         completion = None
+         # override max_tokens if provided
+         if max_tokens is not None:
+             self.model_kwargs["max_tokens"] = max_tokens

-         # Handle caching
-         if self._cache:
-             cache_data = {
+         try:
+             # Get completion from API (caching handled by MAsyncOpenAI)
+             call_kwargs = {
                  "messages": converted_messages,
-                 "model_kwargs": self.model_kwargs,
-                 "extra_body": extra_body or {},
-                 "cache_suffix": cache_suffix,
+                 **self.model_kwargs,
              }
-             cache_key = self._cache_key(cache_data, {}, str)
-             completion = self._load_cache(cache_key)
-
-             # Check for cached error responses
-             if (
-                 completion
-                 and isinstance(completion, dict)
-                 and "error" in completion
-                 and completion["error"]
-             ):
-                 error_type = completion.get("error_type", "Unknown")
-                 error_message = completion.get("error_message", "Cached error")
-                 logger.warning(f"Found cached error ({error_type}): {error_message}")
-                 raise ValueError(f"Cached {error_type}: {error_message}")
+             if extra_body:
+                 call_kwargs["extra_body"] = extra_body

-         try:
-             # Get completion from API if not cached
-             if not completion:
-                 call_kwargs = {
-                     "messages": converted_messages,
-                     **self.model_kwargs,
-                 }
-                 if extra_body:
-                     call_kwargs["extra_body"] = extra_body
-
-                 completion = await self.client.chat.completions.create(**call_kwargs)
-
-                 if hasattr(completion, "model_dump"):
-                     completion = completion.model_dump()
-                 if cache_key:
-                     self._dump_cache(cache_key, completion)
+             completion = await self.client.chat.completions.create(**call_kwargs)
+
+             if hasattr(completion, "model_dump"):
+                 completion = completion.model_dump()

          except (AuthenticationError, RateLimitError, BadRequestError) as exc:
              error_msg = f"OpenAI API error ({type(exc).__name__}): {exc}"
              logger.error(error_msg)
-             if isinstance(exc, BadRequestError) and cache_key:
-                 error_response = {
-                     "error": True,
-                     "error_type": "BadRequestError",
-                     "error_message": str(exc),
-                     "choices": [],
-                 }
-                 self._dump_cache(cache_key, error_response)
-                 logger.debug(f"Cached BadRequestError for key: {cache_key}")
              raise

          return completion
@@ -183,7 +156,6 @@
          completion = await self._unified_client_call(
              messages,
              extra_body={**self.extra_body},
-             cache_suffix=f"_parse_{response_model.__name__}",
          )

          # Parse the response
@@ -238,7 +210,6 @@
          completion = await self._unified_client_call(
              messages,
              extra_body={"guided_json": json_schema, **self.extra_body},
-             cache_suffix=f"_beta_parse_{response_model.__name__}",
          )

          # Parse the response
@@ -281,6 +252,7 @@
          self,
          prompt: Optional[str] = None,
          messages: Optional[RawMsgs] = None,
+         max_tokens: Optional[int] = None,
      ):  # -> tuple[Any | dict[Any, Any], list[ChatCompletionMessagePar...:
          """Unified async call for language model, returns (assistant_message.model_dump(), messages)."""
          if (prompt is None) == (messages is None):
@@ -303,7 +275,7 @@

          # Use unified client call
          raw_response = await self._unified_client_call(
-             list(openai_msgs), cache_suffix="_call"
+             list(openai_msgs), max_tokens=max_tokens
          )

          if hasattr(raw_response, "model_dump"):
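
Together with the `max_tokens` parameter added to the call signature earlier, this lets a single request cap its output length; note that `_unified_client_call` writes the override into `self.model_kwargs`, so it persists for later calls on the same instance. A hedged usage sketch (the constructor arguments are illustrative, and the snippet assumes the method shown above is AsyncLM's awaitable entry point, since the enclosing `def` line falls outside the hunk):

# Illustrative only: exact constructor/call names depend on parts of AsyncLM
# not shown in this diff.
import asyncio
from llm_utils import AsyncLM

async def main():
    lm = AsyncLM(host="localhost", port=8000)  # model auto-detected (see __init__ hunk)
    msg, messages = await lm(
        prompt="Summarize response caching in one sentence.",
        max_tokens=64,  # per-call override; also updates lm's model_kwargs
    )
    print(msg)

asyncio.run(main())
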

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/lm/async_lm/async_lm_base.py
@@ -1,6 +1,4 @@
  # from ._utils import *
- import base64
- import hashlib
  import json
  import os
  from typing import (
@@ -26,6 +24,8 @@ from openai.types.chat import (
  from openai.types.model import Model
  from pydantic import BaseModel

+ from llm_utils.lm.openai_memoize import MAsyncOpenAI
+
  from ._utils import (
      LegacyMsgs,
      Messages,
@@ -56,7 +56,7 @@ class AsyncLMBase:
          self._init_port = port  # <-- store the port provided at init

      @property
-     def client(self) -> AsyncOpenAI:
+     def client(self) -> MAsyncOpenAI:
          # if have multiple ports
          if self.ports:
              import random
@@ -66,9 +66,10 @@
              logger.debug(f"Using port: {port}")
          else:
              api_base = self.base_url or f"http://{self._host}:{self._port}/v1"
-         client = AsyncOpenAI(
+         client = MAsyncOpenAI(
              api_key=self.api_key,
              base_url=api_base,
+             cache=self._cache,
          )
          self._last_client = client
          return client
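
With this change the per-request client is an MAsyncOpenAI, so whether responses are memoized is decided once, when the client is constructed, via the `cache` flag. A minimal sketch of the same wiring outside the property (base_url, api_key, and model are placeholders):

# Sketch mirroring the property above: identical requests through a cache=True
# client should be answered from the memoized `post` the second time.
import asyncio
from llm_utils.lm.openai_memoize import MAsyncOpenAI

async def main():
    client = MAsyncOpenAI(api_key="abc", base_url="http://localhost:8000/v1", cache=True)
    kwargs = dict(model="my-model", messages=[{"role": "user", "content": "hi"}])
    r1 = await client.chat.completions.create(**kwargs)  # real request, result cached
    r2 = await client.chat.completions.create(**kwargs)  # same arguments -> cache hit

asyncio.run(main())
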
@@ -176,175 +177,6 @@
              f"Model did not return valid JSON:\n---\n{raw_response}"
          ) from exc

-     # ------------------------------------------------------------------ #
-     # Simple disk cache (sync)
-     # ------------------------------------------------------------------ #
-     @staticmethod
-     def _cache_key(
-         messages: Any, kw: Any, response_format: Union[type[str], Type[BaseModel]]
-     ) -> str:
-         tag = response_format.__name__ if response_format is not str else "text"
-         blob = json.dumps([messages, kw, tag], sort_keys=True).encode()
-         return base64.urlsafe_b64encode(hashlib.sha256(blob).digest()).decode()[:22]
-
-     @staticmethod
-     def _cache_path(key: str) -> str:
-         return os.path.expanduser(f"~/.cache/lm/{key}.json")
-
-     def _dump_cache(self, key: str, val: Any) -> None:
-         try:
-             path = self._cache_path(key)
-             os.makedirs(os.path.dirname(path), exist_ok=True)
-             with open(path, "w") as fh:
-                 if isinstance(val, BaseModel):
-                     json.dump(val.model_dump(mode="json"), fh)
-                 else:
-                     json.dump(val, fh)
-         except Exception as exc:
-             logger.debug(f"cache write skipped: {exc}")
-
-     def _load_cache(self, key: str) -> Any | None:
-         path = self._cache_path(key)
-         if not os.path.exists(path):
-             return None
-         try:
-             with open(path) as fh:
-                 return json.load(fh)
-         except Exception:
-             return None
-
-     # async def inspect_word_probs(
-     # self,
-     # messages: Optional[List[Dict[str, Any]]] = None,
-     # tokenizer: Optional[Any] = None,
-     # do_print=True,
-     # add_think: bool = True,
-     # ) -> tuple[List[Dict[str, Any]], Any, str]:
-     # """
-     # Inspect word probabilities in a language model response.
-
-     # Args:
-     # tokenizer: Tokenizer instance to encode words.
-     # messages: List of messages to analyze.
-
-     # Returns:
-     # A tuple containing:
-     # - List of word probabilities with their log probabilities.
-     # - Token log probability dictionaries.
-     # - Rendered string with colored word probabilities.
-     # """
-     # if messages is None:
-     # messages = await self.last_messages(add_think=add_think)
-     # if messages is None:
-     # raise ValueError("No messages provided and no last messages available.")
-
-     # if tokenizer is None:
-     # tokenizer = get_tokenizer(self.model)
-
-     # ret = await inspect_word_probs_async(self, tokenizer, messages)
-     # if do_print:
-     # print(ret[-1])
-     # return ret
-
-     # async def last_messages(
-     # self, add_think: bool = True
-     # ) -> Optional[List[Dict[str, str]]]:
-     # """Get the last conversation messages including assistant response."""
-     # if not hasattr(self, "last_log"):
-     # return None
-
-     # last_conv = self._last_log
-     # messages = last_conv[1] if len(last_conv) > 1 else None
-     # last_msg = last_conv[2]
-     # if not isinstance(last_msg, dict):
-     # last_conv[2] = last_conv[2].model_dump() # type: ignore
-     # msg = last_conv[2]
-     # # Ensure msg is a dict
-     # if hasattr(msg, "model_dump"):
-     # msg = msg.model_dump()
-     # message = msg["choices"][0]["message"]
-     # reasoning = message.get("reasoning_content")
-     # answer = message.get("content")
-     # if reasoning and add_think:
-     # final_answer = f"<think>{reasoning}</think>\n{answer}"
-     # else:
-     # final_answer = f"<think>\n\n</think>\n{answer}"
-     # assistant = {"role": "assistant", "content": final_answer}
-     # messages = messages + [assistant] # type: ignore
-     # return messages if messages else None
-
-     # async def inspect_history(self) -> None:
-     # """Inspect the conversation history with proper formatting."""
-     # if not hasattr(self, "last_log"):
-     # raise ValueError("No history available. Please call the model first.")
-
-     # prompt, messages, response = self._last_log
-     # if hasattr(response, "model_dump"):
-     # response = response.model_dump()
-     # if not messages:
-     # messages = [{"role": "user", "content": prompt}]
-
-     # print("\n\n")
-     # print(_blue("[Conversation History]") + "\n")
-
-     # for msg in messages:
-     # role = msg["role"]
-     # content = msg["content"]
-     # print(_red(f"{role.capitalize()}:"))
-     # if isinstance(content, str):
-     # print(content.strip())
-     # elif isinstance(content, list):
-     # for item in content:
-     # if item.get("type") == "text":
-     # print(item["text"].strip())
-     # elif item.get("type") == "image_url":
-     # image_url = item["image_url"]["url"]
-     # if "base64" in image_url:
-     # len_base64 = len(image_url.split("base64,")[1])
-     # print(_blue(f"<IMAGE BASE64 ENCODED({len_base64})>"))
-     # else:
-     # print(_blue(f"<image_url: {image_url}>"))
-     # print("\n")
-
-     # print(_red("Response:"))
-     # if isinstance(response, dict) and response.get("choices"):
-     # message = response["choices"][0].get("message", {})
-     # reasoning = message.get("reasoning_content")
-     # parsed = message.get("parsed")
-     # content = message.get("content")
-     # if reasoning:
-     # print(_yellow("<think>"))
-     # print(reasoning.strip())
-     # print(_yellow("</think>\n"))
-     # if parsed:
-     # print(
-     # json.dumps(
-     # (
-     # parsed.model_dump()
-     # if hasattr(parsed, "model_dump")
-     # else parsed
-     # ),
-     # indent=2,
-     # )
-     # + "\n"
-     # )
-     # elif content:
-     # print(content.strip())
-     # else:
-     # print(_green("[No content]"))
-     # if len(response["choices"]) > 1:
-     # print(
-     # _blue(f"\n(Plus {len(response['choices']) - 1} other completions)")
-     # )
-     # else:
-     # print(_yellow("Warning: Not a standard OpenAI response object"))
-     # if isinstance(response, str):
-     # print(_green(response.strip()))
-     # elif isinstance(response, dict):
-     # print(_green(json.dumps(response, indent=2)))
-     # else:
-     # print(_green(str(response)))
-
      # ------------------------------------------------------------------ #
      # Misc helpers
      # ------------------------------------------------------------------ #

speedy_utils-1.1.12/src/llm_utils/lm/openai_memoize.py (new file)
@@ -0,0 +1,72 @@
+ from openai import OpenAI, AsyncOpenAI
+
+ from speedy_utils.common.utils_cache import memoize
+
+
+ class MOpenAI(OpenAI):
+     """
+     MOpenAI(*args, **kwargs)
+
+     Subclass of OpenAI that transparently memoizes the instance's `post` method.
+
+     This class forwards all constructor arguments to the OpenAI base class and then
+     replaces the instance's `post` method with a memoized wrapper:
+
+     Behavior
+     - The memoized `post` caches responses based on the arguments with which it is
+       invoked, preventing repeated identical requests from invoking the underlying
+       OpenAI API repeatedly.
+     - Because `post` is replaced on the instance, the cache is by-default tied to
+       the MOpenAI instance (per-instance cache).
+     - Any initialization arguments are passed unchanged to OpenAI.__init__.
+
+     Notes and cautions
+     - The exact semantics of caching (cache key construction, expiry, max size,
+       persistence) depend on the implementation of `memoize`. Ensure that the
+       provided `memoize` supports the desired behavior (e.g., hashing of mutable
+       inputs, thread-safety, TTL, cache invalidation).
+     - If the original `post` method has important side effects or relies on
+       non-deterministic behavior, memoization may change program behavior.
+     - If you need a shared cache across instances, or more advanced cache controls,
+       modify `memoize` or wrap at a class/static level instead of assigning to the
+       bound method.
+
+     Example
+         m = MOpenAI(api_key="...", model="gpt-4")
+         r1 = m.post("Hello")  # executes API call and caches result
+         r2 = m.post("Hello")  # returns cached result (no API call)
+     """
+
+     def __init__(self, *args, cache=True, **kwargs):
+         super().__init__(*args, **kwargs)
+         if cache:
+             self.post = memoize(self.post)
+
+
+ class MAsyncOpenAI(AsyncOpenAI):
+     """
+     MAsyncOpenAI(*args, **kwargs)
+
+     Async subclass of AsyncOpenAI that transparently memoizes the instance's `post` method.
+
+     This class forwards all constructor arguments to the AsyncOpenAI base class and then
+     replaces the instance's `post` method with a memoized wrapper:
+
+     Behavior
+     - The memoized `post` caches responses based on the arguments with which it is
+       invoked, preventing repeated identical requests from invoking the underlying
+       OpenAI API repeatedly.
+     - Because `post` is replaced on the instance, the cache is by-default tied to
+       the MAsyncOpenAI instance (per-instance cache).
+     - Any initialization arguments are passed unchanged to AsyncOpenAI.__init__.
+
+     Example
+         m = MAsyncOpenAI(api_key="...", model="gpt-4")
+         r1 = await m.post("Hello")  # executes API call and caches result
+         r2 = await m.post("Hello")  # returns cached result (no API call)
+     """
+
+     def __init__(self, *args, cache=True, **kwargs):
+         super().__init__(*args, **kwargs)
+         if cache:
+             self.post = memoize(self.post)
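
For context, a hedged sketch of the synchronous variant. In the openai-python 1.x SDK, `chat.completions.create(...)` is ultimately routed through the client's `post` method, so memoizing `post` (as above) should make a repeated, identical request come back from the cache; the endpoint and model name below are placeholders:

# Not part of the package: demonstrates the intended effect of MOpenAI's
# memoized `post`.
from llm_utils.lm.openai_memoize import MOpenAI

client = MOpenAI(base_url="http://localhost:8000/v1", api_key="abc")  # cache=True by default

kwargs = dict(model="my-model", messages=[{"role": "user", "content": "ping"}])
r1 = client.chat.completions.create(**kwargs)  # real HTTP request, result cached
r2 = client.chat.completions.create(**kwargs)  # same arguments -> served from cache

uncached = MOpenAI(base_url="http://localhost:8000/v1", api_key="abc", cache=False)
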

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/llm_utils/scripts/vllm_serve.py
@@ -72,6 +72,7 @@ import openai
  import requests
  from loguru import logger

+ from llm_utils.lm.openai_memoize import MOpenAI
  from speedy_utils.common.utils_io import load_by_ext

  LORA_DIR: str = os.environ.get("LORA_DIR", "/loras")
@@ -82,7 +83,7 @@ logger.info(f"LORA_DIR: {LORA_DIR}")

  def model_list(host_port: str, api_key: str = "abc") -> None:
      """List models from the vLLM server."""
-     client = openai.OpenAI(base_url=f"http://{host_port}/v1", api_key=api_key)
+     client = MOpenAI(base_url=f"http://{host_port}/v1", api_key=api_key)
      models = client.models.list()
      for model in models:
          print(f"Model ID: {model.id}")

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_cache.py
@@ -459,7 +459,12 @@ def both_memoize(
          disk_result: Optional[R] = None
          with disk_lock:
              if osp.exists(cache_path):
-                 disk_result = load_json_or_pickle(cache_path)
+                 try:
+                     disk_result = load_json_or_pickle(cache_path)
+                 except Exception:
+                     if osp.exists(cache_path):
+                         os.remove(cache_path)
+                     disk_result = None

          if disk_result is not None:
              with mem_lock:
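
The hunk above makes `both_memoize` treat an unreadable disk entry as a cache miss: the corrupt file is removed and the wrapped function is recomputed instead of the load error propagating. A hedged sketch of the observable behavior (the decorated function is made up; `cache_type="both"` is passed explicitly rather than relying on the default):

# Sketch: with the new try/except, a truncated or corrupted cache file is
# deleted on the next cold lookup and the function runs again.
from speedy_utils.common.utils_cache import memoize

@memoize(cache_type="both")
def slow_square(x: int) -> int:
    print("computing", x)
    return x * x

slow_square(4)  # computed, stored in memory and on disk
slow_square(4)  # served from cache
# If the on-disk entry later becomes unreadable, the next cold call (e.g. in a
# fresh process) removes the bad file and recomputes instead of raising.
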
@@ -555,6 +560,7 @@ def _async_both_memoize(
  # Public decorator (only export memoize)
  # --------------------------------------------------------------------------------------

+
  @overload
  def memoize(
      _func: Callable[P, R],
@@ -619,24 +625,34 @@ def memoize(
      """
      if "~/" in cache_dir:
          cache_dir = osp.expanduser(cache_dir)
+     from speedy_utils import timef

      def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
          is_async = inspect.iscoroutinefunction(func)

+         # Apply timing decorator if verbose=True
+         target_func = timef(func) if verbose else func
+
          if cache_type == "memory":
              if is_async:
-                 return _async_memory_memoize(func, size, keys, ignore_self, key)  # type: ignore[return-value]
-             return _memory_memoize(func, size, keys, ignore_self, key)  # type: ignore[return-value]
+                 return _async_memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]
+             return _memory_memoize(target_func, size, keys, ignore_self, key)  # type: ignore[return-value]

          if cache_type == "disk":
              if is_async:
-                 return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose, key)  # type: ignore[return-value]
-             return _disk_memoize(func, keys, cache_dir, ignore_self, verbose, key)  # type: ignore[return-value]
+                 return _async_disk_memoize(
+                     target_func, keys, cache_dir, ignore_self, verbose, key
+                 )  # type: ignore[return-value]
+             return _disk_memoize(
+                 target_func, keys, cache_dir, ignore_self, verbose, key
+             )  # type: ignore[return-value]

          # cache_type == "both"
          if is_async:
-             return _async_both_memoize(func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]
-         return both_memoize(func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]
+             return _async_both_memoize(
+                 target_func, keys, cache_dir, ignore_self, size, key
+             )  # type: ignore[return-value]
+         return both_memoize(target_func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]

      # Support both @memoize and @memoize(...)
      if _func is None:
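
`verbose=True` now wraps the target function in `timef` before any caching wrapper is applied, so only real executions are timed; cache hits bypass the timer entirely. A hedged example (assuming `timef` reports the elapsed runtime of the wrapped call, and that `cache_type` and `verbose` are keyword arguments of `memoize`, as the body above suggests):

# Sketch: timing applies to the underlying function, not to cache lookups.
import time
from speedy_utils.common.utils_cache import memoize

@memoize(cache_type="memory", verbose=True)
def slow_add(a: int, b: int) -> int:
    time.sleep(0.5)
    return a + b

slow_add(1, 2)  # executed -> timef reports roughly 0.5 s
slow_add(1, 2)  # cache hit -> no sleep, no timing report
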

{speedy_utils-1.1.10 → speedy_utils-1.1.12}/src/speedy_utils/common/utils_io.py
@@ -10,6 +10,7 @@ from pathlib import Path
  from typing import Any

  from json_repair import loads as jloads
+ from pydantic import BaseModel

  from .utils_misc import mkdir_or_exist

@@ -46,8 +47,19 @@ def dump_json_or_pickle(
      elif fname.endswith(".jsonl"):
          dump_jsonl(obj, fname)
      elif fname.endswith(".pkl"):
-         with open(fname, "wb") as f:
-             pickle.dump(obj, f)
+         try:
+             with open(fname, "wb") as f:
+                 pickle.dump(obj, f)
+         except Exception as e:
+             if isinstance(obj, BaseModel):
+                 data = obj.model_dump()
+                 from fastcore.all import obj2dict, dict2obj
+                 obj2 = dict2obj(data)
+                 with open(fname, "wb") as f:
+                     pickle.dump(obj2, f)
+             else:
+                 raise ValueError(f"Error {e} while dumping {fname}") from e
+
      else:
          raise NotImplementedError(f"File type {fname} not supported")

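
The new `.pkl` fallback covers pydantic models that pickle cannot serialize directly: on failure it retries with `dict2obj(obj.model_dump())`, i.e. a plain fastcore attribute-dict built from the model's data. A hedged sketch of a case that would trigger it, assuming fastcore is installed (the fallback imports it lazily) and the `(obj, fname)` argument order used elsewhere in the module:

# Illustrative only: a model class created dynamically inside a function cannot
# be located by pickle, so pickling the instance fails; the fallback pickles
# dict2obj(instance.model_dump()) instead.
from pydantic import create_model
from speedy_utils.common.utils_io import dump_json_or_pickle

def make_record():
    Record = create_model("Record", name=(str, ...), score=(float, ...))
    return Record(name="demo", score=0.9)

dump_json_or_pickle(make_record(), "/tmp/record.pkl")  # plain pickle fails, fallback kicks in
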