speedy-utils 1.1.9__tar.gz → 1.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/PKG-INFO +1 -1
  2. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/pyproject.toml +1 -1
  3. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_llm_task.py +5 -1
  4. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_lm.py +16 -2
  5. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/__init__.py +1 -3
  6. speedy_utils-1.1.10/src/speedy_utils/common/utils_cache.py +648 -0
  7. speedy_utils-1.1.9/src/speedy_utils/common/utils_cache.py +0 -494
  8. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/README.md +0 -0
  9. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/__init__.py +0 -0
  10. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/__init__.py +0 -0
  11. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/display.py +0 -0
  12. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/transform.py +0 -0
  13. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/utils.py +0 -0
  14. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/group_messages.py +0 -0
  15. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/__init__.py +0 -0
  16. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/__init__.py +0 -0
  17. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/_utils.py +0 -0
  18. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
  19. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
  20. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/utils.py +0 -0
  21. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/README.md +0 -0
  22. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
  23. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/vllm_serve.py +0 -0
  24. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/all.py +0 -0
  25. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/__init__.py +0 -0
  26. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/clock.py +0 -0
  27. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/function_decorator.py +0 -0
  28. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/logger.py +0 -0
  29. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/notebook_utils.py +0 -0
  30. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/report_manager.py +0 -0
  31. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_io.py +0 -0
  32. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_misc.py +0 -0
  33. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_print.py +0 -0
  34. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/__init__.py +0 -0
  35. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/process.py +0 -0
  36. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/thread.py +0 -0
  37. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/__init__.py +0 -0
  38. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/mpython.py +0 -0
  39. {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: speedy-utils
3
- Version: 1.1.9
3
+ Version: 1.1.10
4
4
  Summary: Fast and easy-to-use package for data science
5
5
  Author: AnhVTH
6
6
  Author-email: anhvth.226@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "speedy-utils"
3
- version = "1.1.9"
3
+ version = "1.1.10"
4
4
  description = "Fast and easy-to-use package for data science"
5
5
  authors = ["AnhVTH <anhvth.226@gmail.com>"]
6
6
  readme = "README.md"
@@ -389,7 +389,7 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
389
389
  input_data: InputModelType,
390
390
  expected_response: Optional[OutputModelType] = None,
391
391
  label: Optional[str] = None,
392
- cache_dir: pathlib.Path = DEFAULT_CACHE_DIR,
392
+ cache_dir: Optional[pathlib.Path] = None,
393
393
  ) -> OutputModelType:
394
394
  """
395
395
  Generate training data for both thinking and non-thinking modes.
@@ -415,6 +415,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
415
415
  # Create non-thinking mode equivalent
416
416
  no_think_messages = self._create_no_think_messages(think_messages)
417
417
 
418
+ # Use default cache directory if none provided
419
+ if cache_dir is None:
420
+ cache_dir = self.DEFAULT_CACHE_DIR or pathlib.Path("./cache")
421
+
418
422
  # Save training data
419
423
  self._save_training_data(
420
424
  input_data=input_data,
@@ -96,12 +96,16 @@ class AsyncLM(AsyncLMBase):
96
96
 
97
97
  async def _unified_client_call(
98
98
  self,
99
- messages: list[dict],
99
+ messages: RawMsgs,
100
100
  extra_body: Optional[dict] = None,
101
101
  cache_suffix: str = "",
102
102
  ) -> dict:
103
103
  """Unified method for all client interactions with caching and error handling."""
104
- converted_messages = self._convert_messages(messages)
104
+ converted_messages: Messages = (
105
+ self._convert_messages(cast(LegacyMsgs, messages))
106
+ if messages and isinstance(messages[0], dict)
107
+ else cast(Messages, messages)
108
+ )
105
109
  cache_key = None
106
110
  completion = None
107
111
 
@@ -385,3 +389,13 @@ class AsyncLM(AsyncLMBase):
385
389
  raise ValueError(
386
390
  f"Failed to validate against response model {response_model.__name__}: {exc}\nRaw content: {content}"
387
391
  ) from exc
392
+
393
+ async def __aenter__(self):
394
+ return self
395
+
396
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
397
+ if hasattr(self, "_last_client"):
398
+ last_client = self._last_client # type: ignore
399
+ await last_client._client.aclose()
400
+ else:
401
+ logger.warning("No last client to close")
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
108
108
  )
109
109
 
110
110
  # Cache utilities
111
- from .common.utils_cache import amemoize, identify, identify_uuid, memoize
111
+ from .common.utils_cache import identify, identify_uuid, memoize
112
112
 
113
113
  # IO utilities
114
114
  from .common.utils_io import (
@@ -197,7 +197,6 @@ __all__ = [
197
197
  # Function decorators
198
198
  "retry_runtime",
199
199
  # Cache utilities
200
- "amemoize",
201
200
  "memoize",
202
201
  "identify",
203
202
  "identify_uuid",
@@ -227,5 +226,4 @@ __all__ = [
227
226
  "multi_thread",
228
227
  # Notebook utilities
229
228
  "change_dir",
230
- "amemoize",
231
229
  ]
@@ -0,0 +1,648 @@
1
+ import asyncio
2
+ import functools
3
+ import inspect
4
+ import json
5
+ import os
6
+ import os.path as osp
7
+ import pickle
8
+ import uuid
9
+ import weakref
10
+ from threading import Lock
11
+ from typing import Any, Awaitable, Callable, Literal, Optional, TypeVar, overload
12
+
13
+ try:
14
+ # Python 3.10+
15
+ from typing import ParamSpec
16
+ except ImportError: # pragma: no cover
17
+ from typing_extensions import ParamSpec # type: ignore
18
+
19
+ import cachetools
20
+ import pandas as pd
21
+ import xxhash
22
+ from loguru import logger
23
+ from pydantic import BaseModel
24
+
25
+ from speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
26
+ from speedy_utils.common.utils_misc import mkdir_or_exist
27
+
28
+ # --------------------------------------------------------------------------------------
29
+ # Defaults / Globals
30
+ # --------------------------------------------------------------------------------------
31
+
32
+ SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/speedy_cache")
33
+
34
+ # Thread locks for safety
35
+ disk_lock = Lock()
36
+ mem_lock = Lock()
37
+
38
+ # Quick identifier cache for big objects that support weakref
39
+ # (prevents recomputing expensive keys for the same object instance)
40
+ _QUICK_ID_MAP: "weakref.WeakKeyDictionary[Any, str]" = weakref.WeakKeyDictionary()
41
+
42
+ # Per-function memory caches (so different functions can have different LRU sizes)
43
+ _MEM_CACHES: "weakref.WeakKeyDictionary[Callable[..., Any], cachetools.LRUCache]" = (
44
+ weakref.WeakKeyDictionary()
45
+ )
46
+
47
+ # Backward-compat global symbol (internal only; not exported)
48
+ LRU_MEM_CACHE = cachetools.LRUCache(maxsize=256)
49
+
50
+ # Typing helpers
51
+ P = ParamSpec("P")
52
+ R = TypeVar("R")
53
+ AsyncFunc = Callable[P, Awaitable[R]]
54
+
55
+ # --------------------------------------------------------------------------------------
56
+ # Utilities
57
+ # --------------------------------------------------------------------------------------
58
+
59
+
60
+ def fast_serialize(x: Any) -> bytes:
61
+ """Serialize x quickly; JSON if possible (stable), else pickle."""
62
+ try:
63
+ return json.dumps(x, sort_keys=True, default=str).encode("utf-8")
64
+ except (TypeError, ValueError):
65
+ return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
66
+
67
+
68
+ def identify_uuid(x: Any) -> str:
69
+ data = fast_serialize(x)
70
+ hash_obj = xxhash.xxh128(data, seed=0)
71
+ return str(uuid.UUID(bytes=hash_obj.digest()))
72
+
73
+
74
+ def get_source(func: Callable[..., Any]) -> str:
75
+ """Minified function source; falls back to module + qualname for builtins/lambdas."""
76
+ try:
77
+ code = inspect.getsource(func)
78
+ except OSError:
79
+ # source not available (e.g., builtins, some C extensions)
80
+ mod = getattr(func, "__module__", "unknown")
81
+ qn = getattr(func, "__qualname__", getattr(func, "__name__", "unknown"))
82
+ code = f"{mod}.{qn}"
83
+ # normalize whitespace to make it stable
84
+ for r in (" ", "\n", "\t", "\r"):
85
+ code = code.replace(r, "")
86
+ return code
87
+
88
+
89
+ def _try_get_quick_id(obj: Any) -> Optional[str]:
90
+ """Return a quick identifier if obj is weakref-able and cached."""
91
+ try:
92
+ return _QUICK_ID_MAP.get(obj) # type: ignore[arg-type]
93
+ except TypeError:
94
+ # not weakref-able (e.g., list/dict); cannot use WeakKeyDictionary
95
+ return None
96
+
97
+
98
+ def _try_store_quick_id(obj: Any, ident: str) -> None:
99
+ """Store quick identifier if obj is weakref-able."""
100
+ try:
101
+ _QUICK_ID_MAP[obj] = ident # type: ignore[index]
102
+ except TypeError:
103
+ # not weakref-able
104
+ pass
105
+
106
+
107
+ def identify(obj: Any, depth: int = 0, max_depth: int = 2) -> str:
108
+ """
109
+ Produce a stable, content-based identifier string for arbitrary Python objects.
110
+ Includes a quick path using a weakref cache for large, user-defined objects.
111
+ """
112
+ # Quick-path for user-defined objects (weakref-able)
113
+ if depth == 0:
114
+ quick = _try_get_quick_id(obj)
115
+ if quick is not None:
116
+ return quick
117
+
118
+ if isinstance(obj, (list, tuple)):
119
+ x = [identify(x, depth + 1, max_depth) for x in obj]
120
+ x = "\n".join(x)
121
+ out = identify(x, depth + 1, max_depth)
122
+ if depth == 0:
123
+ _try_store_quick_id(obj, out)
124
+ return out
125
+ elif isinstance(obj, (pd.DataFrame, pd.Series)):
126
+ x = str(obj.to_dict())
127
+ out = identify(x, depth + 1, max_depth)
128
+ if depth == 0:
129
+ _try_store_quick_id(obj, out)
130
+ return out
131
+ elif hasattr(obj, "__code__"):
132
+ out = identify(get_source(obj), depth + 1, max_depth)
133
+ if depth == 0:
134
+ _try_store_quick_id(obj, out)
135
+ return out
136
+ elif isinstance(obj, BaseModel):
137
+ out = identify(obj.model_dump(), depth + 1, max_depth)
138
+ if depth == 0:
139
+ _try_store_quick_id(obj, out)
140
+ return out
141
+ elif isinstance(obj, dict):
142
+ ks = sorted(obj.keys())
143
+ vs = [identify(obj[k], depth + 1, max_depth) for k in ks]
144
+ out = identify([ks, vs], depth + 1, max_depth)
145
+ if depth == 0:
146
+ _try_store_quick_id(obj, out)
147
+ return out
148
+ elif obj is None:
149
+ out = identify("None", depth + 1, max_depth)
150
+ if depth == 0:
151
+ _try_store_quick_id(obj, out)
152
+ return out
153
+ else:
154
+ # primitives / everything else
155
+ out = xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
156
+ if depth == 0:
157
+ _try_store_quick_id(obj, out)
158
+ return out
159
+
160
+
161
+ def _build_named_keys(
162
+ func: Callable[..., Any],
163
+ args: tuple[Any, ...],
164
+ kwargs: dict[str, Any],
165
+ keys: list[str],
166
+ ) -> list[Any]:
167
+ """Extract named parameters in order from args/kwargs for keying."""
168
+ arg_spec = inspect.getfullargspec(func).args
169
+ used_args = {arg_spec[i]: arg for i, arg in enumerate(args[: len(arg_spec)])}
170
+ used_args.update(kwargs)
171
+ values = [used_args[k] for k in keys if k in used_args]
172
+ if not values:
173
+ raise ValueError(f"Keys {keys} not found in function arguments")
174
+ return values
175
+
176
+
177
+ def _compute_cache_components(
178
+ func: Callable[..., Any],
179
+ args: tuple[Any, ...],
180
+ kwargs: dict[str, Any],
181
+ ignore_self: bool,
182
+ keys: Optional[list[str]],
183
+ key_fn: Optional[Callable[..., Any]],
184
+ ):
185
+ """
186
+ Return (func_source, sub_dir, key_id) for disk paths and memory keying.
187
+ - If key_fn provided, it determines the cache key content.
188
+ - Else if keys list provided, use those argument names.
189
+ - Else use full (args, kwargs), optionally ignoring 'self' for methods.
190
+ """
191
+ func_source = get_source(func)
192
+
193
+ # Custom key function (most explicit & fastest when user knows what's important)
194
+ if key_fn is not None:
195
+ try:
196
+ custom_val = key_fn(*args, **kwargs)
197
+ except Exception as e:
198
+ raise ValueError(f"key function for {func.__name__} raised: {e}") from e
199
+ sub_dir = "custom"
200
+ key_id = f"{identify(custom_val)}.pkl"
201
+ return func_source, sub_dir, key_id
202
+
203
+ # Named keys (back-compat)
204
+ if keys:
205
+ values = _build_named_keys(func, args, kwargs, keys)
206
+ param_hash = identify(values)
207
+ dir_path = f"{func.__name__}_{identify(func_source)}"
208
+ key_id = f"{'_'.join(keys)}_{param_hash}.pkl"
209
+ return func_source, dir_path, key_id
210
+
211
+ # Default: full argument identity (optionally ignoring 'self')
212
+ if (
213
+ inspect.getfullargspec(func).args
214
+ and inspect.getfullargspec(func).args[0] == "self"
215
+ and ignore_self
216
+ ):
217
+ fid = (func_source, args[1:], kwargs)
218
+ else:
219
+ fid = (func_source, args, kwargs)
220
+
221
+ return func_source, "funcs", f"{identify(fid)}.pkl"
222
+
223
+
224
+ def _mem_cache_for(func: Callable[..., Any], size: int) -> cachetools.LRUCache:
225
+ """Get or create a per-function LRU cache with the given size."""
226
+ # Keep a per-function cache to avoid cross-talk of maxsize across functions
227
+ with mem_lock:
228
+ cache = _MEM_CACHES.get(func)
229
+ if cache is None or cache.maxsize != size:
230
+ cache = cachetools.LRUCache(maxsize=size)
231
+ _MEM_CACHES[func] = cache
232
+ # Keep global symbol backwards-compatible internally
233
+ global LRU_MEM_CACHE
234
+ LRU_MEM_CACHE = cache
235
+ return cache
236
+
237
+
238
+ # --------------------------------------------------------------------------------------
239
+ # Memory-only memoize (sync / async)
240
+ # --------------------------------------------------------------------------------------
241
+
242
+
243
+ def _memory_memoize(
244
+ func: Callable[P, R],
245
+ size: int,
246
+ keys: Optional[list[str]],
247
+ ignore_self: bool,
248
+ key_fn: Optional[Callable[..., Any]],
249
+ ) -> Callable[P, R]:
250
+ mem_cache = _mem_cache_for(func, size)
251
+
252
+ @functools.wraps(func)
253
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
254
+ func_source, sub_dir, key_id = _compute_cache_components(
255
+ func, args, kwargs, ignore_self, keys, key_fn
256
+ )
257
+ name = identify((func_source, sub_dir, key_id))
258
+
259
+ with mem_lock:
260
+ if name in mem_cache:
261
+ return mem_cache[name] # type: ignore[return-value]
262
+
263
+ result = func(*args, **kwargs)
264
+
265
+ with mem_lock:
266
+ if name not in mem_cache:
267
+ mem_cache[name] = result # type: ignore[index]
268
+ return result
269
+
270
+ return wrapper
271
+
272
+
273
+ def _async_memory_memoize(
274
+ func: AsyncFunc[P, R],
275
+ size: int,
276
+ keys: Optional[list[str]],
277
+ ignore_self: bool,
278
+ key_fn: Optional[Callable[..., Any]],
279
+ ) -> AsyncFunc[P, R]:
280
+ mem_cache = _mem_cache_for(func, size)
281
+
282
+ # Avoid duplicate in-flight computations for the same key
283
+ inflight: dict[str, asyncio.Task[R]] = {}
284
+ alock = asyncio.Lock()
285
+
286
+ @functools.wraps(func)
287
+ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
288
+ func_source, sub_dir, key_id = _compute_cache_components(
289
+ func, args, kwargs, ignore_self, keys, key_fn
290
+ )
291
+ name = identify((func_source, sub_dir, key_id))
292
+
293
+ async with alock:
294
+ if name in mem_cache:
295
+ return mem_cache[name] # type: ignore[return-value]
296
+ task = inflight.get(name)
297
+ if task is None:
298
+ task = asyncio.create_task(func(*args, **kwargs)) # type: ignore[arg-type]
299
+ inflight[name] = task
300
+
301
+ try:
302
+ result = await task
303
+ finally:
304
+ async with alock:
305
+ inflight.pop(name, None)
306
+
307
+ with mem_lock:
308
+ mem_cache[name] = result # type: ignore[index]
309
+ return result
310
+
311
+ return wrapper
312
+
313
+
314
+ # --------------------------------------------------------------------------------------
315
+ # Disk-only memoize (sync / async)
316
+ # --------------------------------------------------------------------------------------
317
+
318
+
319
+ def _disk_memoize(
320
+ func: Callable[P, R],
321
+ keys: Optional[list[str]],
322
+ cache_dir: str,
323
+ ignore_self: bool,
324
+ verbose: bool,
325
+ key_fn: Optional[Callable[..., Any]],
326
+ ) -> Callable[P, R]:
327
+ @functools.wraps(func)
328
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
329
+ try:
330
+ func_source, sub_dir, key_id = _compute_cache_components(
331
+ func, args, kwargs, ignore_self, keys, key_fn
332
+ )
333
+ if sub_dir == "funcs":
334
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
335
+ else:
336
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
337
+ mkdir_or_exist(osp.dirname(cache_path))
338
+
339
+ with disk_lock:
340
+ if osp.exists(cache_path):
341
+ try:
342
+ return load_json_or_pickle(cache_path)
343
+ except Exception as e:
344
+ if osp.exists(cache_path):
345
+ os.remove(cache_path)
346
+ if verbose:
347
+ logger.opt(depth=1).warning(
348
+ f"Error loading cache: {str(e)[:100]}, recomputing"
349
+ )
350
+
351
+ result = func(*args, **kwargs)
352
+
353
+ with disk_lock:
354
+ if not osp.exists(cache_path):
355
+ dump_json_or_pickle(result, cache_path)
356
+ return result
357
+ except Exception as e:
358
+ if verbose:
359
+ logger.opt(depth=1).warning(
360
+ f"Failed to cache {func.__name__}: {e}, executing without cache"
361
+ )
362
+ return func(*args, **kwargs)
363
+
364
+ return wrapper
365
+
366
+
367
+ def _async_disk_memoize(
368
+ func: AsyncFunc[P, R],
369
+ keys: Optional[list[str]],
370
+ cache_dir: str,
371
+ ignore_self: bool,
372
+ verbose: bool,
373
+ key_fn: Optional[Callable[..., Any]],
374
+ ) -> AsyncFunc[P, R]:
375
+ @functools.wraps(func)
376
+ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
377
+ try:
378
+ func_source, sub_dir, key_id = _compute_cache_components(
379
+ func, args, kwargs, ignore_self, keys, key_fn
380
+ )
381
+ if sub_dir == "funcs":
382
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
383
+ else:
384
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
385
+ mkdir_or_exist(osp.dirname(cache_path))
386
+
387
+ def check_cache() -> Optional[R]:
388
+ with disk_lock:
389
+ if osp.exists(cache_path):
390
+ try:
391
+ return load_json_or_pickle(cache_path)
392
+ except Exception as e:
393
+ if osp.exists(cache_path):
394
+ os.remove(cache_path)
395
+ if verbose:
396
+ logger.opt(depth=1).warning(
397
+ f"Error loading cache: {str(e)[:100]}, recomputing"
398
+ )
399
+ return None
400
+
401
+ loop = asyncio.get_event_loop()
402
+ cached_result = await loop.run_in_executor(None, check_cache)
403
+ if cached_result is not None:
404
+ return cached_result
405
+
406
+ result = await func(*args, **kwargs)
407
+
408
+ def write_cache() -> None:
409
+ with disk_lock:
410
+ if not osp.exists(cache_path):
411
+ dump_json_or_pickle(result, cache_path)
412
+
413
+ await loop.run_in_executor(None, write_cache)
414
+ return result
415
+ except Exception as e:
416
+ if verbose:
417
+ logger.opt(depth=1).warning(
418
+ f"Failed to cache {func.__name__}: {e}, executing without cache"
419
+ )
420
+ return await func(*args, **kwargs)
421
+
422
+ return wrapper
423
+
424
+
425
+ # --------------------------------------------------------------------------------------
426
+ # Memory+Disk (sync / async)
427
+ # --------------------------------------------------------------------------------------
428
+
429
+
430
+ def both_memoize(
431
+ func: Callable[P, R],
432
+ keys: Optional[list[str]],
433
+ cache_dir: str,
434
+ ignore_self: bool,
435
+ size: int,
436
+ key_fn: Optional[Callable[..., Any]],
437
+ ) -> Callable[P, R]:
438
+ mem_cache = _mem_cache_for(func, size)
439
+
440
+ @functools.wraps(func)
441
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
442
+ func_source, sub_dir, key_id = _compute_cache_components(
443
+ func, args, kwargs, ignore_self, keys, key_fn
444
+ )
445
+ mem_key = identify((func_source, sub_dir, key_id))
446
+
447
+ # Memory first
448
+ with mem_lock:
449
+ if mem_key in mem_cache:
450
+ return mem_cache[mem_key] # type: ignore[return-value]
451
+
452
+ # Disk next
453
+ if sub_dir == "funcs":
454
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
455
+ else:
456
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
457
+ mkdir_or_exist(osp.dirname(cache_path))
458
+
459
+ disk_result: Optional[R] = None
460
+ with disk_lock:
461
+ if osp.exists(cache_path):
462
+ disk_result = load_json_or_pickle(cache_path)
463
+
464
+ if disk_result is not None:
465
+ with mem_lock:
466
+ mem_cache[mem_key] = disk_result # type: ignore[index]
467
+ return disk_result
468
+
469
+ # Miss: compute, then write both
470
+ result = func(*args, **kwargs)
471
+ with disk_lock:
472
+ if not osp.exists(cache_path):
473
+ dump_json_or_pickle(result, cache_path)
474
+ with mem_lock:
475
+ mem_cache[mem_key] = result # type: ignore[index]
476
+ return result
477
+
478
+ return wrapper
479
+
480
+
481
+ def _async_both_memoize(
482
+ func: AsyncFunc[P, R],
483
+ keys: Optional[list[str]],
484
+ cache_dir: str,
485
+ ignore_self: bool,
486
+ size: int,
487
+ key_fn: Optional[Callable[..., Any]],
488
+ ) -> AsyncFunc[P, R]:
489
+ mem_cache = _mem_cache_for(func, size)
490
+
491
+ inflight: dict[str, asyncio.Task[R]] = {}
492
+ alock = asyncio.Lock()
493
+
494
+ @functools.wraps(func)
495
+ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
496
+ func_source, sub_dir, key_id = _compute_cache_components(
497
+ func, args, kwargs, ignore_self, keys, key_fn
498
+ )
499
+ mem_key = identify((func_source, sub_dir, key_id))
500
+
501
+ # Memory
502
+ async with alock:
503
+ if mem_key in mem_cache:
504
+ return mem_cache[mem_key] # type: ignore[return-value]
505
+
506
+ # Disk
507
+ if sub_dir == "funcs":
508
+ cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
509
+ else:
510
+ cache_path = osp.join(cache_dir, sub_dir, key_id)
511
+ mkdir_or_exist(osp.dirname(cache_path))
512
+
513
+ def check_disk_cache() -> Optional[R]:
514
+ with disk_lock:
515
+ if osp.exists(cache_path):
516
+ return load_json_or_pickle(cache_path)
517
+ return None
518
+
519
+ loop = asyncio.get_event_loop()
520
+ disk_result = await loop.run_in_executor(None, check_disk_cache)
521
+
522
+ if disk_result is not None:
523
+ with mem_lock:
524
+ mem_cache[mem_key] = disk_result # type: ignore[index]
525
+ return disk_result
526
+
527
+ # Avoid duplicate async work for same key
528
+ async with alock:
529
+ task = inflight.get(mem_key)
530
+ if task is None:
531
+ task = asyncio.create_task(func(*args, **kwargs)) # type: ignore[arg-type]
532
+ inflight[mem_key] = task
533
+
534
+ try:
535
+ result = await task
536
+ finally:
537
+ async with alock:
538
+ inflight.pop(mem_key, None)
539
+
540
+ def write_disk_cache() -> None:
541
+ with disk_lock:
542
+ if not osp.exists(cache_path):
543
+ dump_json_or_pickle(result, cache_path)
544
+
545
+ await loop.run_in_executor(None, write_disk_cache)
546
+
547
+ with mem_lock:
548
+ mem_cache[mem_key] = result # type: ignore[index]
549
+ return result
550
+
551
+ return wrapper
552
+
553
+
554
+ # --------------------------------------------------------------------------------------
555
+ # Public decorator (only export memoize)
556
+ # --------------------------------------------------------------------------------------
557
+
558
+ @overload
559
+ def memoize(
560
+ _func: Callable[P, R],
561
+ *,
562
+ keys: Optional[list[str]] = ...,
563
+ key: Optional[Callable[..., Any]] = ...,
564
+ cache_dir: str = ...,
565
+ cache_type: Literal["memory", "disk", "both"] = ...,
566
+ size: int = ...,
567
+ ignore_self: bool = ...,
568
+ verbose: bool = ...,
569
+ ) -> Callable[P, R]: ...
570
+ @overload
571
+ def memoize(
572
+ _func: Callable[P, Awaitable[R]],
573
+ *,
574
+ keys: Optional[list[str]] = ...,
575
+ key: Optional[Callable[..., Any]] = ...,
576
+ cache_dir: str = ...,
577
+ cache_type: Literal["memory", "disk", "both"] = ...,
578
+ size: int = ...,
579
+ ignore_self: bool = ...,
580
+ verbose: bool = ...,
581
+ ) -> Callable[P, Awaitable[R]]: ...
582
+ @overload
583
+ def memoize(
584
+ _func: None = ...,
585
+ *,
586
+ keys: Optional[list[str]] = ...,
587
+ key: Optional[Callable[..., Any]] = ...,
588
+ cache_dir: str = ...,
589
+ cache_type: Literal["memory", "disk", "both"] = ...,
590
+ size: int = ...,
591
+ ignore_self: bool = ...,
592
+ verbose: bool = ...,
593
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
594
+
595
+
596
+ def memoize(
597
+ _func: Optional[Callable[P, Any]] = None,
598
+ *,
599
+ keys: Optional[list[str]] = None,
600
+ key: Optional[Callable[..., Any]] = None,
601
+ cache_dir: str = SPEED_CACHE_DIR,
602
+ cache_type: Literal["memory", "disk", "both"] = "both",
603
+ size: int = 256,
604
+ ignore_self: bool = True,
605
+ verbose: bool = False,
606
+ ):
607
+ """
608
+ Universal memoizer that supports sync and async functions, preserves annotations
609
+ for Pylance via ParamSpec/TypeVar, and caches in memory + disk by default.
610
+
611
+ - keys: list of argument names to include in key (back-compat).
612
+ - key: custom callable (*args, **kwargs) -> hashable/serializable object for keying.
613
+ Prefer this for performance on big inputs (e.g., key=lambda x: x.id).
614
+ - cache_dir: disk cache base directory (unlimited size).
615
+ - cache_type: "memory" | "disk" | "both" (default "both").
616
+ - size: memory LRU size per-function (default 256 items).
617
+ - ignore_self: ignore 'self' when building the default key for bound methods.
618
+ - verbose: enable warnings on cache load/write errors.
619
+ """
620
+ if "~/" in cache_dir:
621
+ cache_dir = osp.expanduser(cache_dir)
622
+
623
+ def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
624
+ is_async = inspect.iscoroutinefunction(func)
625
+
626
+ if cache_type == "memory":
627
+ if is_async:
628
+ return _async_memory_memoize(func, size, keys, ignore_self, key) # type: ignore[return-value]
629
+ return _memory_memoize(func, size, keys, ignore_self, key) # type: ignore[return-value]
630
+
631
+ if cache_type == "disk":
632
+ if is_async:
633
+ return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose, key) # type: ignore[return-value]
634
+ return _disk_memoize(func, keys, cache_dir, ignore_self, verbose, key) # type: ignore[return-value]
635
+
636
+ # cache_type == "both"
637
+ if is_async:
638
+ return _async_both_memoize(func, keys, cache_dir, ignore_self, size, key) # type: ignore[return-value]
639
+ return both_memoize(func, keys, cache_dir, ignore_self, size, key) # type: ignore[return-value]
640
+
641
+ # Support both @memoize and @memoize(...)
642
+ if _func is None:
643
+ return decorator
644
+ else:
645
+ return decorator(_func)
646
+
647
+
648
+ __all__ = ["memoize", "identify"]
@@ -1,494 +0,0 @@
1
- import asyncio
2
- import functools
3
- import inspect
4
- import json
5
- import os
6
- import os.path as osp
7
- import pickle
8
- import uuid
9
- from threading import Lock
10
- from typing import Any, Awaitable, Callable, Literal, TypeVar
11
-
12
- import cachetools
13
- import pandas as pd
14
- import xxhash
15
- from loguru import logger
16
- from pydantic import BaseModel
17
-
18
- from speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
19
- from speedy_utils.common.utils_misc import mkdir_or_exist
20
-
21
# Default on-disk cache root (~/.cache/speedy_cache).
SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/speedy_cache")
# Module-level LRU shared by all memoized functions; re-created with a new
# maxsize when a memoizer requests a different capacity.
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=128_000)

# NOTE(review): appears unused in this module — confirm before removing.
thread_locker = Lock()

# Add two locks for thread-safe cache access
disk_lock = Lock()   # guards all disk cache reads/writes
mem_lock = Lock()    # guards all in-memory LRU access

# Add async-specific types
T = TypeVar('T')
AsyncFunc = Callable[..., Awaitable[T]]
33
-
34
-
35
def compute_func_id(func, args, kwargs, ignore_self, keys):
    """Build the (source, sub_dir, key_id) triple that locates a cache entry.

    When *keys* is given, only those named arguments participate in the key and
    entries are grouped per function+source directory. Otherwise the full
    (source, args, kwargs) tuple is hashed; with *ignore_self*, the first
    positional argument of a method named ``self`` is skipped.

    Raises:
        ValueError: if *keys* is given but none of the names match an argument.
    """
    src = get_source(func)
    spec_args = inspect.getfullargspec(func).args

    if keys:
        # Map positional values onto their parameter names, then overlay kwargs.
        bound = {spec_args[i]: value for i, value in enumerate(args)}
        bound.update(kwargs)
        selected = [bound[name] for name in keys if name in bound]
        if not selected:
            raise ValueError(f"Keys {keys} not found in function arguments")
        sub_dir = f"{func.__name__}_{identify(src)}"
        key_id = f"{'_'.join(keys)}_{identify(selected)}.pkl"
        return src, sub_dir, key_id

    # Default key: whole argument tuple, optionally dropping a leading 'self'.
    skip_self = ignore_self and bool(spec_args) and spec_args[0] == "self"
    payload = (src, args[1:] if skip_self else args, kwargs)
    return src, "funcs", f"{identify(payload)}.pkl"
58
-
59
-
60
def fast_serialize(x: Any) -> bytes:
    """Serialize *x* to bytes: canonical sorted-key JSON when possible,
    otherwise the highest-protocol pickle fallback."""
    try:
        encoded = json.dumps(x, sort_keys=True)
    except (TypeError, ValueError):
        # Not JSON-representable (sets, custom objects, ...): pickle instead.
        return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
    return encoded.encode("utf-8")
65
-
66
-
67
def identify(obj: Any, depth=0, max_depth=2) -> str:
    """Return a stable xxhash64 hex digest for *obj*.

    Containers, pandas objects, callables, pydantic models and dicts are
    recursively reduced to hashes of their contents so that structurally equal
    values produce the same key.
    """
    # NOTE(review): `depth`/`max_depth` are threaded through every recursive
    # call but never compared, so recursion depth is effectively unbounded —
    # confirm whether a depth cut-off was intended.
    if isinstance(obj, (list, tuple)):
        # Hash each element, then hash the joined element hashes.
        x = [identify(x, depth + 1, max_depth) for x in obj]
        x = "\n".join(x)
        return identify(x, depth + 1, max_depth)
    # is pandas row or dict
    elif isinstance(obj, (pd.DataFrame, pd.Series)):
        x = str(obj.to_dict())
        return identify(x, depth + 1, max_depth)
    elif hasattr(obj, "__code__"):
        # Callables are identified by their whitespace-stripped source code.
        return identify(get_source(obj), depth + 1, max_depth)
    elif isinstance(obj, BaseModel):
        obj = obj.model_dump()
        return identify(obj, depth + 1, max_depth)
    elif isinstance(obj, dict):
        # Order-independent: hash sorted keys together with their value hashes.
        ks = sorted(obj.keys())
        vs = [identify(obj[k], depth + 1, max_depth) for k in ks]
        return identify([ks, vs], depth + 1, max_depth)
    elif obj is None:
        return identify("None", depth + 1, max_depth)
    else:
        # primitive_types = [int, float, str, bool]
        # if not type(obj) in primitive_types:
        #     logger.warning(f"Unknown type: {type(obj)}")
        return xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
92
-
93
-
94
def identify_uuid(x: Any) -> str:
    """Return a deterministic UUID string derived from the 128-bit xxhash of *x*."""
    digest = xxhash.xxh128(fast_serialize(x), seed=0).digest()
    return str(uuid.UUID(bytes=digest))
98
-
99
-
100
def get_source(func):
    """Return *func*'s source with all whitespace removed.

    Stripping spaces/newlines/tabs/CRs makes the hash of a function's body
    insensitive to pure formatting changes.
    """
    raw = inspect.getsource(func)
    # One C-level pass instead of four chained .replace() calls.
    return raw.translate(str.maketrans("", "", " \n\t\r"))
105
-
106
-
107
def _disk_memoize(func, keys, cache_dir, ignore_self, verbose):
    """Disk-backed memoizer for sync functions.

    Cache files live under *cache_dir*; the path is derived from the function's
    source and its (selected) arguments, so editing the function invalidates
    stale entries. Any cache-machinery failure degrades to a plain call.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        cache_path = None
        try:
            func_source, sub_dir, key_id = compute_func_id(
                func, args, kwargs, ignore_self, keys
            )
            if func_source is None:
                return func(*args, **kwargs)
            if sub_dir == "funcs":
                cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
            else:
                cache_path = osp.join(cache_dir, sub_dir, key_id)
            mkdir_or_exist(osp.dirname(cache_path))

            with disk_lock:
                if osp.exists(cache_path):
                    try:
                        return load_json_or_pickle(cache_path)
                    except Exception as e:
                        # Corrupt entry: drop it and fall through to recompute.
                        if osp.exists(cache_path):
                            os.remove(cache_path)
                        logger.opt(depth=1).warning(
                            f"Error loading cache: {str(e)[:100]}, continue to recompute"
                        )
        except Exception as e:
            logger.opt(depth=1).warning(
                f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
            )
            cache_path = None

        # BUGFIX: call the function OUTSIDE the cache try/except. Previously an
        # exception raised by the function itself was swallowed, logged as a
        # cache failure, and the function was executed a second time.
        result = func(*args, **kwargs)

        if cache_path is not None:
            try:
                with disk_lock:
                    if not osp.exists(cache_path):
                        dump_json_or_pickle(result, cache_path)
            except Exception as e:
                logger.opt(depth=1).warning(
                    f"Failed to write cache for {func.__name__}: {e}"
                )
        return result

    return wrapper
150
-
151
-
152
def _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose):
    """Disk-backed memoizer for coroutine functions.

    Blocking file I/O runs in the event loop's default executor so the
    coroutine never blocks. Cache-machinery failures degrade to a plain call.
    """

    _MISS = object()  # sentinel so a legitimately cached None still counts as a hit

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        cache_path = None
        loop = None
        try:
            func_source, sub_dir, key_id = compute_func_id(
                func, args, kwargs, ignore_self, keys
            )
            if func_source is None:
                return await func(*args, **kwargs)
            if sub_dir == "funcs":
                cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
            else:
                cache_path = osp.join(cache_dir, sub_dir, key_id)
            mkdir_or_exist(osp.dirname(cache_path))

            def check_cache():
                with disk_lock:
                    if osp.exists(cache_path):
                        try:
                            return load_json_or_pickle(cache_path)
                        except Exception as e:
                            # Corrupt entry: drop it and recompute below.
                            if osp.exists(cache_path):
                                os.remove(cache_path)
                            logger.opt(depth=1).warning(
                                f"Error loading cache: {str(e)[:100]}, continue to recompute"
                            )
                    return _MISS

            loop = asyncio.get_event_loop()
            cached = await loop.run_in_executor(None, check_cache)
            # BUGFIX: compare against a sentinel instead of None, so functions
            # that return None are not recomputed on every call.
            if cached is not _MISS:
                return cached
        except Exception as e:
            logger.opt(depth=1).warning(
                f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
            )
            cache_path = None

        # BUGFIX: await the function OUTSIDE the cache try/except so its own
        # exceptions propagate instead of triggering a second invocation.
        result = await func(*args, **kwargs)

        if cache_path is not None:
            def write_cache():
                with disk_lock:
                    if not osp.exists(cache_path):
                        dump_json_or_pickle(result, cache_path)

            try:
                if loop is None:
                    loop = asyncio.get_event_loop()
                await loop.run_in_executor(None, write_cache)
            except Exception as e:
                logger.opt(depth=1).warning(
                    f"Failed to write cache for {func.__name__}: {e}"
                )
        return result

    return wrapper
205
-
206
-
207
def _memory_memoize(func, size, keys, ignore_self):
    """In-memory LRU memoizer for sync functions.

    All memoized functions share the module-level LRU; requesting a different
    *size* re-creates (and empties) that shared cache.
    """
    global LRU_MEM_CACHE
    if LRU_MEM_CACHE.maxsize != size:
        LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        src, sub_dir, key_id = compute_func_id(func, args, kwargs, ignore_self, keys)
        if src is None:
            return func(*args, **kwargs)

        cache_key = identify((src, sub_dir, key_id))
        if not hasattr(func, "_mem_cache"):
            func._mem_cache = LRU_MEM_CACHE

        with mem_lock:
            if cache_key in func._mem_cache:
                return func._mem_cache[cache_key]

        value = func(*args, **kwargs)

        with mem_lock:
            # Another thread may have stored first; keep its entry.
            if cache_key not in func._mem_cache:
                func._mem_cache[cache_key] = value
        return value

    return wrapper
237
-
238
-
239
def _async_memory_memoize(func, size, keys, ignore_self):
    """In-memory LRU memoizer for coroutine functions.

    Mirrors `_memory_memoize`: the module-level LRU is shared and re-created
    when a different *size* is requested.
    """
    global LRU_MEM_CACHE
    if LRU_MEM_CACHE.maxsize != size:
        LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        src, sub_dir, key_id = compute_func_id(func, args, kwargs, ignore_self, keys)
        if src is None:
            return await func(*args, **kwargs)

        cache_key = identify((src, sub_dir, key_id))
        if not hasattr(func, "_mem_cache"):
            func._mem_cache = LRU_MEM_CACHE

        with mem_lock:
            if cache_key in func._mem_cache:
                return func._mem_cache[cache_key]

        value = await func(*args, **kwargs)

        with mem_lock:
            # Another task may have stored first; keep its entry.
            if cache_key not in func._mem_cache:
                func._mem_cache[cache_key] = value
        return value

    return wrapper
269
-
270
-
271
def both_memoize(func, keys, cache_dir, ignore_self):
    """Two-level (memory + disk) memoizer for sync functions.

    Lookup order: shared in-memory LRU, then disk; misses compute the value
    and populate both levels.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        func_source, sub_dir, key_id = compute_func_id(
            func, args, kwargs, ignore_self, keys
        )
        if func_source is None:
            return func(*args, **kwargs)

        mem_key = identify((func_source, sub_dir, key_id))
        if not hasattr(func, "_mem_cache"):
            func._mem_cache = LRU_MEM_CACHE

        with mem_lock:
            if mem_key in func._mem_cache:
                return func._mem_cache[mem_key]

        if sub_dir == "funcs":
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))

        with disk_lock:
            if osp.exists(cache_path):
                try:
                    result = load_json_or_pickle(cache_path)
                except Exception as e:
                    # CONSISTENCY FIX: match _disk_memoize — drop a corrupt
                    # entry and recompute instead of raising to the caller.
                    os.remove(cache_path)
                    logger.opt(depth=1).warning(
                        f"Error loading cache: {str(e)[:100]}, continue to recompute"
                    )
                else:
                    with mem_lock:
                        func._mem_cache[mem_key] = result
                    return result

        result = func(*args, **kwargs)

        with disk_lock:
            if not osp.exists(cache_path):
                dump_json_or_pickle(result, cache_path)
        with mem_lock:
            func._mem_cache[mem_key] = result
        return result

    return wrapper
313
-
314
-
315
def _async_both_memoize(func, keys, cache_dir, ignore_self):
    """Two-level (memory + disk) memoizer for coroutine functions.

    Memory hits are served synchronously; disk reads/writes run in the event
    loop's default executor so the coroutine never blocks on file I/O.
    """

    _MISS = object()  # sentinel: distinguishes "no cache entry" from a cached None

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        func_source, sub_dir, key_id = compute_func_id(
            func, args, kwargs, ignore_self, keys
        )
        if func_source is None:
            return await func(*args, **kwargs)

        mem_key = identify((func_source, sub_dir, key_id))
        if not hasattr(func, "_mem_cache"):
            func._mem_cache = LRU_MEM_CACHE

        with mem_lock:
            if mem_key in func._mem_cache:
                return func._mem_cache[mem_key]

        if sub_dir == "funcs":
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))

        def check_disk_cache():
            with disk_lock:
                if osp.exists(cache_path):
                    return load_json_or_pickle(cache_path)
                return _MISS

        loop = asyncio.get_event_loop()
        disk_result = await loop.run_in_executor(None, check_disk_cache)

        # BUGFIX: compare against the sentinel instead of None, so functions
        # that legitimately return None are not recomputed on every call.
        if disk_result is not _MISS:
            with mem_lock:
                func._mem_cache[mem_key] = disk_result
            return disk_result

        result = await func(*args, **kwargs)

        def write_disk_cache():
            with disk_lock:
                if not osp.exists(cache_path):
                    dump_json_or_pickle(result, cache_path)

        await loop.run_in_executor(None, write_disk_cache)

        with mem_lock:
            func._mem_cache[mem_key] = result
        return result

    return wrapper
370
-
371
-
372
def memoize(
    _func=None,
    *,
    keys=None,
    cache_dir=SPEED_CACHE_DIR,
    cache_type: Literal["memory", "disk", "both"] = "disk",
    size=10240,
    ignore_self=True,
    verbose=False,
):
    """Memoization decorator for sync and async functions.

    Args:
        _func: the function when used as bare ``@memoize``.
        keys: argument names to restrict the cache key to.
        cache_dir: base directory for the disk cache.
        cache_type: "memory", "disk" (default) or "both".
        size: capacity of the shared in-memory LRU.
        ignore_self: skip 'self' when keying bound methods.
        verbose: enable extra warnings in the disk backends.
    """
    if "~/" in cache_dir:
        cache_dir = osp.expanduser(cache_dir)

    def decorator(func):
        # Dispatch on sync vs. coroutine function.
        is_async = inspect.iscoroutinefunction(func)

        if cache_type == "memory":
            if is_async:
                return _async_memory_memoize(func, size, keys, ignore_self)
            return _memory_memoize(func, size, keys, ignore_self)

        if cache_type == "disk":
            if is_async:
                return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose)
            return _disk_memoize(func, keys, cache_dir, ignore_self, verbose)

        # cache_type == "both"
        if is_async:
            return _async_both_memoize(func, keys, cache_dir, ignore_self)
        # BUGFIX: previously `verbose` was passed where both_memoize expects
        # `ignore_self`, silently disabling self-skipping for cache_type="both".
        return both_memoize(func, keys, cache_dir, ignore_self)

    # Support both @memoize and @memoize(...) usage patterns.
    if _func is None:
        return decorator
    return decorator(_func)
440
-
441
-
442
def amemoize(
    _func=None,
    *,
    keys: list[str] | None = None,
    cache_dir: str = SPEED_CACHE_DIR,
    cache_type: Literal["memory", "disk", "both"] = "disk",
    size: int = 10240,
    ignore_self: bool = True,
    verbose: bool = False,
):
    """Memoization decorator restricted to coroutine functions.

    Args:
        _func: the coroutine function when used as bare ``@amemoize``.
        keys: argument names to restrict the cache key to.
        cache_dir: base directory for the disk cache.
        cache_type: "memory", "disk" (default) or "both".
        size: capacity of the in-memory LRU (memory/both).
        ignore_self: skip 'self' when keying bound methods.
        verbose: enable extra warnings in the disk backend.

    Raises:
        ValueError: if applied to a non-async function.

    Example:
        @amemoize(cache_type="both")
        async def my_async_func(x: int) -> str:
            return str(x)
    """
    if "~/" in cache_dir:
        cache_dir = osp.expanduser(cache_dir)

    def decorator(func):
        # Guard: this decorator is async-only.
        if not inspect.iscoroutinefunction(func):
            raise ValueError(f"amemoize can only be used with async functions. {func.__name__} is not async.")

        if cache_type == "memory":
            return _async_memory_memoize(func, size, keys, ignore_self)
        if cache_type == "disk":
            return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose)
        # cache_type == "both"
        return _async_both_memoize(func, keys, cache_dir, ignore_self)

    # Support both @amemoize and @amemoize(...) usage patterns.
    return decorator if _func is None else decorator(_func)
492
-
493
-
494
- __all__ = ["memoize", "identify", "identify_uuid", "amemoize"]
File without changes