speedy-utils 1.1.9__tar.gz → 1.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/PKG-INFO +1 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/pyproject.toml +1 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_llm_task.py +5 -1
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_lm.py +16 -2
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/__init__.py +1 -3
- speedy_utils-1.1.10/src/speedy_utils/common/utils_cache.py +648 -0
- speedy_utils-1.1.9/src/speedy_utils/common/utils_cache.py +0 -494
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/README.md +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/display.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/transform.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/chat_format/utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/group_messages.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/_utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/async_lm_base.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/async_lm/lm_specific.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/lm/utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/README.md +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/vllm_load_balancer.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/llm_utils/scripts/vllm_serve.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/all.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/clock.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/function_decorator.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/logger.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/notebook_utils.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/report_manager.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_io.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_misc.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/common/utils_print.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/process.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/multi_worker/thread.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/__init__.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/mpython.py +0 -0
- {speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/openapi_client_codegen.py +0 -0
|
@@ -389,7 +389,7 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
|
|
|
389
389
|
input_data: InputModelType,
|
|
390
390
|
expected_response: Optional[OutputModelType] = None,
|
|
391
391
|
label: Optional[str] = None,
|
|
392
|
-
cache_dir: pathlib.Path =
|
|
392
|
+
cache_dir: Optional[pathlib.Path] = None,
|
|
393
393
|
) -> OutputModelType:
|
|
394
394
|
"""
|
|
395
395
|
Generate training data for both thinking and non-thinking modes.
|
|
@@ -415,6 +415,10 @@ class AsyncLLMTask(ABC, Generic[InputModelType, OutputModelType]):
|
|
|
415
415
|
# Create non-thinking mode equivalent
|
|
416
416
|
no_think_messages = self._create_no_think_messages(think_messages)
|
|
417
417
|
|
|
418
|
+
# Use default cache directory if none provided
|
|
419
|
+
if cache_dir is None:
|
|
420
|
+
cache_dir = self.DEFAULT_CACHE_DIR or pathlib.Path("./cache")
|
|
421
|
+
|
|
418
422
|
# Save training data
|
|
419
423
|
self._save_training_data(
|
|
420
424
|
input_data=input_data,
|
|
@@ -96,12 +96,16 @@ class AsyncLM(AsyncLMBase):
|
|
|
96
96
|
|
|
97
97
|
async def _unified_client_call(
|
|
98
98
|
self,
|
|
99
|
-
messages:
|
|
99
|
+
messages: RawMsgs,
|
|
100
100
|
extra_body: Optional[dict] = None,
|
|
101
101
|
cache_suffix: str = "",
|
|
102
102
|
) -> dict:
|
|
103
103
|
"""Unified method for all client interactions with caching and error handling."""
|
|
104
|
-
converted_messages =
|
|
104
|
+
converted_messages: Messages = (
|
|
105
|
+
self._convert_messages(cast(LegacyMsgs, messages))
|
|
106
|
+
if messages and isinstance(messages[0], dict)
|
|
107
|
+
else cast(Messages, messages)
|
|
108
|
+
)
|
|
105
109
|
cache_key = None
|
|
106
110
|
completion = None
|
|
107
111
|
|
|
@@ -385,3 +389,13 @@ class AsyncLM(AsyncLMBase):
|
|
|
385
389
|
raise ValueError(
|
|
386
390
|
f"Failed to validate against response model {response_model.__name__}: {exc}\nRaw content: {content}"
|
|
387
391
|
) from exc
|
|
392
|
+
|
|
393
|
+
async def __aenter__(self):
|
|
394
|
+
return self
|
|
395
|
+
|
|
396
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
397
|
+
if hasattr(self, "_last_client"):
|
|
398
|
+
last_client = self._last_client # type: ignore
|
|
399
|
+
await last_client._client.aclose()
|
|
400
|
+
else:
|
|
401
|
+
logger.warning("No last client to close")
|
|
@@ -108,7 +108,7 @@ from .common.notebook_utils import (
|
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
# Cache utilities
|
|
111
|
-
from .common.utils_cache import
|
|
111
|
+
from .common.utils_cache import identify, identify_uuid, memoize
|
|
112
112
|
|
|
113
113
|
# IO utilities
|
|
114
114
|
from .common.utils_io import (
|
|
@@ -197,7 +197,6 @@ __all__ = [
|
|
|
197
197
|
# Function decorators
|
|
198
198
|
"retry_runtime",
|
|
199
199
|
# Cache utilities
|
|
200
|
-
"amemoize",
|
|
201
200
|
"memoize",
|
|
202
201
|
"identify",
|
|
203
202
|
"identify_uuid",
|
|
@@ -227,5 +226,4 @@ __all__ = [
|
|
|
227
226
|
"multi_thread",
|
|
228
227
|
# Notebook utilities
|
|
229
228
|
"change_dir",
|
|
230
|
-
"amemoize",
|
|
231
229
|
]
|
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import functools
|
|
3
|
+
import inspect
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import os.path as osp
|
|
7
|
+
import pickle
|
|
8
|
+
import uuid
|
|
9
|
+
import weakref
|
|
10
|
+
from threading import Lock
|
|
11
|
+
from typing import Any, Awaitable, Callable, Literal, Optional, TypeVar, overload
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
# Python 3.10+
|
|
15
|
+
from typing import ParamSpec
|
|
16
|
+
except ImportError: # pragma: no cover
|
|
17
|
+
from typing_extensions import ParamSpec # type: ignore
|
|
18
|
+
|
|
19
|
+
import cachetools
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import xxhash
|
|
22
|
+
from loguru import logger
|
|
23
|
+
from pydantic import BaseModel
|
|
24
|
+
|
|
25
|
+
from speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
|
|
26
|
+
from speedy_utils.common.utils_misc import mkdir_or_exist
|
|
27
|
+
|
|
28
|
+
# --------------------------------------------------------------------------------------
# Defaults / Globals
# --------------------------------------------------------------------------------------

# Base directory for the on-disk cache (unbounded size, shared by all functions).
SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/speedy_cache")

# Thread locks for safety
disk_lock = Lock()
mem_lock = Lock()

# Quick identifier cache for big objects that support weakref
# (prevents recomputing expensive keys for the same object instance)
_QUICK_ID_MAP: "weakref.WeakKeyDictionary[Any, str]" = weakref.WeakKeyDictionary()

# Per-function memory caches (so different functions can have different LRU sizes)
_MEM_CACHES: "weakref.WeakKeyDictionary[Callable[..., Any], cachetools.LRUCache]" = (
    weakref.WeakKeyDictionary()
)

# Backward-compat global symbol (internal only; not exported).
# NOTE: _mem_cache_for rebinds this to the most recently created per-function cache.
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=256)

# Typing helpers
P = ParamSpec("P")
R = TypeVar("R")
AsyncFunc = Callable[P, Awaitable[R]]

# --------------------------------------------------------------------------------------
# Utilities
# --------------------------------------------------------------------------------------
|
60
|
+
def fast_serialize(x: Any) -> bytes:
    """Serialize *x* to bytes: stable sorted-key JSON when possible, pickle otherwise."""
    try:
        encoded = json.dumps(x, sort_keys=True, default=str)
    except (TypeError, ValueError):
        # JSON cannot represent it (e.g. non-string dict keys) -> binary pickle.
        return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
    return encoded.encode("utf-8")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def identify_uuid(x: Any) -> str:
    """Return a deterministic UUID string derived from *x*'s serialized content."""
    digest = xxhash.xxh128(fast_serialize(x), seed=0).digest()
    return str(uuid.UUID(bytes=digest))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_source(func: Callable[..., Any]) -> str:
    """Return the function's source with all whitespace removed (a stable key).

    Falls back to ``module.qualname`` when the source is unavailable
    (builtins, some C extensions).
    """
    try:
        text = inspect.getsource(func)
    except OSError:
        module = getattr(func, "__module__", "unknown")
        qualname = getattr(func, "__qualname__", getattr(func, "__name__", "unknown"))
        text = f"{module}.{qualname}"
    # Strip whitespace variants so reformatting does not change the key.
    for ch in (" ", "\n", "\t", "\r"):
        text = text.replace(ch, "")
    return text
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _try_get_quick_id(obj: Any) -> Optional[str]:
    """Return a quick identifier if obj is weakref-able and cached.

    Returns ``None`` both on a cache miss and for objects that cannot be
    weak-referenced (e.g. list/dict), which WeakKeyDictionary rejects.
    """
    try:
        return _QUICK_ID_MAP.get(obj)  # type: ignore[arg-type]
    except TypeError:
        # not weakref-able (e.g., list/dict); cannot use WeakKeyDictionary
        return None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _try_store_quick_id(obj: Any, ident: str) -> None:
    """Store quick identifier if obj is weakref-able; silently skip otherwise."""
    try:
        _QUICK_ID_MAP[obj] = ident  # type: ignore[index]
    except TypeError:
        # not weakref-able; the identifier will simply be recomputed next time
        pass
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def identify(obj: Any, depth: int = 0, max_depth: int = 2) -> str:
    """
    Produce a stable, content-based identifier string for arbitrary Python objects.

    Includes a quick path using a weakref cache for large, user-defined objects.
    Recursion passes ``depth + 1`` so that only the top-level call (depth == 0)
    reads/writes the weakref quick-id cache.

    NOTE(review): ``max_depth`` is threaded through every recursive call but is
    never compared against ``depth`` — recursion is unbounded in practice;
    confirm whether a depth cutoff was intended.
    """
    # Quick-path for user-defined objects (weakref-able)
    if depth == 0:
        quick = _try_get_quick_id(obj)
        if quick is not None:
            return quick

    if isinstance(obj, (list, tuple)):
        # Hash each element, then hash the joined element hashes.
        x = [identify(x, depth + 1, max_depth) for x in obj]
        x = "\n".join(x)
        out = identify(x, depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    elif isinstance(obj, (pd.DataFrame, pd.Series)):
        # Content-based: two frames with equal data share an identifier.
        x = str(obj.to_dict())
        out = identify(x, depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    elif hasattr(obj, "__code__"):
        # Functions/lambdas: key on their (whitespace-stripped) source.
        out = identify(get_source(obj), depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    elif isinstance(obj, BaseModel):
        # Pydantic models: key on their field dict.
        out = identify(obj.model_dump(), depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    elif isinstance(obj, dict):
        # Sort keys so insertion order does not affect the identifier.
        ks = sorted(obj.keys())
        vs = [identify(obj[k], depth + 1, max_depth) for k in ks]
        out = identify([ks, vs], depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    elif obj is None:
        out = identify("None", depth + 1, max_depth)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
    else:
        # primitives / everything else
        out = xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
        if depth == 0:
            _try_store_quick_id(obj, out)
        return out
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _build_named_keys(
    func: Callable[..., Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
    keys: list[str],
) -> list[Any]:
    """Collect the values of the named parameters *keys* from a call's args/kwargs.

    Raises ``ValueError`` when none of the requested names appears in the call;
    names missing individually are silently skipped.
    """
    param_names = inspect.getfullargspec(func).args
    # zip stops at the shorter sequence, so surplus positional args are ignored.
    bound = dict(zip(param_names, args))
    bound.update(kwargs)
    selected = [bound[name] for name in keys if name in bound]
    if not selected:
        raise ValueError(f"Keys {keys} not found in function arguments")
    return selected
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _compute_cache_components(
    func: Callable[..., Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
    ignore_self: bool,
    keys: Optional[list[str]],
    key_fn: Optional[Callable[..., Any]],
) -> tuple[str, str, str]:
    """
    Return (func_source, sub_dir, key_id) for disk paths and memory keying.

    - If key_fn provided, it determines the cache key content.
    - Else if keys list provided, use those argument names.
    - Else use full (args, kwargs), optionally ignoring 'self' for methods.
    """
    func_source = get_source(func)

    # Custom key function (most explicit & fastest when user knows what's important)
    if key_fn is not None:
        try:
            custom_val = key_fn(*args, **kwargs)
        except Exception as e:
            raise ValueError(f"key function for {func.__name__} raised: {e}") from e
        return func_source, "custom", f"{identify(custom_val)}.pkl"

    # Named keys (back-compat)
    if keys:
        values = _build_named_keys(func, args, kwargs, keys)
        param_hash = identify(values)
        dir_path = f"{func.__name__}_{identify(func_source)}"
        key_id = f"{'_'.join(keys)}_{param_hash}.pkl"
        return func_source, dir_path, key_id

    # Default: full argument identity (optionally ignoring 'self').
    # Hoisted: getfullargspec is relatively expensive and was called twice here.
    arg_names = inspect.getfullargspec(func).args
    if arg_names and arg_names[0] == "self" and ignore_self:
        fid = (func_source, args[1:], kwargs)
    else:
        fid = (func_source, args, kwargs)

    return func_source, "funcs", f"{identify(fid)}.pkl"
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _mem_cache_for(func: Callable[..., Any], size: int) -> cachetools.LRUCache:
    """Get or create a per-function LRU cache with the given size.

    Requesting a different *size* for the same function discards its previous
    cache and starts a fresh (empty) one.
    """
    # Keep a per-function cache to avoid cross-talk of maxsize across functions
    with mem_lock:
        cache = _MEM_CACHES.get(func)
        if cache is None or cache.maxsize != size:
            cache = cachetools.LRUCache(maxsize=size)
            _MEM_CACHES[func] = cache
            # Keep global symbol backwards-compatible internally
            global LRU_MEM_CACHE
            LRU_MEM_CACHE = cache
        return cache
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# --------------------------------------------------------------------------------------
|
|
239
|
+
# Memory-only memoize (sync / async)
|
|
240
|
+
# --------------------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _memory_memoize(
    func: Callable[P, R],
    size: int,
    keys: Optional[list[str]],
    ignore_self: bool,
    key_fn: Optional[Callable[..., Any]],
) -> Callable[P, R]:
    """Memory-only (LRU) memoization for synchronous functions.

    The key combines the function's source with the call's key components, so
    editing the function naturally invalidates stale entries.
    """
    mem_cache = _mem_cache_for(func, size)

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        func_source, sub_dir, key_id = _compute_cache_components(
            func, args, kwargs, ignore_self, keys, key_fn
        )
        name = identify((func_source, sub_dir, key_id))

        with mem_lock:
            if name in mem_cache:
                return mem_cache[name]  # type: ignore[return-value]

        # The lock is released during the call, so two threads may compute the
        # same key concurrently; the duplicate write is skipped below.
        result = func(*args, **kwargs)

        with mem_lock:
            if name not in mem_cache:
                mem_cache[name] = result  # type: ignore[index]
        return result

    return wrapper
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _async_memory_memoize(
    func: AsyncFunc[P, R],
    size: int,
    keys: Optional[list[str]],
    ignore_self: bool,
    key_fn: Optional[Callable[..., Any]],
) -> AsyncFunc[P, R]:
    """Memory-only (LRU) memoization for async functions.

    Concurrent awaits of the same key share a single in-flight task rather
    than duplicating the computation.
    """
    mem_cache = _mem_cache_for(func, size)

    # Avoid duplicate in-flight computations for the same key
    inflight: dict[str, asyncio.Task[R]] = {}
    alock = asyncio.Lock()

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        func_source, sub_dir, key_id = _compute_cache_components(
            func, args, kwargs, ignore_self, keys, key_fn
        )
        name = identify((func_source, sub_dir, key_id))

        async with alock:
            if name in mem_cache:
                return mem_cache[name]  # type: ignore[return-value]
            task = inflight.get(name)
            if task is None:
                task = asyncio.create_task(func(*args, **kwargs))  # type: ignore[arg-type]
                inflight[name] = task

        try:
            result = await task
        finally:
            # Drop the in-flight entry even when the task raised.
            async with alock:
                inflight.pop(name, None)

        with mem_lock:
            mem_cache[name] = result  # type: ignore[index]
        return result

    return wrapper
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
# --------------------------------------------------------------------------------------
|
|
315
|
+
# Disk-only memoize (sync / async)
|
|
316
|
+
# --------------------------------------------------------------------------------------
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _disk_memoize(
    func: Callable[P, R],
    keys: Optional[list[str]],
    cache_dir: str,
    ignore_self: bool,
    verbose: bool,
    key_fn: Optional[Callable[..., Any]],
) -> Callable[P, R]:
    """Disk-only memoization for synchronous functions.

    Results are persisted under *cache_dir*; corrupt cache files are deleted
    and the value recomputed. Any caching failure degrades to calling the
    function without a cache.
    """

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        try:
            func_source, sub_dir, key_id = _compute_cache_components(
                func, args, kwargs, ignore_self, keys, key_fn
            )
            # "funcs" entries are grouped per function name; custom/named keys
            # already encode the function in sub_dir/key_id.
            if sub_dir == "funcs":
                cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
            else:
                cache_path = osp.join(cache_dir, sub_dir, key_id)
            mkdir_or_exist(osp.dirname(cache_path))

            with disk_lock:
                if osp.exists(cache_path):
                    try:
                        return load_json_or_pickle(cache_path)
                    except Exception as e:
                        # Corrupt entry: remove it and fall through to recompute.
                        if osp.exists(cache_path):
                            os.remove(cache_path)
                        if verbose:
                            logger.opt(depth=1).warning(
                                f"Error loading cache: {str(e)[:100]}, recomputing"
                            )

            result = func(*args, **kwargs)

            with disk_lock:
                if not osp.exists(cache_path):
                    dump_json_or_pickle(result, cache_path)
            return result
        except Exception as e:
            # NOTE(review): this also catches exceptions raised by func itself,
            # which then runs a second time below — confirm this is intended.
            if verbose:
                logger.opt(depth=1).warning(
                    f"Failed to cache {func.__name__}: {e}, executing without cache"
                )
            return func(*args, **kwargs)

    return wrapper
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _async_disk_memoize(
    func: AsyncFunc[P, R],
    keys: Optional[list[str]],
    cache_dir: str,
    ignore_self: bool,
    verbose: bool,
    key_fn: Optional[Callable[..., Any]],
) -> AsyncFunc[P, R]:
    """Disk-only memoization for async functions.

    File I/O runs in the default executor so the event loop never blocks.
    Caching failures degrade to a plain (uncached) call; exceptions raised by
    *func* itself always propagate unchanged.
    """

    def _warn(e: Exception) -> None:
        # Best-effort cache: caching problems are reported, never fatal.
        if verbose:
            logger.opt(depth=1).warning(
                f"Failed to cache {func.__name__}: {e}, executing without cache"
            )

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        cache_path: Optional[str] = None
        try:
            func_source, sub_dir, key_id = _compute_cache_components(
                func, args, kwargs, ignore_self, keys, key_fn
            )
            # "funcs" entries are grouped per function name.
            if sub_dir == "funcs":
                cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
            else:
                cache_path = osp.join(cache_dir, sub_dir, key_id)
            mkdir_or_exist(osp.dirname(cache_path))
        except Exception as e:
            _warn(e)
            cache_path = None

        # get_running_loop() is the supported API inside a coroutine;
        # get_event_loop() is deprecated in this context since Python 3.10.
        loop = asyncio.get_running_loop()

        if cache_path is not None:
            path = cache_path

            def check_cache() -> "tuple[bool, Optional[R]]":
                # Return (hit, value) so a legitimately cached ``None`` result
                # is served instead of being treated as a miss (the previous
                # `is not None` check recomputed None results on every call).
                with disk_lock:
                    if osp.exists(path):
                        try:
                            return True, load_json_or_pickle(path)
                        except Exception as e:
                            # Corrupt entry: drop it and recompute.
                            if osp.exists(path):
                                os.remove(path)
                            if verbose:
                                logger.opt(depth=1).warning(
                                    f"Error loading cache: {str(e)[:100]}, recomputing"
                                )
                return False, None

            try:
                hit, cached = await loop.run_in_executor(None, check_cache)
                if hit:
                    return cached  # type: ignore[return-value]
            except Exception as e:
                _warn(e)

        # Run outside any try/except so func's own exceptions propagate once
        # (previously they fell into the broad handler and func ran twice).
        result = await func(*args, **kwargs)

        if cache_path is not None:
            path = cache_path

            def write_cache() -> None:
                with disk_lock:
                    if not osp.exists(path):
                        dump_json_or_pickle(result, path)

            try:
                await loop.run_in_executor(None, write_cache)
            except Exception as e:
                _warn(e)

        return result

    return wrapper
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# --------------------------------------------------------------------------------------
|
|
426
|
+
# Memory+Disk (sync / async)
|
|
427
|
+
# --------------------------------------------------------------------------------------
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def both_memoize(
    func: Callable[P, R],
    keys: Optional[list[str]],
    cache_dir: str,
    ignore_self: bool,
    size: int,
    key_fn: Optional[Callable[..., Any]],
) -> Callable[P, R]:
    """Two-tier (memory LRU + disk) memoization for synchronous functions.

    Lookup order: per-function LRU first, disk second; disk hits are promoted
    into the LRU. A miss computes once and populates both tiers.
    """
    mem_cache = _mem_cache_for(func, size)

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        func_source, sub_dir, key_id = _compute_cache_components(
            func, args, kwargs, ignore_self, keys, key_fn
        )
        mem_key = identify((func_source, sub_dir, key_id))

        # Memory first
        with mem_lock:
            if mem_key in mem_cache:
                return mem_cache[mem_key]  # type: ignore[return-value]

        # Disk next
        if sub_dir == "funcs":
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))

        # Track hits explicitly so a cached ``None`` result is served rather
        # than being mistaken for a miss (the previous `is not None` check
        # recomputed None results on every call).
        found = False
        disk_result: Optional[R] = None
        with disk_lock:
            if osp.exists(cache_path):
                disk_result = load_json_or_pickle(cache_path)
                found = True

        if found:
            with mem_lock:
                mem_cache[mem_key] = disk_result  # type: ignore[index]
            return disk_result  # type: ignore[return-value]

        # Miss: compute, then write both tiers
        result = func(*args, **kwargs)
        with disk_lock:
            if not osp.exists(cache_path):
                dump_json_or_pickle(result, cache_path)
        with mem_lock:
            mem_cache[mem_key] = result  # type: ignore[index]
        return result

    return wrapper
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def _async_both_memoize(
    func: AsyncFunc[P, R],
    keys: Optional[list[str]],
    cache_dir: str,
    ignore_self: bool,
    size: int,
    key_fn: Optional[Callable[..., Any]],
) -> AsyncFunc[P, R]:
    """Two-tier (memory LRU + disk) memoization for async functions.

    Disk I/O runs in the default executor; concurrent awaits of the same key
    share one in-flight task.
    """
    mem_cache = _mem_cache_for(func, size)

    inflight: dict[str, asyncio.Task[R]] = {}
    alock = asyncio.Lock()

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        func_source, sub_dir, key_id = _compute_cache_components(
            func, args, kwargs, ignore_self, keys, key_fn
        )
        mem_key = identify((func_source, sub_dir, key_id))

        # Memory
        async with alock:
            if mem_key in mem_cache:
                return mem_cache[mem_key]  # type: ignore[return-value]

        # Disk
        if sub_dir == "funcs":
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))

        def check_disk_cache() -> "tuple[bool, Optional[R]]":
            # (hit, value) tuple so a cached ``None`` counts as a hit
            # (previously the `is not None` check recomputed it every call).
            with disk_lock:
                if osp.exists(cache_path):
                    return True, load_json_or_pickle(cache_path)
            return False, None

        # get_running_loop() is the supported API inside a coroutine;
        # get_event_loop() is deprecated in this context since Python 3.10.
        loop = asyncio.get_running_loop()
        hit, disk_result = await loop.run_in_executor(None, check_disk_cache)

        if hit:
            with mem_lock:
                mem_cache[mem_key] = disk_result  # type: ignore[index]
            return disk_result  # type: ignore[return-value]

        # Avoid duplicate async work for same key
        async with alock:
            task = inflight.get(mem_key)
            if task is None:
                task = asyncio.create_task(func(*args, **kwargs))  # type: ignore[arg-type]
                inflight[mem_key] = task

        try:
            result = await task
        finally:
            async with alock:
                inflight.pop(mem_key, None)

        def write_disk_cache() -> None:
            with disk_lock:
                if not osp.exists(cache_path):
                    dump_json_or_pickle(result, cache_path)

        await loop.run_in_executor(None, write_disk_cache)

        with mem_lock:
            mem_cache[mem_key] = result  # type: ignore[index]
        return result

    return wrapper
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
# --------------------------------------------------------------------------------------
|
|
555
|
+
# Public decorator (only export memoize)
|
|
556
|
+
# --------------------------------------------------------------------------------------
|
|
557
|
+
|
|
558
|
+
@overload
def memoize(
    _func: Callable[P, R],
    *,
    keys: Optional[list[str]] = ...,
    key: Optional[Callable[..., Any]] = ...,
    cache_dir: str = ...,
    cache_type: Literal["memory", "disk", "both"] = ...,
    size: int = ...,
    ignore_self: bool = ...,
    verbose: bool = ...,
) -> Callable[P, R]: ...
@overload
def memoize(
    _func: Callable[P, Awaitable[R]],
    *,
    keys: Optional[list[str]] = ...,
    key: Optional[Callable[..., Any]] = ...,
    cache_dir: str = ...,
    cache_type: Literal["memory", "disk", "both"] = ...,
    size: int = ...,
    ignore_self: bool = ...,
    verbose: bool = ...,
) -> Callable[P, Awaitable[R]]: ...
@overload
def memoize(
    _func: None = ...,
    *,
    keys: Optional[list[str]] = ...,
    key: Optional[Callable[..., Any]] = ...,
    cache_dir: str = ...,
    cache_type: Literal["memory", "disk", "both"] = ...,
    size: int = ...,
    ignore_self: bool = ...,
    verbose: bool = ...,
) -> Callable[[Callable[P, R]], Callable[P, R]]: ...


def memoize(
    _func: Optional[Callable[P, Any]] = None,
    *,
    keys: Optional[list[str]] = None,
    key: Optional[Callable[..., Any]] = None,
    cache_dir: str = SPEED_CACHE_DIR,
    cache_type: Literal["memory", "disk", "both"] = "both",
    size: int = 256,
    ignore_self: bool = True,
    verbose: bool = False,
):
    """
    Universal memoizer that supports sync and async functions, preserves annotations
    for Pylance via ParamSpec/TypeVar, and caches in memory + disk by default.

    - keys: list of argument names to include in key (back-compat).
    - key: custom callable (*args, **kwargs) -> hashable/serializable object for keying.
      Prefer this for performance on big inputs (e.g., key=lambda x: x.id).
    - cache_dir: disk cache base directory (unlimited size).
    - cache_type: "memory" | "disk" | "both" (default "both").
    - size: memory LRU size per-function (default 256 items).
    - ignore_self: ignore 'self' when building the default key for bound methods.
    - verbose: enable warnings on cache load/write errors.
    """
    # expanduser is a no-op on paths without a leading "~", so apply it
    # unconditionally — the previous `"~/" in cache_dir` test missed a bare "~".
    cache_dir = osp.expanduser(cache_dir)

    def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
        is_async = inspect.iscoroutinefunction(func)

        if cache_type == "memory":
            if is_async:
                return _async_memory_memoize(func, size, keys, ignore_self, key)  # type: ignore[return-value]
            return _memory_memoize(func, size, keys, ignore_self, key)  # type: ignore[return-value]

        if cache_type == "disk":
            if is_async:
                return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose, key)  # type: ignore[return-value]
            return _disk_memoize(func, keys, cache_dir, ignore_self, verbose, key)  # type: ignore[return-value]

        # cache_type == "both"
        if is_async:
            return _async_both_memoize(func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]
        return both_memoize(func, keys, cache_dir, ignore_self, size, key)  # type: ignore[return-value]

    # Support both @memoize and @memoize(...)
    if _func is None:
        return decorator
    else:
        return decorator(_func)
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
# Public API. ``identify_uuid`` is imported by the package ``__init__`` from
# this module, so it belongs in the export list too.
__all__ = ["memoize", "identify", "identify_uuid"]
|
|
@@ -1,494 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import functools
|
|
3
|
-
import inspect
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
import os.path as osp
|
|
7
|
-
import pickle
|
|
8
|
-
import uuid
|
|
9
|
-
from threading import Lock
|
|
10
|
-
from typing import Any, Awaitable, Callable, Literal, TypeVar
|
|
11
|
-
|
|
12
|
-
import cachetools
|
|
13
|
-
import pandas as pd
|
|
14
|
-
import xxhash
|
|
15
|
-
from loguru import logger
|
|
16
|
-
from pydantic import BaseModel
|
|
17
|
-
|
|
18
|
-
from speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
|
|
19
|
-
from speedy_utils.common.utils_misc import mkdir_or_exist
|
|
20
|
-
|
|
21
|
-
SPEED_CACHE_DIR = osp.join(osp.expanduser("~"), ".cache/speedy_cache")
|
|
22
|
-
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=128_000)
|
|
23
|
-
|
|
24
|
-
thread_locker = Lock()
|
|
25
|
-
|
|
26
|
-
# Add two locks for thread-safe cache access
|
|
27
|
-
disk_lock = Lock()
|
|
28
|
-
mem_lock = Lock()
|
|
29
|
-
|
|
30
|
-
# Add async-specific types
|
|
31
|
-
T = TypeVar('T')
|
|
32
|
-
AsyncFunc = Callable[..., Awaitable[T]]
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def compute_func_id(func, args, kwargs, ignore_self, keys):
|
|
36
|
-
func_source = get_source(func)
|
|
37
|
-
if keys:
|
|
38
|
-
arg_spec = inspect.getfullargspec(func).args
|
|
39
|
-
used_args = {arg_spec[i]: arg for i, arg in enumerate(args)}
|
|
40
|
-
used_args.update(kwargs)
|
|
41
|
-
values = [used_args[k] for k in keys if k in used_args]
|
|
42
|
-
if not values:
|
|
43
|
-
raise ValueError(f"Keys {keys} not found in function arguments")
|
|
44
|
-
param_hash = identify(values)
|
|
45
|
-
dir_path = f"{func.__name__}_{identify(func_source)}"
|
|
46
|
-
key_id = f"{'_'.join(keys)}_{param_hash}.pkl"
|
|
47
|
-
return func_source, dir_path, key_id
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
inspect.getfullargspec(func).args
|
|
51
|
-
and inspect.getfullargspec(func).args[0] == "self"
|
|
52
|
-
and ignore_self
|
|
53
|
-
):
|
|
54
|
-
fid = (func_source, args[1:], kwargs)
|
|
55
|
-
else:
|
|
56
|
-
fid = (func_source, args, kwargs)
|
|
57
|
-
return func_source, "funcs", f"{identify(fid)}.pkl"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def fast_serialize(x: Any) -> bytes:
|
|
61
|
-
try:
|
|
62
|
-
return json.dumps(x, sort_keys=True).encode("utf-8")
|
|
63
|
-
except (TypeError, ValueError):
|
|
64
|
-
return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def identify(obj: Any, depth=0, max_depth=2) -> str:
|
|
68
|
-
if isinstance(obj, (list, tuple)):
|
|
69
|
-
x = [identify(x, depth + 1, max_depth) for x in obj]
|
|
70
|
-
x = "\n".join(x)
|
|
71
|
-
return identify(x, depth + 1, max_depth)
|
|
72
|
-
# is pandas row or dict
|
|
73
|
-
elif isinstance(obj, (pd.DataFrame, pd.Series)):
|
|
74
|
-
x = str(obj.to_dict())
|
|
75
|
-
return identify(x, depth + 1, max_depth)
|
|
76
|
-
elif hasattr(obj, "__code__"):
|
|
77
|
-
return identify(get_source(obj), depth + 1, max_depth)
|
|
78
|
-
elif isinstance(obj, BaseModel):
|
|
79
|
-
obj = obj.model_dump()
|
|
80
|
-
return identify(obj, depth + 1, max_depth)
|
|
81
|
-
elif isinstance(obj, dict):
|
|
82
|
-
ks = sorted(obj.keys())
|
|
83
|
-
vs = [identify(obj[k], depth + 1, max_depth) for k in ks]
|
|
84
|
-
return identify([ks, vs], depth + 1, max_depth)
|
|
85
|
-
elif obj is None:
|
|
86
|
-
return identify("None", depth + 1, max_depth)
|
|
87
|
-
else:
|
|
88
|
-
# primitive_types = [int, float, str, bool]
|
|
89
|
-
# if not type(obj) in primitive_types:
|
|
90
|
-
# logger.warning(f"Unknown type: {type(obj)}")
|
|
91
|
-
return xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def identify_uuid(x: Any) -> str:
|
|
95
|
-
data = fast_serialize(x)
|
|
96
|
-
hash_obj = xxhash.xxh128(data, seed=0)
|
|
97
|
-
return str(uuid.UUID(bytes=hash_obj.digest()))
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def get_source(func):
|
|
101
|
-
code = inspect.getsource(func)
|
|
102
|
-
for r in [" ", "\n", "\t", "\r"]:
|
|
103
|
-
code = code.replace(r, "")
|
|
104
|
-
return code
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def _disk_memoize(func, keys, cache_dir, ignore_self, verbose):
|
|
108
|
-
@functools.wraps(func)
|
|
109
|
-
def wrapper(*args, **kwargs):
|
|
110
|
-
try:
|
|
111
|
-
# Compute cache path as before
|
|
112
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
113
|
-
func, args, kwargs, ignore_self, keys
|
|
114
|
-
)
|
|
115
|
-
if func_source is None:
|
|
116
|
-
return func(*args, **kwargs)
|
|
117
|
-
if sub_dir == "funcs":
|
|
118
|
-
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
119
|
-
else:
|
|
120
|
-
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
121
|
-
mkdir_or_exist(osp.dirname(cache_path))
|
|
122
|
-
|
|
123
|
-
# First check with disk lock
|
|
124
|
-
with disk_lock:
|
|
125
|
-
if osp.exists(cache_path):
|
|
126
|
-
# logger.debug(f"Cache HIT for {func.__name__}, key={cache_path}")
|
|
127
|
-
try:
|
|
128
|
-
return load_json_or_pickle(cache_path)
|
|
129
|
-
except Exception as e:
|
|
130
|
-
if osp.exists(cache_path):
|
|
131
|
-
os.remove(cache_path)
|
|
132
|
-
logger.opt(depth=1).warning(
|
|
133
|
-
f"Error loading cache: {str(e)[:100]}, continue to recompute"
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
result = func(*args, **kwargs)
|
|
137
|
-
|
|
138
|
-
# Write result under disk lock to avoid race conditions
|
|
139
|
-
with disk_lock:
|
|
140
|
-
if not osp.exists(cache_path):
|
|
141
|
-
dump_json_or_pickle(result, cache_path)
|
|
142
|
-
return result
|
|
143
|
-
except Exception as e:
|
|
144
|
-
logger.opt(depth=1).warning(
|
|
145
|
-
f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
|
|
146
|
-
)
|
|
147
|
-
return func(*args, **kwargs)
|
|
148
|
-
|
|
149
|
-
return wrapper
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose):
|
|
153
|
-
@functools.wraps(func)
|
|
154
|
-
async def wrapper(*args, **kwargs):
|
|
155
|
-
try:
|
|
156
|
-
# Compute cache path as before
|
|
157
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
158
|
-
func, args, kwargs, ignore_self, keys
|
|
159
|
-
)
|
|
160
|
-
if func_source is None:
|
|
161
|
-
return await func(*args, **kwargs)
|
|
162
|
-
if sub_dir == "funcs":
|
|
163
|
-
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
164
|
-
else:
|
|
165
|
-
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
166
|
-
mkdir_or_exist(osp.dirname(cache_path))
|
|
167
|
-
|
|
168
|
-
# First check with disk lock (run in thread to avoid blocking)
|
|
169
|
-
def check_cache():
|
|
170
|
-
with disk_lock:
|
|
171
|
-
if osp.exists(cache_path):
|
|
172
|
-
try:
|
|
173
|
-
return load_json_or_pickle(cache_path)
|
|
174
|
-
except Exception as e:
|
|
175
|
-
if osp.exists(cache_path):
|
|
176
|
-
os.remove(cache_path)
|
|
177
|
-
logger.opt(depth=1).warning(
|
|
178
|
-
f"Error loading cache: {str(e)[:100]}, continue to recompute"
|
|
179
|
-
)
|
|
180
|
-
return None
|
|
181
|
-
|
|
182
|
-
# Run cache check in thread pool to avoid blocking
|
|
183
|
-
loop = asyncio.get_event_loop()
|
|
184
|
-
cached_result = await loop.run_in_executor(None, check_cache)
|
|
185
|
-
if cached_result is not None:
|
|
186
|
-
return cached_result
|
|
187
|
-
|
|
188
|
-
result = await func(*args, **kwargs)
|
|
189
|
-
|
|
190
|
-
# Write result under disk lock (run in thread to avoid blocking)
|
|
191
|
-
def write_cache():
|
|
192
|
-
with disk_lock:
|
|
193
|
-
if not osp.exists(cache_path):
|
|
194
|
-
dump_json_or_pickle(result, cache_path)
|
|
195
|
-
|
|
196
|
-
await loop.run_in_executor(None, write_cache)
|
|
197
|
-
return result
|
|
198
|
-
except Exception as e:
|
|
199
|
-
logger.opt(depth=1).warning(
|
|
200
|
-
f"Failed to cache {func.__name__}: {e}, continue to recompute without cache"
|
|
201
|
-
)
|
|
202
|
-
return await func(*args, **kwargs)
|
|
203
|
-
|
|
204
|
-
return wrapper
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
def _memory_memoize(func, size, keys, ignore_self):
|
|
208
|
-
global LRU_MEM_CACHE
|
|
209
|
-
if LRU_MEM_CACHE.maxsize != size:
|
|
210
|
-
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)
|
|
211
|
-
|
|
212
|
-
@functools.wraps(func)
|
|
213
|
-
def wrapper(*args, **kwargs):
|
|
214
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
215
|
-
func, args, kwargs, ignore_self, keys
|
|
216
|
-
)
|
|
217
|
-
if func_source is None:
|
|
218
|
-
return func(*args, **kwargs)
|
|
219
|
-
name = identify((func_source, sub_dir, key_id))
|
|
220
|
-
|
|
221
|
-
if not hasattr(func, "_mem_cache"):
|
|
222
|
-
func._mem_cache = LRU_MEM_CACHE
|
|
223
|
-
|
|
224
|
-
with mem_lock:
|
|
225
|
-
if name in func._mem_cache:
|
|
226
|
-
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={name}")
|
|
227
|
-
return func._mem_cache[name]
|
|
228
|
-
|
|
229
|
-
result = func(*args, **kwargs)
|
|
230
|
-
|
|
231
|
-
with mem_lock:
|
|
232
|
-
if name not in func._mem_cache:
|
|
233
|
-
func._mem_cache[name] = result
|
|
234
|
-
return result
|
|
235
|
-
|
|
236
|
-
return wrapper
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def _async_memory_memoize(func, size, keys, ignore_self):
|
|
240
|
-
global LRU_MEM_CACHE
|
|
241
|
-
if LRU_MEM_CACHE.maxsize != size:
|
|
242
|
-
LRU_MEM_CACHE = cachetools.LRUCache(maxsize=size)
|
|
243
|
-
|
|
244
|
-
@functools.wraps(func)
|
|
245
|
-
async def wrapper(*args, **kwargs):
|
|
246
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
247
|
-
func, args, kwargs, ignore_self, keys
|
|
248
|
-
)
|
|
249
|
-
if func_source is None:
|
|
250
|
-
return await func(*args, **kwargs)
|
|
251
|
-
name = identify((func_source, sub_dir, key_id))
|
|
252
|
-
|
|
253
|
-
if not hasattr(func, "_mem_cache"):
|
|
254
|
-
func._mem_cache = LRU_MEM_CACHE
|
|
255
|
-
|
|
256
|
-
with mem_lock:
|
|
257
|
-
if name in func._mem_cache:
|
|
258
|
-
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={name}")
|
|
259
|
-
return func._mem_cache[name]
|
|
260
|
-
|
|
261
|
-
result = await func(*args, **kwargs)
|
|
262
|
-
|
|
263
|
-
with mem_lock:
|
|
264
|
-
if name not in func._mem_cache:
|
|
265
|
-
func._mem_cache[name] = result
|
|
266
|
-
return result
|
|
267
|
-
|
|
268
|
-
return wrapper
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def both_memoize(func, keys, cache_dir, ignore_self):
|
|
272
|
-
@functools.wraps(func)
|
|
273
|
-
def wrapper(*args, **kwargs):
|
|
274
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
275
|
-
func, args, kwargs, ignore_self, keys
|
|
276
|
-
)
|
|
277
|
-
if func_source is None:
|
|
278
|
-
return func(*args, **kwargs)
|
|
279
|
-
|
|
280
|
-
mem_key = identify((func_source, sub_dir, key_id))
|
|
281
|
-
if not hasattr(func, "_mem_cache"):
|
|
282
|
-
func._mem_cache = LRU_MEM_CACHE
|
|
283
|
-
|
|
284
|
-
with mem_lock:
|
|
285
|
-
if mem_key in func._mem_cache:
|
|
286
|
-
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={mem_key}")
|
|
287
|
-
return func._mem_cache[mem_key]
|
|
288
|
-
|
|
289
|
-
if sub_dir == "funcs":
|
|
290
|
-
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
291
|
-
else:
|
|
292
|
-
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
293
|
-
mkdir_or_exist(osp.dirname(cache_path))
|
|
294
|
-
|
|
295
|
-
with disk_lock:
|
|
296
|
-
if osp.exists(cache_path):
|
|
297
|
-
# logger.debug(f"Cache HIT (disk) for {func.__name__}, key={cache_path}")
|
|
298
|
-
result = load_json_or_pickle(cache_path)
|
|
299
|
-
with mem_lock:
|
|
300
|
-
func._mem_cache[mem_key] = result
|
|
301
|
-
return result
|
|
302
|
-
# logger.debug(f"Cache MISS for {func.__name__}, key={cache_path}")
|
|
303
|
-
result = func(*args, **kwargs)
|
|
304
|
-
|
|
305
|
-
with disk_lock:
|
|
306
|
-
if not osp.exists(cache_path):
|
|
307
|
-
dump_json_or_pickle(result, cache_path)
|
|
308
|
-
with mem_lock:
|
|
309
|
-
func._mem_cache[mem_key] = result
|
|
310
|
-
return result
|
|
311
|
-
|
|
312
|
-
return wrapper
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
def _async_both_memoize(func, keys, cache_dir, ignore_self):
|
|
316
|
-
@functools.wraps(func)
|
|
317
|
-
async def wrapper(*args, **kwargs):
|
|
318
|
-
func_source, sub_dir, key_id = compute_func_id(
|
|
319
|
-
func, args, kwargs, ignore_self, keys
|
|
320
|
-
)
|
|
321
|
-
if func_source is None:
|
|
322
|
-
return await func(*args, **kwargs)
|
|
323
|
-
|
|
324
|
-
mem_key = identify((func_source, sub_dir, key_id))
|
|
325
|
-
if not hasattr(func, "_mem_cache"):
|
|
326
|
-
func._mem_cache = LRU_MEM_CACHE
|
|
327
|
-
|
|
328
|
-
with mem_lock:
|
|
329
|
-
if mem_key in func._mem_cache:
|
|
330
|
-
# logger.debug(f"Cache HIT (memory) for {func.__name__}, key={mem_key}")
|
|
331
|
-
return func._mem_cache[mem_key]
|
|
332
|
-
|
|
333
|
-
if sub_dir == "funcs":
|
|
334
|
-
cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
|
|
335
|
-
else:
|
|
336
|
-
cache_path = osp.join(cache_dir, sub_dir, key_id)
|
|
337
|
-
mkdir_or_exist(osp.dirname(cache_path))
|
|
338
|
-
|
|
339
|
-
# Check disk cache in thread pool to avoid blocking
|
|
340
|
-
def check_disk_cache():
|
|
341
|
-
with disk_lock:
|
|
342
|
-
if osp.exists(cache_path):
|
|
343
|
-
return load_json_or_pickle(cache_path)
|
|
344
|
-
return None
|
|
345
|
-
|
|
346
|
-
loop = asyncio.get_event_loop()
|
|
347
|
-
disk_result = await loop.run_in_executor(None, check_disk_cache)
|
|
348
|
-
|
|
349
|
-
if disk_result is not None:
|
|
350
|
-
with mem_lock:
|
|
351
|
-
func._mem_cache[mem_key] = disk_result
|
|
352
|
-
return disk_result
|
|
353
|
-
|
|
354
|
-
# logger.debug(f"Cache MISS for {func.__name__}, key={cache_path}")
|
|
355
|
-
result = await func(*args, **kwargs)
|
|
356
|
-
|
|
357
|
-
# Write to disk in thread pool to avoid blocking
|
|
358
|
-
def write_disk_cache():
|
|
359
|
-
with disk_lock:
|
|
360
|
-
if not osp.exists(cache_path):
|
|
361
|
-
dump_json_or_pickle(result, cache_path)
|
|
362
|
-
|
|
363
|
-
await loop.run_in_executor(None, write_disk_cache)
|
|
364
|
-
|
|
365
|
-
with mem_lock:
|
|
366
|
-
func._mem_cache[mem_key] = result
|
|
367
|
-
return result
|
|
368
|
-
|
|
369
|
-
return wrapper
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
def memoize(
|
|
373
|
-
_func=None,
|
|
374
|
-
*,
|
|
375
|
-
keys=None,
|
|
376
|
-
cache_dir=SPEED_CACHE_DIR,
|
|
377
|
-
cache_type: Literal["memory", "disk", "both"] = "disk",
|
|
378
|
-
size=10240,
|
|
379
|
-
ignore_self=True,
|
|
380
|
-
verbose=False,
|
|
381
|
-
):
|
|
382
|
-
if "~/" in cache_dir:
|
|
383
|
-
cache_dir = osp.expanduser(cache_dir)
|
|
384
|
-
|
|
385
|
-
def decorator(func):
|
|
386
|
-
# Check if function is async
|
|
387
|
-
is_async = inspect.iscoroutinefunction(func)
|
|
388
|
-
|
|
389
|
-
if cache_type == "memory":
|
|
390
|
-
if is_async:
|
|
391
|
-
return _async_memory_memoize(
|
|
392
|
-
func,
|
|
393
|
-
size,
|
|
394
|
-
keys,
|
|
395
|
-
ignore_self,
|
|
396
|
-
)
|
|
397
|
-
return _memory_memoize(
|
|
398
|
-
func,
|
|
399
|
-
size,
|
|
400
|
-
keys,
|
|
401
|
-
ignore_self,
|
|
402
|
-
)
|
|
403
|
-
elif cache_type == "disk":
|
|
404
|
-
if is_async:
|
|
405
|
-
return _async_disk_memoize(
|
|
406
|
-
func,
|
|
407
|
-
keys,
|
|
408
|
-
cache_dir,
|
|
409
|
-
ignore_self,
|
|
410
|
-
verbose,
|
|
411
|
-
)
|
|
412
|
-
return _disk_memoize(
|
|
413
|
-
func,
|
|
414
|
-
keys,
|
|
415
|
-
cache_dir,
|
|
416
|
-
ignore_self,
|
|
417
|
-
verbose,
|
|
418
|
-
)
|
|
419
|
-
|
|
420
|
-
# cache_type == "both"
|
|
421
|
-
if is_async:
|
|
422
|
-
return _async_both_memoize(
|
|
423
|
-
func,
|
|
424
|
-
keys,
|
|
425
|
-
cache_dir,
|
|
426
|
-
ignore_self,
|
|
427
|
-
)
|
|
428
|
-
return both_memoize(
|
|
429
|
-
func,
|
|
430
|
-
keys,
|
|
431
|
-
cache_dir,
|
|
432
|
-
verbose,
|
|
433
|
-
)
|
|
434
|
-
|
|
435
|
-
# Handle both @memoize and @memoize() usage patterns
|
|
436
|
-
if _func is None:
|
|
437
|
-
return decorator
|
|
438
|
-
else:
|
|
439
|
-
return decorator(_func)
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def amemoize(
|
|
443
|
-
_func=None,
|
|
444
|
-
*,
|
|
445
|
-
keys: list[str] | None = None,
|
|
446
|
-
cache_dir: str = SPEED_CACHE_DIR,
|
|
447
|
-
cache_type: Literal["memory", "disk", "both"] = "disk",
|
|
448
|
-
size: int = 10240,
|
|
449
|
-
ignore_self: bool = True,
|
|
450
|
-
verbose: bool = False,
|
|
451
|
-
):
|
|
452
|
-
"""
|
|
453
|
-
Async-specific memoization decorator for coroutine functions.
|
|
454
|
-
|
|
455
|
-
Args:
|
|
456
|
-
_func: The async function to memoize (when used without parentheses)
|
|
457
|
-
keys: Specific argument keys to use for cache key generation
|
|
458
|
-
cache_dir: Directory for disk cache storage
|
|
459
|
-
cache_type: Type of caching - "memory", "disk", or "both"
|
|
460
|
-
size: Size of memory cache (for memory/both types)
|
|
461
|
-
ignore_self: Whether to ignore 'self' parameter in cache key
|
|
462
|
-
verbose: Enable verbose logging
|
|
463
|
-
|
|
464
|
-
Returns:
|
|
465
|
-
Decorated async function with memoization
|
|
466
|
-
|
|
467
|
-
Example:
|
|
468
|
-
@amemoize(cache_type="both")
|
|
469
|
-
async def my_async_func(x: int) -> str:
|
|
470
|
-
return str(x)
|
|
471
|
-
"""
|
|
472
|
-
if "~/" in cache_dir:
|
|
473
|
-
cache_dir = osp.expanduser(cache_dir)
|
|
474
|
-
|
|
475
|
-
def decorator(func):
|
|
476
|
-
# Ensure the function is actually async
|
|
477
|
-
if not inspect.iscoroutinefunction(func):
|
|
478
|
-
raise ValueError(f"amemoize can only be used with async functions. {func.__name__} is not async.")
|
|
479
|
-
|
|
480
|
-
if cache_type == "memory":
|
|
481
|
-
return _async_memory_memoize(func, size, keys, ignore_self)
|
|
482
|
-
elif cache_type == "disk":
|
|
483
|
-
return _async_disk_memoize(func, keys, cache_dir, ignore_self, verbose)
|
|
484
|
-
else: # cache_type == "both"
|
|
485
|
-
return _async_both_memoize(func, keys, cache_dir, ignore_self)
|
|
486
|
-
|
|
487
|
-
# Handle both @amemoize and @amemoize() usage patterns
|
|
488
|
-
if _func is None:
|
|
489
|
-
return decorator
|
|
490
|
-
else:
|
|
491
|
-
return decorator(_func)
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
__all__ = ["memoize", "identify", "identify_uuid", "amemoize"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{speedy_utils-1.1.9 → speedy_utils-1.1.10}/src/speedy_utils/scripts/openapi_client_codegen.py
RENAMED
|
File without changes
|