speedy-utils 1.1.27__py3-none-any.whl → 1.1.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_utils/__init__.py +16 -4
- llm_utils/chat_format/__init__.py +10 -10
- llm_utils/chat_format/display.py +33 -21
- llm_utils/chat_format/transform.py +17 -19
- llm_utils/chat_format/utils.py +6 -4
- llm_utils/group_messages.py +17 -14
- llm_utils/lm/__init__.py +6 -5
- llm_utils/lm/async_lm/__init__.py +1 -0
- llm_utils/lm/async_lm/_utils.py +10 -9
- llm_utils/lm/async_lm/async_llm_task.py +141 -137
- llm_utils/lm/async_lm/async_lm.py +48 -42
- llm_utils/lm/async_lm/async_lm_base.py +59 -60
- llm_utils/lm/async_lm/lm_specific.py +4 -3
- llm_utils/lm/base_prompt_builder.py +93 -70
- llm_utils/lm/llm.py +126 -108
- llm_utils/lm/llm_signature.py +4 -2
- llm_utils/lm/lm_base.py +72 -73
- llm_utils/lm/mixins.py +102 -62
- llm_utils/lm/openai_memoize.py +124 -87
- llm_utils/lm/signature.py +105 -92
- llm_utils/lm/utils.py +42 -23
- llm_utils/scripts/vllm_load_balancer.py +23 -30
- llm_utils/scripts/vllm_serve.py +8 -7
- llm_utils/vector_cache/__init__.py +9 -3
- llm_utils/vector_cache/cli.py +1 -1
- llm_utils/vector_cache/core.py +59 -63
- llm_utils/vector_cache/types.py +7 -5
- llm_utils/vector_cache/utils.py +12 -8
- speedy_utils/__imports.py +244 -0
- speedy_utils/__init__.py +90 -194
- speedy_utils/all.py +125 -227
- speedy_utils/common/clock.py +37 -42
- speedy_utils/common/function_decorator.py +6 -12
- speedy_utils/common/logger.py +43 -52
- speedy_utils/common/notebook_utils.py +13 -21
- speedy_utils/common/patcher.py +21 -17
- speedy_utils/common/report_manager.py +42 -44
- speedy_utils/common/utils_cache.py +152 -169
- speedy_utils/common/utils_io.py +137 -103
- speedy_utils/common/utils_misc.py +15 -21
- speedy_utils/common/utils_print.py +22 -28
- speedy_utils/multi_worker/process.py +66 -79
- speedy_utils/multi_worker/thread.py +78 -155
- speedy_utils/scripts/mpython.py +38 -36
- speedy_utils/scripts/openapi_client_codegen.py +10 -10
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/METADATA +1 -1
- speedy_utils-1.1.29.dist-info/RECORD +57 -0
- vision_utils/README.md +202 -0
- vision_utils/__init__.py +4 -0
- vision_utils/io_utils.py +735 -0
- vision_utils/plot.py +345 -0
- speedy_utils-1.1.27.dist-info/RECORD +0 -52
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/WHEEL +0 -0
- {speedy_utils-1.1.27.dist-info → speedy_utils-1.1.29.dist-info}/entry_points.txt +0 -0
speedy_utils/common/utils_cache.py

@@ -1,35 +1,13 @@
-import
-import
-import
-
-import os
-import os.path as osp
-import pickle
-import uuid
-import weakref
-from threading import Lock
-from typing import Any, Awaitable, Callable, Literal, Optional, TypeVar, overload
-
-try:
-    # Python 3.10+
-    from typing import ParamSpec
-except ImportError:  # pragma: no cover
-    from typing_extensions import ParamSpec  # type: ignore
-
-import cachetools
-import pandas as pd
-import xxhash
-from loguru import logger
-from pydantic import BaseModel
-
-from speedy_utils.common.utils_io import dump_json_or_pickle, load_json_or_pickle
-from speedy_utils.common.utils_misc import mkdir_or_exist
+from ..__imports import *
+from .utils_io import dump_json_or_pickle, load_json_or_pickle
+from .utils_misc import mkdir_or_exist
+
 
 # --------------------------------------------------------------------------------------
 # Defaults / Globals
 # --------------------------------------------------------------------------------------
 
-SPEED_CACHE_DIR = osp.join(osp.expanduser(
+SPEED_CACHE_DIR = osp.join(osp.expanduser('~'), '.cache/speedy_cache')
 
 # Thread locks for safety
 disk_lock = Lock()
@@ -37,10 +15,10 @@ mem_lock = Lock()
 
 # Quick identifier cache for big objects that support weakref
 # (prevents recomputing expensive keys for the same object instance)
-_QUICK_ID_MAP:
+_QUICK_ID_MAP: 'weakref.WeakKeyDictionary[Any, str]' = weakref.WeakKeyDictionary()
 
 # Per-function memory caches (so different functions can have different LRU sizes)
-_MEM_CACHES:
+_MEM_CACHES: 'weakref.WeakKeyDictionary[Callable[..., Any], cachetools.LRUCache]' = (
     weakref.WeakKeyDictionary()
 )
 
@@ -51,8 +29,8 @@ _GLOBAL_MEMORY_CACHE: dict[str, Any] = {}
 LRU_MEM_CACHE = cachetools.LRUCache(maxsize=256)
 
 # Typing helpers
-P = ParamSpec(
-R = TypeVar(
+P = ParamSpec('P')
+R = TypeVar('R')
 AsyncFunc = Callable[P, Awaitable[R]]
 
 # --------------------------------------------------------------------------------------
@@ -63,7 +41,7 @@ AsyncFunc = Callable[P, Awaitable[R]]
 def fast_serialize(x: Any) -> bytes:
     """Serialize x quickly; JSON if possible (stable), else pickle."""
     try:
-        return json.dumps(x, sort_keys=True, default=str).encode(
+        return json.dumps(x, sort_keys=True, default=str).encode('utf-8')
     except (TypeError, ValueError):
         return pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
 
@@ -80,31 +58,28 @@ def get_source(func: Callable[..., Any]) -> str:
         code = inspect.getsource(func)
     except OSError:
         # source not available (e.g., builtins, some C extensions)
-        mod = getattr(func,
-        qn = getattr(func,
-        code = f
+        mod = getattr(func, '__module__', 'unknown')
+        qn = getattr(func, '__qualname__', getattr(func, '__name__', 'unknown'))
+        code = f'{mod}.{qn}'
     # normalize whitespace to make it stable
-    for r in (
-        code = code.replace(r,
+    for r in (' ', '\n', '\t', '\r'):
+        code = code.replace(r, '')
     return code
 
 
-def _try_get_quick_id(obj: Any) ->
+def _try_get_quick_id(obj: Any) -> str | None:
     """Return a quick identifier if obj is weakref-able and cached."""
-
+    with contextlib.suppress(TypeError):
         return _QUICK_ID_MAP.get(obj)  # type: ignore[arg-type]
-
-
-        return None
+    # not weakref-able (e.g., list/dict); cannot use WeakKeyDictionary
+    return None
 
 
 def _try_store_quick_id(obj: Any, ident: str) -> None:
     """Store quick identifier if obj is weakref-able."""
-
+    with contextlib.suppress(TypeError):
         _QUICK_ID_MAP[obj] = ident  # type: ignore[index]
-
-        # not weakref-able
-        pass
+    # not weakref-able
 
 
 def identify(obj: Any, depth: int = 0, max_depth: int = 2) -> str:
@@ -120,45 +95,50 @@ def identify(obj: Any, depth: int = 0, max_depth: int = 2) -> str:
 
     if isinstance(obj, (list, tuple)):
         x = [identify(x, depth + 1, max_depth) for x in obj]
-        x =
+        x = '\n'.join(x)
         out = identify(x, depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
-
+    if isinstance(obj, (pd.DataFrame, pd.Series)):
         x = str(obj.to_dict())
         out = identify(x, depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
-
+    if hasattr(obj, '__code__'):
         out = identify(get_source(obj), depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
-
-
+    if isinstance(obj, BaseModel):
+        # Use hasattr for type checker compatibility
+        model_data = (
+            obj.model_dump()  # type: ignore
+            if hasattr(obj, 'model_dump')
+            else obj.dict()
+        )
+        out = identify(model_data, depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
-
+    if isinstance(obj, dict):
         ks = sorted(obj.keys())
         vs = [identify(obj[k], depth + 1, max_depth) for k in ks]
         out = identify([ks, vs], depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
-
-        out = identify(
-        if depth == 0:
-            _try_store_quick_id(obj, out)
-        return out
-    else:
-        # primitives / everything else
-        out = xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
+    if obj is None:
+        out = identify('None', depth + 1, max_depth)
         if depth == 0:
             _try_store_quick_id(obj, out)
         return out
+    # primitives / everything else
+    out = xxhash.xxh64_hexdigest(fast_serialize(obj), seed=0)
+    if depth == 0:
+        _try_store_quick_id(obj, out)
+    return out
 
 
 def _build_named_keys(
@@ -173,7 +153,7 @@ def _build_named_keys(
     used_args.update(kwargs)
     values = [used_args[k] for k in keys if k in used_args]
     if not values:
-        raise ValueError(f
+        raise ValueError(f'Keys {keys} not found in function arguments')
     return values
 
 
@@ -182,8 +162,8 @@ def _compute_cache_components(
     args: tuple[Any, ...],
     kwargs: dict[str, Any],
     ignore_self: bool,
-    keys:
-    key_fn:
+    keys: list[str] | None,
+    key_fn: Callable[..., Any] | None,
 ):
     """
     Return (func_source, sub_dir, key_id) for disk paths and memory keying.
@@ -198,30 +178,30 @@ def _compute_cache_components(
         try:
             custom_val = key_fn(*args, **kwargs)
         except Exception as e:
-            raise ValueError(f
-        sub_dir =
-        key_id = f
+            raise ValueError(f'key function for {func.__name__} raised: {e}') from e
+        sub_dir = 'custom'
+        key_id = f'{identify(custom_val)}.pkl'
         return func_source, sub_dir, key_id
 
     # Named keys (back-compat)
     if keys:
         values = _build_named_keys(func, args, kwargs, keys)
         param_hash = identify(values)
-        dir_path = f
-        key_id = f"
+        dir_path = f'{func.__name__}_{identify(func_source)}'
+        key_id = f'{"_".join(keys)}_{param_hash}.pkl'
         return func_source, dir_path, key_id
 
     # Default: full argument identity (optionally ignoring 'self')
     if (
         inspect.getfullargspec(func).args
-        and inspect.getfullargspec(func).args[0] ==
+        and inspect.getfullargspec(func).args[0] == 'self'
         and ignore_self
     ):
         fid = (func_source, args[1:], kwargs)
     else:
         fid = (func_source, args, kwargs)
 
-    return func_source,
+    return func_source, 'funcs', f'{identify(fid)}.pkl'
 
 
 def _mem_cache_for(func: Callable[..., Any], size: int) -> cachetools.LRUCache:
@@ -246,9 +226,9 @@ def _mem_cache_for(func: Callable[..., Any], size: int) -> cachetools.LRUCache:
 def _memory_memoize(
     func: Callable[P, R],
     size: int,
-    keys:
+    keys: list[str] | None,
     ignore_self: bool,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> Callable[P, R]:
     mem_cache = _mem_cache_for(func, size)
 
@@ -276,9 +256,9 @@ def _memory_memoize(
 def _async_memory_memoize(
     func: AsyncFunc[P, R],
     size: int,
-    keys:
+    keys: list[str] | None,
     ignore_self: bool,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> AsyncFunc[P, R]:
     mem_cache = _mem_cache_for(func, size)
 
@@ -321,11 +301,11 @@ def _async_memory_memoize(
 
 def _disk_memoize(
     func: Callable[P, R],
-    keys:
+    keys: list[str] | None,
     cache_dir: str,
     ignore_self: bool,
     verbose: bool,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> Callable[P, R]:
     @functools.wraps(func)
     def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
@@ -333,7 +313,7 @@ def _disk_memoize(
         func_source, sub_dir, key_id = _compute_cache_components(
             func, args, kwargs, ignore_self, keys, key_fn
         )
-        if sub_dir ==
+        if sub_dir == 'funcs':
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
         else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
@@ -348,7 +328,7 @@ def _disk_memoize(
                    os.remove(cache_path)
                    if verbose:
                        logger.opt(depth=1).warning(
-                            f
+                            f'Error loading cache: {str(e)[:100]}, recomputing'
                        )
 
        result = func(*args, **kwargs)
@@ -360,7 +340,7 @@ def _disk_memoize(
        except Exception as e:
            if verbose:
                logger.opt(depth=1).warning(
-                    f
+                    f'Failed to cache {func.__name__}: {e}, executing without cache'
                )
            return func(*args, **kwargs)
 
@@ -369,11 +349,11 @@ def _disk_memoize(
 
 def _async_disk_memoize(
     func: AsyncFunc[P, R],
-    keys:
+    keys: list[str] | None,
     cache_dir: str,
     ignore_self: bool,
     verbose: bool,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> AsyncFunc[P, R]:
     @functools.wraps(func)
     async def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
@@ -381,13 +361,13 @@ def _async_disk_memoize(
        func_source, sub_dir, key_id = _compute_cache_components(
            func, args, kwargs, ignore_self, keys, key_fn
        )
-        if sub_dir ==
+        if sub_dir == 'funcs':
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))
 
-        def check_cache() ->
+        def check_cache() -> R | None:
            with disk_lock:
                if osp.exists(cache_path):
                    try:
@@ -397,7 +377,7 @@ def _async_disk_memoize(
                        os.remove(cache_path)
                        if verbose:
                            logger.opt(depth=1).warning(
-                                f
+                                f'Error loading cache: {str(e)[:100]}, recomputing'
                            )
            return None
 
@@ -418,7 +398,7 @@ def _async_disk_memoize(
        except Exception as e:
            if verbose:
                logger.opt(depth=1).warning(
-                    f
+                    f'Failed to cache {func.__name__}: {e}, executing without cache'
                )
            return await func(*args, **kwargs)
 
@@ -432,11 +412,11 @@ def _async_disk_memoize(
 
 def both_memoize(
     func: Callable[P, R],
-    keys:
+    keys: list[str] | None,
     cache_dir: str,
     ignore_self: bool,
     size: int,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> Callable[P, R]:
     mem_cache = _mem_cache_for(func, size)
 
@@ -453,13 +433,13 @@ def both_memoize(
                return mem_cache[mem_key]
 
        # Disk next
-        if sub_dir ==
+        if sub_dir == 'funcs':
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))
 
-        disk_result:
+        disk_result: R | None = None
        with disk_lock:
            if osp.exists(cache_path):
                try:
@@ -488,11 +468,11 @@ def both_memoize(
 
 def _async_both_memoize(
     func: AsyncFunc[P, R],
-    keys:
+    keys: list[str] | None,
     cache_dir: str,
     ignore_self: bool,
     size: int,
-    key_fn:
+    key_fn: Callable[..., Any] | None,
 ) -> AsyncFunc[P, R]:
     mem_cache = _mem_cache_for(func, size)
 
@@ -512,13 +492,13 @@ def _async_both_memoize(
                return mem_cache[mem_key]
 
        # Disk
-        if sub_dir ==
+        if sub_dir == 'funcs':
            cache_path = osp.join(cache_dir, sub_dir, func.__name__, key_id)
        else:
            cache_path = osp.join(cache_dir, sub_dir, key_id)
        mkdir_or_exist(osp.dirname(cache_path))
 
-        def check_disk_cache() ->
+        def check_disk_cache() -> R | None:
            with disk_lock:
                if osp.exists(cache_path):
                    return load_json_or_pickle(cache_path)
@@ -569,10 +549,10 @@ def _async_both_memoize(
 def memoize(
     _func: Callable[P, R],
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     cache_dir: str = ...,
-    cache_type: Literal[
+    cache_type: Literal['memory', 'disk', 'both'] = ...,
     size: int = ...,
     ignore_self: bool = ...,
     verbose: bool = ...,
@@ -583,10 +563,10 @@ def memoize(
 def memoize(
     _func: Callable[P, Awaitable[R]],
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     cache_dir: str = ...,
-    cache_type: Literal[
+    cache_type: Literal['memory', 'disk', 'both'] = ...,
     size: int = ...,
     ignore_self: bool = ...,
     verbose: bool = ...,
@@ -597,10 +577,10 @@ def memoize(
 def memoize(
     _func: None = ...,
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     cache_dir: str = ...,
-    cache_type: Literal[
+    cache_type: Literal['memory', 'disk', 'both'] = ...,
     size: int = ...,
     ignore_self: bool = ...,
     verbose: bool = ...,
@@ -608,13 +588,13 @@ def memoize(
 
 
 @overload
-def memoize(
+def memoize(  # type: ignore
     _func: None = ...,
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     cache_dir: str = ...,
-    cache_type: Literal[
+    cache_type: Literal['memory', 'disk', 'both'] = ...,
     size: int = ...,
     ignore_self: bool = ...,
     verbose: bool = ...,
@@ -622,12 +602,12 @@ def memoize(  # type: ignore
 
 
 def memoize(
-    _func:
+    _func: Callable[P, Any] | None = None,
     *,
-    keys:
-    key:
+    keys: list[str] | None = None,
+    key: Callable[..., Any] | None = None,
     cache_dir: str = SPEED_CACHE_DIR,
-    cache_type: Literal[
+    cache_type: Literal['memory', 'disk', 'both'] = 'both',
     size: int = 256,
     ignore_self: bool = True,
     verbose: bool = False,
@@ -645,22 +625,26 @@ def memoize(
     - ignore_self: ignore 'self' when building the default key for bound methods.
     - verbose: enable warnings on cache load/write errors.
     """
-    if
+    if '~/' in cache_dir:
        cache_dir = osp.expanduser(cache_dir)
-    from speedy_utils import timef
 
    def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
        is_async = inspect.iscoroutinefunction(func)
 
        # Apply timing decorator if verbose=True
-
+        if verbose:
+            from speedy_utils import timef
+
+            target_func = timef(func)
+        else:
+            target_func = func
 
-        if cache_type ==
+        if cache_type == 'memory':
            if is_async:
                return _async_memory_memoize(target_func, size, keys, ignore_self, key)
            return _memory_memoize(target_func, size, keys, ignore_self, key)
 
-        if cache_type ==
+        if cache_type == 'disk':
            if is_async:
                return _async_disk_memoize(
                    target_func, keys, cache_dir, ignore_self, verbose, key
@@ -679,8 +663,7 @@ def memoize(
    # Support both @memoize and @memoize(...)
    if _func is None:
        return decorator
-
-    return decorator(_func)
+    return decorator(_func)
 
 
 # --------------------------------------------------------------------------------------
@@ -692,8 +675,8 @@ def memoize(
 def imemoize(
     _func: Callable[P, R],
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     ignore_self: bool = ...,
 ) -> Callable[P, R]: ...
 
@@ -702,8 +685,8 @@ def imemoize(
 def imemoize(
     _func: Callable[P, Awaitable[R]],
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     ignore_self: bool = ...,
 ) -> Callable[P, Awaitable[R]]: ...
 
@@ -712,8 +695,8 @@ def imemoize(
 def imemoize(
     _func: None = ...,
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     ignore_self: bool = ...,
 ) -> Callable[[Callable[P, R]], Callable[P, R]]: ...
 
@@ -722,52 +705,53 @@ def imemoize(
 def imemoize(  # type: ignore
     _func: None = ...,
     *,
-    keys:
-    key:
+    keys: list[str] | None = ...,
+    key: Callable[..., Any] | None = ...,
     ignore_self: bool = ...,
 ) -> Callable[[Callable[P, Awaitable[R]]], Callable[P, Awaitable[R]]]: ...
 
 
 def imemoize(
-    _func:
+    _func: Callable[P, Any] | None = None,
     *,
-    keys:
-    key:
+    keys: list[str] | None = None,
+    key: Callable[..., Any] | None = None,
     ignore_self: bool = True,
 ):
    """
    In-memory memoization decorator with global persistent cache.
-
+
    Unlike regular memoize, this uses a global memory cache that persists
    across IPython %load executions. The cache key is based on the function's
    source code combined with runtime arguments, making it suitable for
    notebook environments where functions may be reloaded.
-
+
    Args:
        keys: list of argument names to include in key (optional).
        key: custom callable (*args, **kwargs) -> hashable for keying (optional).
        ignore_self: ignore 'self' when building cache key for bound methods.
-
+
    Example:
        @imemoize
        def expensive_computation(x):
            import time
            time.sleep(2)
            return x * x
-
+
        # First call computes and caches
        result1 = expensive_computation(5)
-
+
        # Second call retrieves from memory cache
-        result2 = expensive_computation(5)
+        result2 = expensive_computation(5)
-
+
        # Even after %load file.py in IPython, the cache persists
    """
-
+
    def decorator(func: Callable[P, Any]) -> Callable[P, Any]:
        is_async = inspect.iscoroutinefunction(func)
-
+
        if is_async:
+
            @functools.wraps(func)
            async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
                # Compute cache key based on function source + args
@@ -775,50 +759,49 @@ def imemoize(
                    func, args, kwargs, ignore_self, keys, key
                )
                cache_key = identify((func_source, sub_dir, key_id))
-
+
                # Check global memory cache
                with mem_lock:
                    if cache_key in _GLOBAL_MEMORY_CACHE:
                        return _GLOBAL_MEMORY_CACHE[cache_key]
-
+
                # Compute result and store in cache
                result = await func(*args, **kwargs)
-
+
                with mem_lock:
                    _GLOBAL_MEMORY_CACHE[cache_key] = result
-
+
                return result
-
+
            return async_wrapper
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+        @functools.wraps(func)
+        def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
+            # Compute cache key based on function source + args
+            func_source, sub_dir, key_id = _compute_cache_components(
+                func, args, kwargs, ignore_self, keys, key
+            )
+            cache_key = identify((func_source, sub_dir, key_id))
+
+            # Check global memory cache
+            with mem_lock:
+                if cache_key in _GLOBAL_MEMORY_CACHE:
+                    return _GLOBAL_MEMORY_CACHE[cache_key]
+
+            # Compute result and store in cache
+            result = func(*args, **kwargs)
+
+            with mem_lock:
+                _GLOBAL_MEMORY_CACHE[cache_key] = result
+
+            return result
+
+        return sync_wrapper
+
    # Support both @imemoize and @imemoize(...)
    if _func is None:
        return decorator
-
-    return decorator(_func)
+    return decorator(_func)
 
 
-__all__ = [
+__all__ = ['memoize', 'imemoize', 'identify']