yes3 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yes3/__init__.py +1 -0
- yes3/caching/__init__.py +6 -0
- yes3/caching/base.py +302 -0
- yes3/caching/local_cache.py +236 -0
- yes3/caching/memory_cache.py +74 -0
- yes3/caching/multi_cache.py +169 -0
- yes3/caching/s3_cache.py +183 -0
- yes3/caching/setup_helpers.py +42 -0
- yes3/client.py +82 -0
- yes3/config.py +26 -0
- yes3/s3.py +756 -0
- yes3/utils/__init__.py +0 -0
- yes3/utils/decorators.py +26 -0
- yes3/utils/testing.py +39 -0
- yes3-0.0.1.dist-info/METADATA +145 -0
- yes3-0.0.1.dist-info/RECORD +19 -0
- yes3-0.0.1.dist-info/WHEEL +5 -0
- yes3-0.0.1.dist-info/licenses/LICENSE +21 -0
- yes3-0.0.1.dist-info/top_level.txt +1 -0
yes3/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .s3 import S3Location, is_s3_url
|
yes3/caching/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from .base import Cache, CacheCore, CachedItemMeta, Serializer, check_meta_mismatches
|
|
2
|
+
from .local_cache import LocalDiskCache
|
|
3
|
+
from .memory_cache import MemoryCache
|
|
4
|
+
from .multi_cache import MultiCache
|
|
5
|
+
from .s3_cache import S3Cache
|
|
6
|
+
from .setup_helpers import setup_cache
|
yes3/caching/base.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
from abc import ABCMeta, abstractmethod
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime, UTC
|
|
5
|
+
from typing import Iterable, Iterator, Optional, Self
|
|
6
|
+
|
|
7
|
+
# Sentinel used as the "no default supplied" marker for `get`/`pop`, so that None can be passed as a real default.
_NotSpecified = object()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def raise_not_found(key) -> KeyError:
    """Raise a ``KeyError`` reporting that *key* is missing from the cache.

    This helper always raises; it never returns a value to the caller.
    """
    message = f"key '{key}' not found in cache"
    raise KeyError(message)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class CachedItemMeta:
|
|
16
|
+
key: str
|
|
17
|
+
path: Optional[str]
|
|
18
|
+
size: Optional[int]
|
|
19
|
+
timestamp: Optional[datetime]
|
|
20
|
+
|
|
21
|
+
_ts_format = '%Y-%m-%d %H:%M:%S.%f %z'
|
|
22
|
+
|
|
23
|
+
def __post_init__(self):
|
|
24
|
+
if isinstance(self.timestamp, float):
|
|
25
|
+
self.timestamp = datetime.fromtimestamp(self.timestamp, UTC)
|
|
26
|
+
if isinstance(self.timestamp, str):
|
|
27
|
+
self.timestamp = datetime.strptime(self.timestamp, self._ts_format)
|
|
28
|
+
|
|
29
|
+
def to_dict(self) -> dict:
|
|
30
|
+
return {
|
|
31
|
+
'key': self.key,
|
|
32
|
+
'path': self.path,
|
|
33
|
+
'size': self.size,
|
|
34
|
+
'timestamp': self.timestamp.strftime(self._ts_format) if self.timestamp else None,
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CacheCore(metaclass=ABCMeta):
    """Abstract base for caches: mapping-style access plus active / read-only flags.

    Subclasses implement ``__contains__``, ``get``, ``get_meta``, ``put``,
    ``remove`` and ``keys``; the remaining methods here are built on those.
    """

    def __init__(self, active=True, read_only=False):
        """Store the initial ``active`` and ``read_only`` flags."""
        self._active = active
        self._read_only = read_only

    @abstractmethod
    def __contains__(self, key):
        """Report whether *key* is present in the cache."""

    @abstractmethod
    def get(self, key, default=_NotSpecified):
        """Return the object stored under *key*; *default* is the fallback for a missing key."""

    @abstractmethod
    def get_meta(self, key) -> CachedItemMeta:
        """Return the metadata recorded for *key*."""

    @abstractmethod
    def put(self, key, obj, update=False, meta: Optional[CachedItemMeta] = None):
        """Store *obj* under *key*; ``update`` permits overwriting an existing entry."""

    @abstractmethod
    def remove(self, key):
        """Remove *key* from the cache."""

    @abstractmethod
    def keys(self):
        """Return the keys currently held by the cache."""

    def __getitem__(self, key: str):
        """``cache[key]`` delegates to :meth:`get` with no default."""
        return self.get(key)

    def __setitem__(self, key: str, obj) -> None:
        """``cache[key] = obj`` delegates to :meth:`put`."""
        self.put(key, obj)

    def __delitem__(self, key: str) -> None:
        """``del cache[key]`` delegates to :meth:`remove`."""
        self.remove(key)

    def is_active(self) -> bool:
        """Return the current ``active`` flag."""
        return self._active

    def activate(self):
        """Set the cache active and return ``self`` for chaining."""
        self._active = True
        return self

    def deactivate(self):
        """Set the cache inactive and return ``self`` for chaining."""
        self._active = False
        return self

    def is_read_only(self) -> bool:
        """Return the current ``read_only`` flag."""
        return self._read_only

    def set_read_only(self, value: bool) -> Self:
        """Set the ``read_only`` flag to *value* and return ``self`` for chaining."""
        self._read_only = value
        return self

    def update(self, key: str, obj):
        """Overwrite the entry for an existing *key*.

        Raises:
            KeyError: if *key* is not currently in the cache.
        """
        if key in self:
            self.put(key, obj, update=True)
        else:
            raise_not_found(key)

    def pop(self, key: str, default=_NotSpecified):
        """Fetch the object for *key* (or *default*), then remove *key*, and return what was fetched."""
        value = self.get(key, default=default)
        self.remove(key)
        return value

    def list(self) -> dict[str, CachedItemMeta]:
        """Return a dict mapping every key to its metadata."""
        return {key: self.get_meta(key) for key in self.keys()}

    def subcache(self, *args, **kwargs) -> Self:
        """Not supported by default; subclasses that support sub-caches override this."""
        raise NotImplementedError(f"`subcache` method is not defined for class {type(self).__name__}")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class CacheReaderWriter(metaclass=ABCMeta):
    """Abstract storage backend used by ``Cache`` to read, write and delete items and their metadata."""

    @abstractmethod
    def read(self, key: str):
        """Load and return the object stored under *key*."""

    @abstractmethod
    def get_meta(self, key: str) -> CachedItemMeta:
        """Return the metadata stored for *key*."""

    @abstractmethod
    def write(self, key: str, obj, meta=None) -> CachedItemMeta:
        """Persist *obj* under *key* and return the metadata recorded for it."""

    @abstractmethod
    def delete(self, key: str, meta_only=False):
        """Delete the stored item for *key*; ``meta_only`` restricts deletion to its metadata."""
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class CacheCatalog(metaclass=ABCMeta):
    """Abstract index of cached keys and their metadata, consulted by ``Cache``."""

    @abstractmethod
    def contains(self, key: str):
        """Report whether *key* is registered in the catalog."""

    @abstractmethod
    def add(self, key: str, info: CachedItemMeta):
        """Register (or replace) the metadata *info* for *key*."""

    @abstractmethod
    def get(self, key: str) -> CachedItemMeta:
        """Return the metadata registered for *key*."""

    @abstractmethod
    def remove(self, key: str):
        """Drop *key* from the catalog."""

    @abstractmethod
    def keys(self):
        """Return the registered keys."""

    @abstractmethod
    def items(self):
        """Return the registered (key, metadata) pairs."""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Type alias: a catalog mapping, from cache key to that item's metadata.
_CatalogT = dict[str, CachedItemMeta]
|
|
159
|
+
# Type alias: a zero-argument callable that produces a catalog mapping.
_CatalogBuilderT = Callable[[], _CatalogT]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class CacheDictCatalog(CacheCatalog):
    """Catalog backed by an in-memory dict; keys are normalized with ``str()`` on every access."""

    def __init__(
            self,
            catalog: Optional[dict[str, CachedItemMeta]] = None,
            catalog_builder: Optional[_CatalogBuilderT] = None,
    ):
        """Wrap an existing *catalog* dict, or build one with *catalog_builder*.

        When no builder is given, ``dict`` is used, which yields an empty catalog.
        The builder is only invoked here if *catalog* is ``None``.
        """
        self._catalog = catalog
        self._build_catalog = dict if catalog_builder is None else catalog_builder
        if self._catalog is None:
            self.rebuild()

    def rebuild(self):
        """Replace the catalog with a copy of whatever the builder returns."""
        fresh = self._build_catalog()
        self._catalog = fresh.copy()

    def contains(self, key: str):
        """Return whether ``str(key)`` is registered."""
        name = str(key)
        return name in self._catalog

    def add(self, key: str, meta: CachedItemMeta):
        """Register *meta* under ``str(key)``, replacing any existing entry."""
        name = str(key)
        self._catalog[name] = meta

    def get(self, key: str) -> CachedItemMeta:
        """Return the metadata for ``str(key)``; raises ``KeyError`` if it is not registered."""
        name = str(key)
        return self._catalog[name]

    def remove(self, key: str):
        """Drop ``str(key)`` from the catalog; raises ``KeyError`` if it is not registered."""
        name = str(key)
        self._catalog.pop(name)

    def keys(self):
        """Return the registered keys as a new list."""
        return [*self._catalog.keys()]

    def items(self) -> Iterator[tuple[str, CachedItemMeta]]:
        """Return an iterator over (key, metadata) pairs."""
        pairs = self._catalog.items()
        return iter(pairs)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Cache(CacheCore, metaclass=ABCMeta):
    """Cache composed of a catalog (key -> metadata index) and a reader/writer (storage backend).

    An inactive cache behaves as if it were empty: lookups miss, ``keys()`` is
    empty, and ``put`` only prints a warning.
    """

    def __init__(self, catalog: CacheCatalog, reader_writer: CacheReaderWriter, active=True, read_only=False):
        """Store the catalog and storage backend, and initialize the base-class flags."""
        super().__init__(active=active, read_only=read_only)
        self._reader_writer = reader_writer
        self._catalog = catalog

    @classmethod
    @abstractmethod
    def create(cls, *args, **kwargs):
        """Alternate constructor that concrete subclasses must provide."""

    def __contains__(self, key: str) -> bool:
        """Return ``False`` when inactive, otherwise whether the catalog contains *key*."""
        return self._catalog.contains(key) if self.is_active() else False

    def get(self, key: str, default=_NotSpecified):
        """Read and return the object stored under *key*.

        If the cache is inactive or *key* is absent, return *default* when one
        was supplied; otherwise raise ``KeyError``.
        """
        if self.is_active() and key in self:
            return self._reader_writer.read(key)
        if default is _NotSpecified:
            raise_not_found(key)
        else:
            return default

    def get_meta(self, key: str) -> CachedItemMeta:
        """Return the catalog's metadata for *key*; raise ``KeyError`` if inactive or absent."""
        if self.is_active() and key in self:
            return self._catalog.get(key)
        raise_not_found(key)

    def put(self, key: str, obj, *, update=False, meta: Optional[CachedItemMeta] = None) -> Self:
        """Write *obj* under *key* and register the resulting metadata in the catalog.

        Raises:
            TypeError: if the cache is read-only (checked before the active flag).
            ValueError: if *key* already exists and ``update`` is false.

        When the cache is inactive nothing is stored and a warning is printed.
        Returns ``self`` for chaining.
        """
        if self.is_read_only():
            raise TypeError('Cache is in read only mode')
        if not self.is_active():
            print(f'WARNING: {type(self).__name__} is not active')
            return self
        if key in self and not update:
            raise ValueError(f"key '{key}' already exists in cache; use 'update' to overwrite")
        written_meta = self._reader_writer.write(key, obj, meta=meta)
        self._catalog.add(key, written_meta)
        return self

    def remove(self, key: str, meta_only=False) -> Self:
        """Remove *key* from the catalog and delete it from storage.

        Does nothing when the cache is inactive or *key* is absent. With
        ``meta_only`` the flag is forwarded to the storage backend's ``delete``.

        Raises:
            TypeError: if *key* is present and the cache is read-only.
        """
        if not (self.is_active() and key in self):
            return self
        if self.is_read_only():
            raise TypeError('Cache is in read only mode')
        self._catalog.remove(key)
        self._reader_writer.delete(key, meta_only=meta_only)
        return self

    def remove_meta(self, key: str) -> Self:
        """Shorthand for ``remove(key, meta_only=True)``."""
        return self.remove(key, meta_only=True)

    def keys(self) -> list[str]:
        """Return the catalog's keys as a list, or an empty list when inactive."""
        return list(self._catalog.keys()) if self.is_active() else []

    def _repr_params(self) -> list[str]:
        """Build the fragments shown inside ``repr()``: item count plus any state flags."""
        summary = [f'{len(self.keys())} items']
        flags = (
            ('NOT ACTIVE', not self.is_active()),
            ('READ ONLY', self.is_read_only()),
        )
        summary.extend(label for label, is_set in flags if is_set)
        return summary

    def __repr__(self):
        """Return ``ClassName(param, param, ...)`` using :meth:`_repr_params`."""
        joined = ', '.join(self._repr_params())
        return f"{type(self).__name__}({joined})"
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def check_meta_mismatches(caches: Iterable[CacheCore], key=None) -> dict[str, tuple[CachedItemMeta, ...]]:
    """Find keys whose metadata differs between the caches that hold them.

    Args:
        caches: the caches to compare; any iterable (including a one-shot
            iterator or generator) of ``CacheCore`` instances.
        key: restrict the check to this single key; when ``None`` every key
            found in any of the caches is checked.

    Returns:
        A dict mapping each mismatching key to the tuple of metadata objects
        from every cache containing that key (in the order of *caches*). Keys
        held by fewer than two caches are never reported.

    Raises:
        TypeError: if *key* is neither ``None`` nor a string, or if any element
            of *caches* is not a ``CacheCore`` instance.
    """
    if key is not None and not isinstance(key, str):
        raise TypeError('key is not a string')

    # `caches` is traversed several times below (validation, key collection and
    # once per key). Materialize it first so a generator is not exhausted by the
    # validation loop, which would silently produce an empty result.
    caches = list(caches)
    for cache in caches:
        if not isinstance(cache, CacheCore):
            raise TypeError('caches must be an iterable containing Cache instances')

    if key is None:
        keys = {k for cache in caches for k in cache.keys()}
    else:
        keys = [key]

    mismatches = {}
    for k in keys:
        metas = [cache.get_meta(k) for cache in caches if k in cache]
        if len(metas) > 1:
            first_meta = metas[0]
            if any(meta != first_meta for meta in metas[1:]):
                mismatches[k] = tuple(metas)
    return mismatches
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class Serializer(metaclass=ABCMeta):
    """Abstract reader/writer for one file format.

    ``ext`` holds the file extension associated with this serializer; it falls
    back to the class-level ``default_ext`` when none is given.
    """
    default_ext = None

    def __init__(self, ext=None):
        """Use *ext* as the extension, or ``default_ext`` when *ext* is ``None``."""
        self.ext = self.default_ext if ext is None else ext

    @abstractmethod
    def read(self, path):
        """Load and return the object stored at *path*."""

    @abstractmethod
    def write(self, path, obj):
        """Store *obj* at *path*."""
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import pickle
|
|
4
|
+
from datetime import datetime, UTC
|
|
5
|
+
from functools import partial
|
|
6
|
+
from glob import glob
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional, Self
|
|
9
|
+
|
|
10
|
+
from yes3.caching.base import Cache, CacheDictCatalog, CachedItemMeta, Serializer, CacheReaderWriter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PickleSerializer(Serializer):
    """Serializer that stores objects with ``pickle`` (default extension ``pkl``)."""
    default_ext = 'pkl'

    def read(self, path):
        """Unpickle and return the object stored at *path*."""
        # NOTE(review): unpickling can execute arbitrary code; only read cache files from trusted locations.
        with open(path, 'rb') as stream:
            loaded = pickle.load(stream)
        return loaded

    def write(self, path, obj):
        """Pickle *obj* to *path*, creating the parent directory if needed."""
        parent_dir = os.path.dirname(os.path.abspath(path))
        os.makedirs(parent_dir, exist_ok=True)
        with open(path, 'wb') as stream:
            pickle.dump(obj, stream)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class JsonSerializer(Serializer):
    """Serializer that stores objects as JSON text (default extension ``json``)."""
    default_ext = 'json'

    def read(self, path) -> dict:
        """Parse and return the JSON document stored at *path*."""
        with open(path, 'r') as stream:
            parsed = json.load(stream)
        return parsed

    def write(self, path, obj: dict):
        """Write *obj* as JSON to *path*, creating the parent directory if needed."""
        parent_dir = os.path.dirname(os.path.abspath(path))
        os.makedirs(parent_dir, exist_ok=True)
        with open(path, 'w') as stream:
            json.dump(obj, stream)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class JsonMetaSerializer(JsonSerializer):
    """JSON serializer for ``CachedItemMeta`` objects (default extension ``meta``)."""
    default_ext = 'meta'

    def read(self, path) -> CachedItemMeta:
        """Load the JSON dict at *path* and rebuild a ``CachedItemMeta`` from its fields."""
        fields = super().read(path)
        return CachedItemMeta(**fields)

    def write(self, path, meta: CachedItemMeta):
        """Write ``meta.to_dict()`` as JSON to *path*."""
        fields = meta.to_dict()
        super().write(path, fields)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _get_serializer(serializer: str | Serializer, ext=None) -> Serializer:
    """Resolve *serializer* into a ``Serializer`` instance.

    Accepted forms:
      * a class: it is instantiated as ``serializer(ext)`` and then handled as an instance;
      * a string naming a format (``'pkl'``/``'pickle'`` or ``'json'``, case-insensitive,
        leading dots ignored): a new serializer of that kind is created with *ext*;
      * a ``Serializer`` instance: returned as-is, with its ``ext`` overwritten when
        *ext* is not ``None``.

    Raises:
        NotImplementedError: for a string that names no known format.
        TypeError: for any other kind of value.
    """
    if isinstance(serializer, type):
        serializer = serializer(ext)

    if isinstance(serializer, str):
        factories = {'pkl': PickleSerializer, 'pickle': PickleSerializer, 'json': JsonSerializer}
        format_name = serializer.lstrip('.').lower()
        factory = factories.get(format_name)
        if factory is None:
            raise NotImplementedError(f"Serializer not implemented for file type '{serializer}'")
        return factory(ext)

    if isinstance(serializer, Serializer):
        if ext is not None:
            serializer.ext = ext
        return serializer

    raise TypeError(
        f'file_type must be a string or a Serializer subclass, but got type {type(serializer).__name__}')
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _with_ext(path, ext: Optional[str]):
|
|
71
|
+
if ext is None:
|
|
72
|
+
return path
|
|
73
|
+
if not ext.startswith('.'):
|
|
74
|
+
ext = f'.{ext}'
|
|
75
|
+
path_str = str(path)
|
|
76
|
+
if path_str.endswith(ext):
|
|
77
|
+
return path
|
|
78
|
+
else:
|
|
79
|
+
try:
|
|
80
|
+
return type(path)(path_str + ext)
|
|
81
|
+
except (ValueError, TypeError):
|
|
82
|
+
return Path(path_str + ext)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class LocalReaderWriter(CacheReaderWriter):
    """Storage backend keeping each cached item as a data file plus a metadata file under one directory."""

    def __init__(
            self, path: str | Path,
            object_serializer: str | Serializer = PickleSerializer(),
            meta_serializer: str | Serializer = JsonMetaSerializer(),
    ):
        """Remember the root directory and resolve both serializers via ``_get_serializer``."""
        self.path = Path(path)
        self.obj_serializer = _get_serializer(object_serializer)
        self.meta_serializer = _get_serializer(meta_serializer)

    def clone(self, path: str | Path) -> Self:
        """Return a new reader/writer of the same type rooted at *path*, reusing this one's serializers."""
        cls = type(self)
        return cls(path, object_serializer=self.obj_serializer, meta_serializer=self.meta_serializer)

    def key2path(self, key: str, meta=False) -> Path:
        """Return the file path for *key*: the metadata file when ``meta`` is true, else the data file."""
        serializer = self.meta_serializer if meta else self.obj_serializer
        return self.path / _with_ext(key, serializer.ext)

    def path2key(self, path: str | Path) -> str:
        """Return the key for *path*: the stem of its path relative to the root directory."""
        return Path(path).relative_to(self.path).stem

    def read(self, key: str):
        """Load and return the object stored for *key*, printing the location being read."""
        location = self.key2path(key)
        print(f"Reading cached item '{key}' at {location}")
        return self.obj_serializer.read(location)

    def _build_meta(self, path, key=None) -> CachedItemMeta:
        """Build metadata for the file at *path* from ``os.stat`` (size and mtime as a UTC datetime).

        The key is derived from *path* when not given; the recorded ``path`` is
        relative to the root directory.
        """
        if key is None:
            key = self.path2key(path)
        stat_result = os.stat(path)
        relative = path.relative_to(self.path)
        modified = datetime.fromtimestamp(stat_result.st_mtime, UTC)
        return CachedItemMeta(key=key, path=str(relative), size=stat_result.st_size, timestamp=modified)

    def get_meta(self, key: str, rebuild=False) -> CachedItemMeta:
        """Return the metadata for *key*.

        Normally the metadata file is read. With ``rebuild`` the metadata is
        regenerated from the data file and written to the metadata file first.
        """
        meta_location = self.key2path(key, meta=True)
        if not rebuild:
            return self.meta_serializer.read(meta_location)
        rebuilt = self._build_meta(path=self.key2path(key), key=key)
        self.meta_serializer.write(meta_location, rebuilt)
        return rebuilt

    def write(self, key: str, obj, meta: Optional[CachedItemMeta] = None) -> CachedItemMeta:
        """Write *obj* and its metadata for *key*; return the metadata that was written.

        When *meta* is not supplied it is built from the freshly written data file.
        """
        data_location = self.key2path(key)
        print(f"Caching item '{key}' at {data_location}")
        self.obj_serializer.write(data_location, obj)

        if meta is None:
            meta = self._build_meta(path=data_location, key=key)
        self.meta_serializer.write(self.key2path(key, meta=True), meta)
        return meta

    def delete(self, key: str, meta_only=False):
        """Delete the files for *key*: only the metadata file with ``meta_only``, otherwise both files."""
        data_location = self.key2path(key)
        meta_location = self.key2path(key, meta=True)
        if not meta_only:
            print(f"Deleting cached item '{key}' at {data_location}")
            os.remove(data_location)
        else:
            print(f"Deleting cached item '{key}' metadata at {meta_location}")
        os.remove(meta_location)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class LocalDiskCache(Cache):
    """Cache stored on the local filesystem: one data file and one metadata file per item in a directory."""

    @staticmethod
    def _build_catalog_dict(reader_writer: LocalReaderWriter, rebuild_missing_meta=False) -> dict:
        """Scan the cache directory and return ``{key: metadata}`` for every data file found.

        Keys are the stems of the data files directly inside the directory. If
        the sets of data and metadata files differ, either the missing metadata
        is rebuilt (``rebuild_missing_meta=True``, with a printed warning) or a
        ``RuntimeError`` is raised.
        """
        # Local import: only `glob.glob` is imported at module level.
        from glob import escape

        catalog_dict = {}
        if os.path.exists(reader_writer.path):
            data_ext = reader_writer.obj_serializer.ext.lstrip('.')
            meta_ext = reader_writer.meta_serializer.ext.lstrip('.')
            # Escape the directory so that glob metacharacters in the cache path
            # ('[', ']', '*', '?') are matched literally; only the file-name part is a pattern.
            base_dir = escape(str(reader_writer.path))
            data_files = glob(os.path.join(base_dir, f'*.{data_ext}'))
            meta_files = glob(os.path.join(base_dir, f'*.{meta_ext}'))
            data_map = {Path(p).stem: p for p in data_files}
            meta_map = {Path(p).stem: p for p in meta_files}
            if data_map.keys() != meta_map.keys():
                if rebuild_missing_meta:
                    print(f'WARNING: data and metadata files are not aligned for cache at {reader_writer.path}, '
                          'rebuilding missing metadata files')
                else:
                    raise RuntimeError(f'data and metadata files are not aligned for cache at {reader_writer.path}')
            for key in data_map:
                catalog_dict[key] = reader_writer.get_meta(key, rebuild=(key not in meta_map and rebuild_missing_meta))
        if len(catalog_dict) > 0:
            print(f'{len(catalog_dict)} cached items discovered at {reader_writer.path}')
        return catalog_dict

    @classmethod
    def create(
            cls,
            path: str | Path,
            obj_serializer: str | Serializer = PickleSerializer(),
            meta_serializer: str | Serializer = JsonMetaSerializer(),
            reader_writer: Optional[CacheReaderWriter] = None,
            rebuild_missing_meta=False,
            **kwargs,
    ):
        """Create a cache rooted at *path*, cataloguing any items already on disk.

        Args:
            path: directory holding the cache files.
            obj_serializer, meta_serializer: used to build a ``LocalReaderWriter``
                when *reader_writer* is not given (ignored otherwise).
            reader_writer: an existing ``LocalReaderWriter`` to reuse; it is cloned
                onto *path* if it points at a different directory.
            rebuild_missing_meta: rebuild metadata files missing for existing data
                files instead of raising.
            **kwargs: forwarded to the class constructor (e.g. ``active``, ``read_only``).

        Raises:
            TypeError: if *reader_writer* is given but is not a ``LocalReaderWriter``.
        """
        if reader_writer is None:
            reader_writer = LocalReaderWriter(path, obj_serializer, meta_serializer)
        elif not isinstance(reader_writer, LocalReaderWriter):
            raise TypeError(f'`reader_writer` must be a {LocalReaderWriter.__name__} instance')
        elif reader_writer.path != Path(path):
            # Normalize before comparing: a Path never equals a str, so a string
            # `path` would otherwise always look "different" and force a clone.
            reader_writer = reader_writer.clone(path)
        catalog_builder = partial(cls._build_catalog_dict, reader_writer=reader_writer,
                                  rebuild_missing_meta=rebuild_missing_meta)
        catalog = CacheDictCatalog(catalog_builder=catalog_builder)
        return cls(catalog, reader_writer, **kwargs)

    @property
    def path(self) -> Path:
        """Directory in which this cache stores its files."""
        return self._reader_writer.path

    def subcache(self, rel_path: str) -> Self:
        """Return a cache of the same type rooted at ``self.path / rel_path``.

        The new cache inherits this cache's active and read-only flags and serializers.
        """
        path = self.path / rel_path
        kwargs = dict(active=self.is_active(), read_only=self.is_read_only())
        return type(self).create(path, reader_writer=self._reader_writer, **kwargs)

    def clear(self, force=False) -> Self:
        """Remove every item (data and metadata) from the cache.

        Does nothing when the cache is inactive or already empty.

        Raises:
            RuntimeError: if there are items to delete and ``force`` is not true.
            TypeError: from ``remove`` if the cache is read-only.
        """
        if self.is_active() and len(self.keys()) > 0:
            if not force:
                raise RuntimeError(f'Clearing this {type(self).__name__} ({self.path}) requires specifying force=True')
            print(f'Deleting {len(self.keys())} item(s) from cache at {self.path}')
            for key in self.keys():
                self.remove(key)
            # Re-scan the directory and re-initialize in place so the catalog matches what is on disk.
            new_cache = type(self).create(self.path, reader_writer=self._reader_writer)
            self.__init__(new_cache._catalog, new_cache._reader_writer, active=self._active, read_only=self._read_only)
        return self

    def clear_meta(self, force=False) -> Self:
        """Remove every item with ``meta_only=True``: catalog entries and metadata files, not data files.

        Does nothing when the cache is inactive or already empty.

        Raises:
            RuntimeError: if there are items and ``force`` is not true.
            TypeError: from ``remove`` if the cache is read-only.
        """
        if self.is_active() and len(self.keys()) > 0:
            if not force:
                raise RuntimeError(f'Clearing this {type(self).__name__} metadata ({self.path}) requires specifying '
                                   'force=True')
            print(f'Deleting {len(self.keys())} item(s) from cache at {self.path}')
            for key in self.keys():
                self.remove(key, meta_only=True)
        return self

    def _repr_params(self) -> list[str]:
        """Prepend the cache directory to the base-class repr fragments."""
        params = super()._repr_params()
        params.insert(0, str(self.path))
        return params
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from datetime import datetime, UTC
|
|
2
|
+
from typing import Any, Optional, Self
|
|
3
|
+
|
|
4
|
+
from yes3.caching.base import CacheCore, CachedItemMeta, raise_not_found, _NotSpecified
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MemoryCache(CacheCore):
    """Cache holding objects and their metadata in two in-memory dicts.

    An inactive cache behaves as if it were empty: lookups miss, ``keys()`` is
    empty, and mutating calls only print a warning.
    """

    def __init__(self, active=True, read_only=False):
        """Create an empty cache with the given active / read-only flags."""
        super().__init__(active=active, read_only=read_only)
        self._data: dict[str, Any] = {}
        self._meta: dict[str, CachedItemMeta] = {}

    def __contains__(self, key: str):
        """Return whether *key* is stored; always ``False`` when inactive.

        Raises:
            RuntimeError: if data exists for *key* but its metadata is missing.
        """
        if not self.is_active():
            return False
        found = key in self._data
        if found and key not in self._meta:
            raise RuntimeError(f"data exists, but no metadata found, for key '{key}' in {type(self).__name__}")
        return found

    def get(self, key: str, default=_NotSpecified):
        """Return the object stored under *key*.

        If the cache is inactive or *key* is absent, return *default* when one
        was supplied; otherwise raise ``KeyError``.
        """
        if not self.is_active() or key not in self:
            if default is _NotSpecified:
                raise_not_found(key)
            else:
                return default
        return self._data[key]

    def get_meta(self, key: str) -> CachedItemMeta:
        """Return the metadata for *key*; raise ``KeyError`` if inactive or absent."""
        if not self.is_active() or key not in self:
            raise_not_found(key)
        return self._meta[key]

    def put(self, key: str, obj, *, update=False, meta: Optional[CachedItemMeta] = None) -> Self:
        """Store *obj* under *key*.

        When *meta* is not given, metadata is created with the current UTC time
        and no path or size.

        Raises:
            TypeError: if the cache is read-only (checked before the active flag).
            ValueError: if *key* already exists and ``update`` is false.

        When the cache is inactive nothing is stored and a warning is printed.
        Returns ``self`` for chaining.
        """
        if self.is_read_only():
            raise TypeError('Cache is in read only mode')
        if self.is_active():
            if key in self and not update:
                raise ValueError(f"key '{key}' already exists in cache; use 'update' to overwrite")
            if meta is None:
                meta = CachedItemMeta(key=key, timestamp=datetime.now(UTC), path=None, size=None)
            self._meta[key] = meta
            self._data[key] = obj
        else:
            print(f"WARNING: {type(self).__name__} is not active")
        return self

    def remove(self, key: str) -> Self:
        """Remove *key* and its metadata; a missing key is ignored.

        Raises:
            TypeError: if *key* is present and the cache is read-only.

        When the cache is inactive a warning is printed instead.
        """
        if self.is_active():
            if key in self:
                if self.is_read_only():
                    raise TypeError('Cache is in read only mode')
                self._data.pop(key)
                self._meta.pop(key)
        else:
            print(f"WARNING: {type(self).__name__} is not active")
        return self

    def keys(self) -> list[Any]:
        """Return the stored keys as a list, or an empty list when inactive."""
        if not self.is_active():
            return []
        else:
            return list(self._data.keys())

    def clear(self, force=False) -> Self:
        """Remove every item from the cache.

        Raises:
            RuntimeError: if the cache holds items and ``force`` is not true.
            TypeError: if the cache holds items and is read-only.

        When the cache is inactive a warning is printed instead.
        """
        if self.is_active():
            if len(self.keys()) > 0:
                if not force:
                    raise RuntimeError(f'Clearing this {type(self).__name__} requires specifying force=True')
                # Honour read-only mode exactly as `put` and `remove` do; previously
                # a forced clear silently wiped a read-only cache.
                if self.is_read_only():
                    raise TypeError('Cache is in read only mode')
            self._data: dict[str, Any] = {}
            self._meta: dict[str, CachedItemMeta] = {}
        else:
            print(f"WARNING: {type(self).__name__} is not active")
        return self
|