yes3 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- yes3-0.0.1/LICENSE +21 -0
- yes3-0.0.1/PKG-INFO +145 -0
- yes3-0.0.1/README.md +122 -0
- yes3-0.0.1/pyproject.toml +41 -0
- yes3-0.0.1/setup.cfg +4 -0
- yes3-0.0.1/src/yes3/__init__.py +1 -0
- yes3-0.0.1/src/yes3/caching/__init__.py +6 -0
- yes3-0.0.1/src/yes3/caching/base.py +302 -0
- yes3-0.0.1/src/yes3/caching/local_cache.py +236 -0
- yes3-0.0.1/src/yes3/caching/memory_cache.py +74 -0
- yes3-0.0.1/src/yes3/caching/multi_cache.py +169 -0
- yes3-0.0.1/src/yes3/caching/s3_cache.py +183 -0
- yes3-0.0.1/src/yes3/caching/setup_helpers.py +42 -0
- yes3-0.0.1/src/yes3/client.py +82 -0
- yes3-0.0.1/src/yes3/config.py +26 -0
- yes3-0.0.1/src/yes3/s3.py +756 -0
- yes3-0.0.1/src/yes3/utils/__init__.py +0 -0
- yes3-0.0.1/src/yes3/utils/decorators.py +26 -0
- yes3-0.0.1/src/yes3/utils/testing.py +39 -0
- yes3-0.0.1/src/yes3.egg-info/PKG-INFO +145 -0
- yes3-0.0.1/src/yes3.egg-info/SOURCES.txt +23 -0
- yes3-0.0.1/src/yes3.egg-info/dependency_links.txt +1 -0
- yes3-0.0.1/src/yes3.egg-info/requires.txt +9 -0
- yes3-0.0.1/src/yes3.egg-info/top_level.txt +1 -0
- yes3-0.0.1/tests/test_s3.py +256 -0
yes3-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Erik Schomburg
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
yes3-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yes3
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A simple, intuitive, flexible interface for Amazon S3
|
|
5
|
+
Author-email: Erik Schomburg <eschomburg@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/eschombu/yes3
|
|
7
|
+
Project-URL: Issues, https://github.com/eschombu/yes3/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: boto3
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Requires-Dist: pandas
|
|
17
|
+
Requires-Dist: tqdm
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: flake8; extra == "dev"
|
|
20
|
+
Requires-Dist: moto; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest; extra == "dev"
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# `yes3`
|
|
25
|
+
|
|
26
|
+
## A library for intuitive reading, writing, listing, and caching with AWS S3 (Simple Storage Service).
|
|
27
|
+
|
|
28
|
+
This library wraps the `boto3` S3 API boilerplate with a simple and intuitive interface, path flexibility, and powerful
|
|
29
|
+
utilities for easily listing, reading, and writing data on/from/to S3.
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
Using a virtual environment is recommended. The simplest way to install is with pip: `pip install yes3`. To install the
|
|
34
|
+
latest version, you can install from github: `pip install git+https://github.com/eschombu/yes3.git`.
|
|
35
|
+
|
|
36
|
+
To run tests and test scripts, and/or contribute to `yes3`, clone this repository from
|
|
37
|
+
https://github.com/eschombu/yes3.git, and install the dev requirements:
|
|
38
|
+
```
|
|
39
|
+
git clone https://github.com/eschombu/yes3.git
|
|
40
|
+
cd yes3
|
|
41
|
+
# Optionally create a virtual environment:
|
|
42
|
+
# python3.1x -m venv .venv/yes3
|
|
43
|
+
# source .venv/yes3/bin/activate
|
|
44
|
+
pip install -e .[dev]
|
|
45
|
+
pytest
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## TODO
|
|
49
|
+
|
|
50
|
+
1. Documentation
|
|
51
|
+
2. Replace message printing with loggers
|
|
52
|
+
|
|
53
|
+
## Usage
|
|
54
|
+
|
|
55
|
+
### S3 Locations and Paths
|
|
56
|
+
|
|
57
|
+
The `boto3` APIs for S3 typically consider the 'bucket' and 'key' of an S3 object:
|
|
58
|
+
```
|
|
59
|
+
import boto3
|
|
60
|
+
s3_client = boto3.client('s3')
|
|
61
|
+
s3_client.download_file('my-bucket', 'key/to/object', 'path/to/local/file')
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The awscli uses urls:
|
|
65
|
+
```
|
|
66
|
+
aws s3 cp s3://my-bucket/key/to/object path/to/local/file
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
In `yes3`, we accept either, attempting to flexibly interpret input arguments as S3 locations and local paths,
|
|
70
|
+
converting S3 locations into `S3Location` objects:
|
|
71
|
+
```
|
|
72
|
+
from yes3 import s3, S3Location
|
|
73
|
+
|
|
74
|
+
# The following download calls are equivalent
|
|
75
|
+
s3.download('s3://my-bucket/key/to/object', 'path/to/local/file')
|
|
76
|
+
s3.download('my-bucket', 'key/to/object', 'path/to/local/file')
|
|
77
|
+
|
|
78
|
+
s3_loc = S3Location('s3://my-bucket/key/to/object')
|
|
79
|
+
print(s3_loc.bucket) # 'my-bucket'
|
|
80
|
+
print(s3_loc.key) # 'key/to/object'
|
|
81
|
+
print(s3_loc.exists()) # True
|
|
82
|
+
print(s3_loc.is_bucket()) # False
|
|
83
|
+
print(s3_loc.is_dir()) # False
|
|
84
|
+
print(s3_loc.is_object()) # True
|
|
85
|
+
s3.download(s3_loc, 'path/to/local/file')
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If the local path is to a directory, the object will be downloaded with the filename inferred from the S3 path.
|
|
89
|
+
Recursive downloads are also supported.
|
|
90
|
+
```
|
|
91
|
+
s3_dir = S3Location('s3://my-bucket/path/to/dir')
|
|
92
|
+
print(s3_dir.is_dir()) # True
|
|
93
|
+
print(s3_dir.is_object()) # False
|
|
94
|
+
s3.download(s3_dir, 'local_dir/') # raises ValueError because s3_dir is not a single S3 object
|
|
95
|
+
s3.download(s3_dir, 'local_dir/', recursive=True) # downloads all objects to the `local_dir` directory (which is created if it does not already exist)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Direct read/write functions are also available: `s3.read`, `s3.write_to_s3` (which actually creates a local temp file,
|
|
99
|
+
which is removed afterwards), and `s3.touch`.
|
|
100
|
+
|
|
101
|
+
Convenient object and directory listing methods are available:
|
|
102
|
+
* `s3.list_objects`: list all objects with the specified prefix
|
|
103
|
+
* `s3.list_dir`: List objects and directories only up to the specified depth (default: 1). S3 does not actually have a
|
|
104
|
+
directory structure, but this function works as if it does.
|
|
105
|
+
|
|
106
|
+
### Easy key-based caching utilities, for local, S3, and multi-location caches
|
|
107
|
+
|
|
108
|
+
To quickly and easily cache data, and allow for such a cache to be synced across devices, this package includes `Cache`
|
|
109
|
+
classes, which include `LocalDiskCache` and `S3Cache` subclasses, as well as a `MultiCache` which can utilize multiple
|
|
110
|
+
cache locations. Caching is key-value based, with customizable serializers that can store objects with `pickle` or
|
|
111
|
+
alternative data/file formats.
|
|
112
|
+
|
|
113
|
+
A helper function, `setup_cache`, provides a simple interface to create a `Cache` object with the default
|
|
114
|
+
`PickleSerializer` serializer:
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
from yes3.caching import setup_cache
|
|
118
|
+
|
|
119
|
+
local_cache = setup_cache('path/to/cache/dir')
|
|
120
|
+
s3_cache = setup_cache('s3://my-bucket/cache/dir/prefix')
|
|
121
|
+
|
|
122
|
+
if 'data' in s3_cache:
|
|
123
|
+
data = s3_cache['data']
|
|
124
|
+
else:
|
|
125
|
+
data = expensive_data_processing(args)
|
|
126
|
+
if 'data' not in local_cache:
|
|
127
|
+
local_cache['data'] = data
|
|
128
|
+
|
|
129
|
+
multi_cache = MultiCache([local_cache, s3_cache])
|
|
130
|
+
multi_cache.sync_now() # Add any data missing found in either cache to the one in which it is missing
|
|
131
|
+
multi_cache.sync_always() # Keep the caches synced moving forward
|
|
132
|
+
|
|
133
|
+
new_data = get_more_data()
|
|
134
|
+
multi_cache.put('new_data', new_data)
|
|
135
|
+
print('new_data' in local_cache) # True
|
|
136
|
+
print('new_data' in s3_cache) # True
|
|
137
|
+
|
|
138
|
+
from yes3 import s3
|
|
139
|
+
for loc in s3.list_objects(s3_cache.path):
|
|
140
|
+
print(loc.s3_uri)
|
|
141
|
+
# 's3://my-bucket/cache/dir/prefix/data.meta'
|
|
142
|
+
# 's3://my-bucket/cache/dir/prefix/data.pkl'
|
|
143
|
+
# 's3://my-bucket/cache/dir/prefix/new_data.meta'
|
|
144
|
+
# 's3://my-bucket/cache/dir/prefix/new_data.pkl'
|
|
145
|
+
```
|
yes3-0.0.1/README.md
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# `yes3`
|
|
2
|
+
|
|
3
|
+
## A library for intuitive reading, writing, listing, and caching with AWS S3 (Simple Storage Service).
|
|
4
|
+
|
|
5
|
+
This library wraps the `boto3` S3 API boilerplate with a simple and intuitive interface, path flexibility, and powerful
|
|
6
|
+
utilities for easily listing, reading, and writing data on/from/to S3.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
Using a virtual environment is recommended. The simplest way to install is with pip: `pip install yes3`. To install the
|
|
11
|
+
latest version, you can install from github: `pip install git+https://github.com/eschombu/yes3.git`.
|
|
12
|
+
|
|
13
|
+
To run tests and test scripts, and/or contribute to `yes3`, clone this repository from
|
|
14
|
+
https://github.com/eschombu/yes3.git, and install the dev requirements:
|
|
15
|
+
```
|
|
16
|
+
git clone https://github.com/eschombu/yes3.git
|
|
17
|
+
cd yes3
|
|
18
|
+
# Optionally create a virtual environment:
|
|
19
|
+
# python3.1x -m venv .venv/yes3
|
|
20
|
+
# source .venv/yes3/bin/activate
|
|
21
|
+
pip install -e .[dev]
|
|
22
|
+
pytest
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## TODO
|
|
26
|
+
|
|
27
|
+
1. Documentation
|
|
28
|
+
2. Replace message printing with loggers
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
### S3 Locations and Paths
|
|
33
|
+
|
|
34
|
+
The `boto3` APIs for S3 typically consider the 'bucket' and 'key' of an S3 object:
|
|
35
|
+
```
|
|
36
|
+
import boto3
|
|
37
|
+
s3_client = boto3.client('s3')
|
|
38
|
+
s3_client.download_file('my-bucket', 'key/to/object', 'path/to/local/file')
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
The AWS CLI uses URLs:
|
|
42
|
+
```
|
|
43
|
+
aws s3 cp s3://my-bucket/key/to/object path/to/local/file
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
In `yes3`, we accept either, attempting to flexibly interpret input arguments as S3 locations and local paths,
|
|
47
|
+
converting S3 locations into `S3Location` objects:
|
|
48
|
+
```
|
|
49
|
+
from yes3 import s3, S3Location
|
|
50
|
+
|
|
51
|
+
# The following download calls are equivalent
|
|
52
|
+
s3.download('s3://my-bucket/key/to/object', 'path/to/local/file')
|
|
53
|
+
s3.download('my-bucket', 'key/to/object', 'path/to/local/file')
|
|
54
|
+
|
|
55
|
+
s3_loc = S3Location('s3://my-bucket/key/to/object')
|
|
56
|
+
print(s3_loc.bucket) # 'my-bucket'
|
|
57
|
+
print(s3_loc.key) # 'key/to/object'
|
|
58
|
+
print(s3_loc.exists()) # True
|
|
59
|
+
print(s3_loc.is_bucket()) # False
|
|
60
|
+
print(s3_loc.is_dir()) # False
|
|
61
|
+
print(s3_loc.is_object()) # True
|
|
62
|
+
s3.download(s3_loc, 'path/to/local/file')
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If the local path is to a directory, the object will be downloaded with the filename inferred from the S3 path.
|
|
66
|
+
Recursive downloads are also supported.
|
|
67
|
+
```
|
|
68
|
+
s3_dir = S3Location('s3://my-bucket/path/to/dir')
|
|
69
|
+
print(s3_dir.is_dir()) # True
|
|
70
|
+
print(s3_dir.is_object()) # False
|
|
71
|
+
s3.download(s3_dir, 'local_dir/') # raises ValueError because s3_dir is not a single S3 object
|
|
72
|
+
s3.download(s3_dir, 'local_dir/', recursive=True) # downloads all objects to the `local_dir` directory (which is created if it does not already exist)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Direct read/write functions are also available: `s3.read`, `s3.write_to_s3` (which actually creates a local temp file,
|
|
76
|
+
which is removed afterwards), and `s3.touch`.
|
|
77
|
+
|
|
78
|
+
Convenient object and directory listing methods are available:
|
|
79
|
+
* `s3.list_objects`: list all objects with the specified prefix
|
|
80
|
+
* `s3.list_dir`: List objects and directories only up to the specified depth (default: 1). S3 does not actually have a
|
|
81
|
+
directory structure, but this function works as if it does.
|
|
82
|
+
|
|
83
|
+
### Easy key-based caching utilities, for local, S3, and multi-location caches
|
|
84
|
+
|
|
85
|
+
To quickly and easily cache data, and allow for such a cache to be synced across devices, this package includes `Cache`
|
|
86
|
+
classes, which include `LocalDiskCache` and `S3Cache` subclasses, as well as a `MultiCache` which can utilize multiple
|
|
87
|
+
cache locations. Caching is key-value based, with customizable serializers that can store objects with `pickle` or
|
|
88
|
+
alternative data/file formats.
|
|
89
|
+
|
|
90
|
+
A helper function, `setup_cache`, provides a simple interface to create a `Cache` object with the default
|
|
91
|
+
`PickleSerializer` serializer:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
from yes3.caching import MultiCache, setup_cache
|
|
95
|
+
|
|
96
|
+
local_cache = setup_cache('path/to/cache/dir')
|
|
97
|
+
s3_cache = setup_cache('s3://my-bucket/cache/dir/prefix')
|
|
98
|
+
|
|
99
|
+
if 'data' in s3_cache:
|
|
100
|
+
data = s3_cache['data']
|
|
101
|
+
else:
|
|
102
|
+
data = expensive_data_processing(args)
|
|
103
|
+
if 'data' not in local_cache:
|
|
104
|
+
local_cache['data'] = data
|
|
105
|
+
|
|
106
|
+
multi_cache = MultiCache([local_cache, s3_cache])
|
|
107
|
+
multi_cache.sync_now() # Copy any data found in only one of the caches into the cache that is missing it
|
|
108
|
+
multi_cache.sync_always() # Keep the caches synced moving forward
|
|
109
|
+
|
|
110
|
+
new_data = get_more_data()
|
|
111
|
+
multi_cache.put('new_data', new_data)
|
|
112
|
+
print('new_data' in local_cache) # True
|
|
113
|
+
print('new_data' in s3_cache) # True
|
|
114
|
+
|
|
115
|
+
from yes3 import s3
|
|
116
|
+
for loc in s3.list_objects(s3_cache.path):
|
|
117
|
+
print(loc.s3_uri)
|
|
118
|
+
# 's3://my-bucket/cache/dir/prefix/data.meta'
|
|
119
|
+
# 's3://my-bucket/cache/dir/prefix/data.pkl'
|
|
120
|
+
# 's3://my-bucket/cache/dir/prefix/new_data.meta'
|
|
121
|
+
# 's3://my-bucket/cache/dir/prefix/new_data.pkl'
|
|
122
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "yes3"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name="Erik Schomburg", email="eschomburg@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
description = "A simple, intuitive, flexible interface for Amazon S3"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"boto3",
|
|
21
|
+
"numpy",
|
|
22
|
+
"pandas",
|
|
23
|
+
"tqdm",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"flake8",
|
|
29
|
+
"moto",
|
|
30
|
+
"pytest",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[tool.pytest.ini_options]
|
|
34
|
+
pythonpath = ["src"]
|
|
35
|
+
testpaths = ["tests"]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/eschombu/yes3"
|
|
39
|
+
Issues = "https://github.com/eschombu/yes3/issues"
|
|
40
|
+
|
|
41
|
+
|
yes3-0.0.1/setup.cfg
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .s3 import S3Location, is_s3_url
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from .base import Cache, CacheCore, CachedItemMeta, Serializer, check_meta_mismatches
|
|
2
|
+
from .local_cache import LocalDiskCache
|
|
3
|
+
from .memory_cache import MemoryCache
|
|
4
|
+
from .multi_cache import MultiCache
|
|
5
|
+
from .s3_cache import S3Cache
|
|
6
|
+
from .setup_helpers import setup_cache
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
from abc import ABCMeta, abstractmethod
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime, UTC
|
|
5
|
+
from typing import Iterable, Iterator, Optional, Self
|
|
6
|
+
|
|
7
|
+
# Sentinel used as the default for `default=` parameters, so that `None` can itself be passed as a real default.
_NotSpecified = object()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def raise_not_found(key) -> KeyError:
    """Raise a ``KeyError`` reporting that ``key`` is absent from the cache.

    Despite the return annotation, this helper never returns normally: its
    body unconditionally raises.
    """
    message = f"key '{key}' not found in cache"
    raise KeyError(message)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class CachedItemMeta:
|
|
16
|
+
key: str
|
|
17
|
+
path: Optional[str]
|
|
18
|
+
size: Optional[int]
|
|
19
|
+
timestamp: Optional[datetime]
|
|
20
|
+
|
|
21
|
+
_ts_format = '%Y-%m-%d %H:%M:%S.%f %z'
|
|
22
|
+
|
|
23
|
+
def __post_init__(self):
|
|
24
|
+
if isinstance(self.timestamp, float):
|
|
25
|
+
self.timestamp = datetime.fromtimestamp(self.timestamp, UTC)
|
|
26
|
+
if isinstance(self.timestamp, str):
|
|
27
|
+
self.timestamp = datetime.strptime(self.timestamp, self._ts_format)
|
|
28
|
+
|
|
29
|
+
def to_dict(self) -> dict:
|
|
30
|
+
return {
|
|
31
|
+
'key': self.key,
|
|
32
|
+
'path': self.path,
|
|
33
|
+
'size': self.size,
|
|
34
|
+
'timestamp': self.timestamp.strftime(self._ts_format) if self.timestamp else None,
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CacheCore(metaclass=ABCMeta):
    """Abstract key/value cache interface carrying ``active`` and ``read_only`` flags.

    Subclasses supply the storage primitives (``__contains__``, ``get``,
    ``get_meta``, ``put``, ``remove``, ``keys``). The mapping-style operators
    and the convenience methods defined here are built on those primitives.
    """

    def __init__(self, active=True, read_only=False):
        self._active = active
        self._read_only = read_only

    @abstractmethod
    def __contains__(self, key):
        """Report whether ``key`` is present in the cache."""

    @abstractmethod
    def get(self, key, default=_NotSpecified):
        """Retrieve the object stored under ``key``.

        ``default`` uses the ``_NotSpecified`` sentinel so that ``None`` can be
        passed as a genuine default value.
        """

    @abstractmethod
    def get_meta(self, key) -> CachedItemMeta:
        """Return the ``CachedItemMeta`` for ``key``."""

    @abstractmethod
    def put(self, key, obj, update=False, meta: Optional[CachedItemMeta] = None):
        """Store ``obj`` under ``key``; ``update`` is passed as ``True`` by :meth:`update`."""

    @abstractmethod
    def remove(self, key):
        """Remove ``key`` from the cache."""

    @abstractmethod
    def keys(self):
        """Return the keys currently held by the cache."""

    def __getitem__(self, key: str):
        """``cache[key]`` delegates to :meth:`get` with no default."""
        return self.get(key)

    def __setitem__(self, key: str, obj) -> None:
        """``cache[key] = obj`` delegates to :meth:`put`."""
        self.put(key, obj)

    def __delitem__(self, key: str) -> None:
        """``del cache[key]`` delegates to :meth:`remove`."""
        self.remove(key)

    def is_active(self) -> bool:
        """Return the current ``active`` flag."""
        return self._active

    def activate(self):
        """Set the ``active`` flag and return ``self`` for chaining."""
        self._active = True
        return self

    def deactivate(self):
        """Clear the ``active`` flag and return ``self`` for chaining."""
        self._active = False
        return self

    def is_read_only(self) -> bool:
        """Return the current ``read_only`` flag."""
        return self._read_only

    def set_read_only(self, value: bool) -> Self:
        """Set the ``read_only`` flag to ``value`` and return ``self`` for chaining."""
        self._read_only = value
        return self

    def update(self, key: str, obj):
        """Overwrite an existing entry; raise ``KeyError`` if ``key`` is not cached."""
        if key in self:
            self.put(key, obj, update=True)
        else:
            raise_not_found(key)

    def pop(self, key: str, default=_NotSpecified):
        """Fetch the value for ``key`` via :meth:`get`, then :meth:`remove` the key and return the value."""
        value = self.get(key, default=default)
        self.remove(key)
        return value

    def list(self) -> dict[str, CachedItemMeta]:
        """Return a dict mapping every key from :meth:`keys` to its metadata."""
        return {key: self.get_meta(key) for key in self.keys()}

    def subcache(self, *args, **kwargs) -> Self:
        """Not implemented in this base class; always raises ``NotImplementedError``."""
        raise NotImplementedError(f"`subcache` method is not defined for class {type(self).__name__}")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class CacheReaderWriter(metaclass=ABCMeta):
    """Abstract storage backend used by ``Cache`` to read, write and delete items."""

    @abstractmethod
    def read(self, key: str):
        """Return the stored object for ``key`` (``Cache.get`` returns this value directly)."""

    @abstractmethod
    def get_meta(self, key: str) -> CachedItemMeta:
        """Return the ``CachedItemMeta`` for ``key``."""

    @abstractmethod
    def write(self, key: str, obj, meta=None) -> CachedItemMeta:
        """Store ``obj`` under ``key`` and return its metadata.

        ``Cache.put`` records the returned metadata in its catalog.
        """

    @abstractmethod
    def delete(self, key: str, meta_only=False):
        """Delete the stored item for ``key``.

        ``meta_only`` is forwarded from ``Cache.remove``; presumably it limits
        deletion to the metadata -- confirm against concrete implementations.
        """
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class CacheCatalog(metaclass=ABCMeta):
    """Abstract index of cached keys and their ``CachedItemMeta`` records."""

    @abstractmethod
    def contains(self, key: str):
        """Report whether ``key`` is listed in the catalog."""

    @abstractmethod
    def add(self, key: str, info: CachedItemMeta):
        """Record ``info`` as the metadata for ``key``."""

    @abstractmethod
    def get(self, key: str) -> CachedItemMeta:
        """Return the metadata recorded for ``key``."""

    @abstractmethod
    def remove(self, key: str):
        """Drop ``key`` from the catalog."""

    @abstractmethod
    def keys(self):
        """Return the keys listed in the catalog."""

    @abstractmethod
    def items(self):
        """Return the catalog's (key, metadata) pairs."""
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Type alias: mapping from cache key to that item's metadata.
_CatalogT = dict[str, CachedItemMeta]
|
|
159
|
+
# Type alias: zero-argument callable that returns a catalog mapping.
_CatalogBuilderT = Callable[[], _CatalogT]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class CacheDictCatalog(CacheCatalog):
    """``CacheCatalog`` backed by a ``dict``; keys are normalized with ``str(key)``.

    If no ``catalog`` is supplied, one is built by calling ``catalog_builder``
    (``dict`` by default). A supplied ``catalog`` is stored as-is, not copied.
    """

    def __init__(
        self,
        catalog: Optional[dict[str, CachedItemMeta]] = None,
        catalog_builder: Optional[_CatalogBuilderT] = None,
    ):
        self._catalog = catalog
        self._build_catalog = dict if catalog_builder is None else catalog_builder
        if self._catalog is None:
            self.rebuild()

    def rebuild(self):
        """Replace the catalog with a copy of what the builder callable returns."""
        fresh = self._build_catalog()
        self._catalog = fresh.copy()

    def contains(self, key: str):
        """Report whether ``str(key)`` is in the catalog."""
        name = str(key)
        return name in self._catalog

    def add(self, key: str, meta: CachedItemMeta):
        """Store ``meta`` under ``str(key)``, replacing any existing entry."""
        name = str(key)
        self._catalog[name] = meta

    def get(self, key: str) -> CachedItemMeta:
        """Return the metadata stored under ``str(key)``; raises ``KeyError`` if absent."""
        name = str(key)
        return self._catalog[name]

    def remove(self, key: str):
        """Remove ``str(key)`` from the catalog; raises ``KeyError`` if absent."""
        name = str(key)
        self._catalog.pop(name)

    def keys(self):
        """Return the catalog keys as a new list."""
        return [*self._catalog.keys()]

    def items(self) -> Iterator[tuple[str, CachedItemMeta]]:
        """Return an iterator over (key, metadata) pairs."""
        pairs = self._catalog.items()
        return iter(pairs)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Cache(CacheCore, metaclass=ABCMeta):
    """Cache that pairs a ``CacheCatalog`` (key index) with a ``CacheReaderWriter`` (storage).

    While the cache is inactive, lookups behave as if it were empty and
    ``put`` only prints a warning. While it is read-only, ``put`` raises
    ``TypeError``, as does ``remove`` for a key that is present.
    """

    def __init__(self, catalog: CacheCatalog, reader_writer: CacheReaderWriter, active=True, read_only=False):
        super().__init__(active=active, read_only=read_only)
        self._catalog = catalog
        self._reader_writer = reader_writer

    @classmethod
    @abstractmethod
    def create(cls, *args, **kwargs):
        """Alternate constructor to be provided by concrete subclasses."""

    def __contains__(self, key: str) -> bool:
        """Return the catalog's answer for ``key``, or ``False`` when inactive."""
        return self._catalog.contains(key) if self.is_active() else False

    def get(self, key: str, default=_NotSpecified):
        """Read and return the object for ``key``.

        When the cache is inactive or the key is missing, return ``default``
        if one was given, otherwise raise ``KeyError``.
        """
        if self.is_active() and key in self:
            return self._reader_writer.read(key)
        if default is _NotSpecified:
            raise_not_found(key)
        return default

    def get_meta(self, key: str) -> CachedItemMeta:
        """Return the catalog metadata for ``key``; raise ``KeyError`` if inactive or missing."""
        if not (self.is_active() and key in self):
            raise_not_found(key)
        return self._catalog.get(key)

    def put(self, key: str, obj, *, update=False, meta: Optional[CachedItemMeta] = None) -> Self:
        """Write ``obj`` under ``key`` and record the resulting metadata in the catalog.

        Raises ``TypeError`` if the cache is read-only, and ``ValueError`` if
        ``key`` already exists and ``update`` is false. When the cache is
        inactive, nothing is written and a warning is printed. Returns ``self``.
        """
        if self.is_read_only():
            raise TypeError('Cache is in read only mode')
        if not self.is_active():
            print(f'WARNING: {type(self).__name__} is not active')
            return self
        if key in self and not update:
            raise ValueError(f"key '{key}' already exists in cache; use 'update' to overwrite")
        written_meta = self._reader_writer.write(key, obj, meta=meta)
        self._catalog.add(key, written_meta)
        return self

    def remove(self, key: str, meta_only=False) -> Self:
        """Remove ``key`` from the catalog, then delete it via the reader/writer.

        A no-op when the cache is inactive or the key is absent. Raises
        ``TypeError`` if the key is present but the cache is read-only.
        ``meta_only`` is forwarded to the reader/writer's ``delete``. Returns ``self``.
        """
        if not (self.is_active() and key in self):
            return self
        if self.is_read_only():
            raise TypeError('Cache is in read only mode')
        self._catalog.remove(key)
        self._reader_writer.delete(key, meta_only=meta_only)
        return self

    def remove_meta(self, key: str) -> Self:
        """Shorthand for ``remove(key, meta_only=True)``."""
        return self.remove(key, meta_only=True)

    def keys(self) -> list[str]:
        """Return the catalog keys as a list, or an empty list when inactive."""
        return list(self._catalog.keys()) if self.is_active() else []

    def _repr_params(self) -> list[str]:
        """Build the pieces shown inside ``repr``: item count plus any state flags."""
        params = [f'{len(self.keys())} items']
        states = (
            (not self.is_active(), 'NOT ACTIVE'),
            (self.is_read_only(), 'READ ONLY'),
        )
        params.extend(label for flagged, label in states if flagged)
        return params

    def __repr__(self):
        joined = ', '.join(self._repr_params())
        return f"{type(self).__name__}({joined})"
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def check_meta_mismatches(caches: Iterable[CacheCore], key=None) -> dict[str, tuple[CachedItemMeta, ...]]:
    """Find keys whose metadata differs between the given caches.

    Args:
        caches: Caches to compare; every element must be a ``CacheCore``.
            Any iterable is accepted, including one-shot iterators.
        key: If given, only this key is checked; otherwise every key reported
            by any of the caches is checked.

    Returns:
        A dict mapping each mismatching key to a tuple of the metadata objects
        from the caches that contain it, in cache order. A key held by fewer
        than two caches is never reported.

    Raises:
        TypeError: If ``key`` is neither ``None`` nor a string, or if any
            element of ``caches`` is not a ``CacheCore``.
    """
    if key is not None and not isinstance(key, str):
        raise TypeError('key is not a string')
    # Materialize once: `caches` is traversed several times below, and a
    # one-shot iterator (e.g. a generator) would otherwise be exhausted by the
    # validation pass, silently producing an empty result.
    caches = list(caches)
    if not all(isinstance(cache, CacheCore) for cache in caches):
        raise TypeError('caches must be an iterable containing Cache instances')
    if key is None:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result order is deterministic (a set's iteration order is not).
        keys = list(dict.fromkeys(k for cache in caches for k in cache.keys()))
    else:
        keys = [key]
    mismatches = {}
    for k in keys:
        metas = [cache.get_meta(k) for cache in caches if k in cache]
        if len(metas) > 1 and any(meta != metas[0] for meta in metas[1:]):
            mismatches[k] = tuple(metas)
    return mismatches
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class Serializer(metaclass=ABCMeta):
    """Abstract base for objects that read and write cached values at a path.

    ``ext`` holds the extension passed to the constructor, falling back to the
    class-level ``default_ext`` when none is given.
    """

    default_ext = None

    def __init__(self, ext=None):
        self.ext = self.default_ext if ext is None else ext

    @abstractmethod
    def read(self, path):
        """Intended to load and return the object stored at ``path``."""

    @abstractmethod
    def write(self, path, obj):
        """Intended to store ``obj`` at ``path``."""
|