toolchemy-0.2.185-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolchemy/__main__.py +9 -0
- toolchemy/ai/clients/__init__.py +20 -0
- toolchemy/ai/clients/common.py +429 -0
- toolchemy/ai/clients/dummy_model_client.py +61 -0
- toolchemy/ai/clients/factory.py +37 -0
- toolchemy/ai/clients/gemini_client.py +48 -0
- toolchemy/ai/clients/ollama_client.py +58 -0
- toolchemy/ai/clients/openai_client.py +76 -0
- toolchemy/ai/clients/pricing.py +66 -0
- toolchemy/ai/clients/whisper_client.py +141 -0
- toolchemy/ai/prompter.py +124 -0
- toolchemy/ai/trackers/__init__.py +5 -0
- toolchemy/ai/trackers/common.py +216 -0
- toolchemy/ai/trackers/mlflow_tracker.py +221 -0
- toolchemy/ai/trackers/neptune_tracker.py +135 -0
- toolchemy/db/lightdb.py +260 -0
- toolchemy/utils/__init__.py +19 -0
- toolchemy/utils/at_exit_collector.py +109 -0
- toolchemy/utils/cacher/__init__.py +20 -0
- toolchemy/utils/cacher/cacher_diskcache.py +121 -0
- toolchemy/utils/cacher/cacher_pickle.py +152 -0
- toolchemy/utils/cacher/cacher_shelve.py +196 -0
- toolchemy/utils/cacher/common.py +174 -0
- toolchemy/utils/datestimes.py +77 -0
- toolchemy/utils/locations.py +111 -0
- toolchemy/utils/logger.py +76 -0
- toolchemy/utils/timer.py +23 -0
- toolchemy/utils/utils.py +168 -0
- toolchemy/vision/__init__.py +5 -0
- toolchemy/vision/caption_overlay.py +77 -0
- toolchemy/vision/image.py +89 -0
- toolchemy-0.2.185.dist-info/METADATA +25 -0
- toolchemy-0.2.185.dist-info/RECORD +36 -0
- toolchemy-0.2.185.dist-info/WHEEL +4 -0
- toolchemy-0.2.185.dist-info/entry_points.txt +3 -0
- toolchemy-0.2.185.dist-info/licenses/LICENSE +21 -0
toolchemy/utils/at_exit_collector.py (new file, @@ -0,0 +1,109 @@)

```python
from abc import ABC, abstractmethod
import atexit
import collections

from toolchemy.utils.logger import get_logger
from toolchemy.utils.utils import pp


class ICollectable(ABC):
    FIELD_NAME_KEY = "name"

    @abstractmethod
    def label(self) -> str:
        pass

    @abstractmethod
    def collect(self) -> dict:
        pass


class AtExitCollector:
    _collectables: list[ICollectable] = []
    _collector_logger = get_logger()
    _is_enabled = False
    _is_registered = False

    @classmethod
    def enable(cls):
        cls._is_enabled = True

    @classmethod
    def disable(cls):
        cls._is_enabled = False

    @classmethod
    def reset(cls):
        cls._collectables = []
        atexit.unregister(cls._collector_summary)
        cls._is_registered = False

    @classmethod
    def register(cls, collectable: ICollectable):
        if not cls._is_enabled:
            return
        if not cls._is_registered:
            atexit.register(cls._collector_summary)
            cls._collector_logger.info("AtExitCollector registered.")
            cls._is_registered = True
        cls._collector_logger.info(f"Registering collectable: {collectable} (type: {type(collectable)})")
        assert isinstance(collectable, ICollectable), f"Expected ICollectable, got {type(collectable)}"
        cls._collectables.append(collectable)
        for c in cls._collectables:
            cls._collector_logger.info(f"Registered collectable: {c} (type: {type(c)})")

    @classmethod
    def _collector_summary(cls) -> None:
        if not cls._is_enabled:
            return
        if len(cls._collectables) == 0:
            cls._collector_logger.info("No collectable registered, skipping AtExitCollector summary.")
            return
        cls._collector_logger.info("AtExitCollector| generating summary...")
        aggregated = {}
        for collectable in cls._collectables:
            try:
                data = collectable.collect()
            except TypeError as e:
                cls._collector_logger.error(f"Collectable of wrong type: {type(collectable)} (is ICollectable: {isinstance(collectable, ICollectable)}): {collectable} (err msg: {e})")
                raise e

            name = collectable.label()
            if name not in aggregated:
                aggregated[name] = {
                    "instances": 0,
                }
            for k, v in data.items():
                if not isinstance(v, (int, float)):
                    continue
                if k not in aggregated[name]:
                    aggregated[name][k] = 0
                aggregated[name][k] += v
            aggregated[name]["instances"] += 1

            cls._collector_logger.info(f"AtExitCollector| summary for {name}:\n{pp(data, print_msg=False)}")

        averages = {}
        for instance_name, instance_data in aggregated.items():
            if instance_name not in averages:
                averages[instance_name] = {}
            for k, v in instance_data.items():
                if k == "instances":
                    continue

                avg_key = f"avg_{k}"
                averages[instance_name][avg_key] = v / instance_data["instances"]

        cls._update_dict(aggregated, averages)

        cls._collector_logger.info(f"AtExitCollector| aggregated summary:\n{pp(aggregated, print_msg=False)}")
        cls._collector_logger.info("AtExitCollector| summary generation DONE")

    @classmethod
    def _update_dict(cls, d, u):
        for k, v in u.items():
            if isinstance(v, collections.abc.Mapping):
                d[k] = cls._update_dict(d.get(k, {}), v)
            else:
                d[k] = v
        return d
```
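The collector sums the numeric fields of every registered collectable and appends per-key averages when the interpreter exits. A usage sketch, not part of the package; `RequestStats` is a hypothetical collectable:

```python
# Usage sketch (not part of the package): a hypothetical RequestStats
# collectable whose numeric fields are summed and averaged in the
# at-exit summary.
from toolchemy.utils.at_exit_collector import AtExitCollector, ICollectable


class RequestStats(ICollectable):
    def __init__(self):
        self.requests = 0
        self.latency_ms = 0.0

    def label(self) -> str:
        return "request_stats"

    def collect(self) -> dict:
        # Only int/float values survive aggregation; other types are skipped.
        return {"requests": self.requests, "latency_ms": self.latency_ms}


AtExitCollector.enable()         # collection is opt-in; register() is a no-op otherwise
stats = RequestStats()
AtExitCollector.register(stats)  # the first registration installs the atexit hook
stats.requests += 1
stats.latency_ms += 12.5
# At interpreter exit, _collector_summary() logs per-label sums, an
# "instances" count, and avg_* keys for every numeric field.
```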
toolchemy/utils/cacher/__init__.py (new file, @@ -0,0 +1,20 @@)

```python
from .common import ICacher, BaseCacher, CacheEntryDoesNotExistError, DummyCacher
from .cacher_pickle import CacherPickle
from .cacher_shelve import CacherShelve
from .cacher_diskcache import CacherDiskcache


class Cacher(CacherDiskcache):
    pass


__all__ = [
    "ICacher",
    "BaseCacher",
    "Cacher",
    "CacherPickle",
    "CacherShelve",
    "CacherDiskcache",
    "DummyCacher",
    "CacheEntryDoesNotExistError",
]
```
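The package pins `Cacher` to the diskcache backend, so callers import one stable name while the default implementation stays swappable. A usage sketch, assuming the `set`/`get` signatures shown in the backend modules below:

```python
# Sketch: Cacher is an alias for CacherDiskcache (see cacher_diskcache.py below).
from toolchemy.utils.cacher import Cacher, CacheEntryDoesNotExistError

cache = Cacher(name="demo")
cache.set("answer", 42, ttl_s=60)  # entry expires after 60 seconds
try:
    print(cache.get("answer"))
except CacheEntryDoesNotExistError:
    print("cache miss")
```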
toolchemy/utils/cacher/cacher_diskcache.py (new file, @@ -0,0 +1,121 @@)

```python
import logging
import os
import sys
from typing import Optional, Any
from diskcache import Cache

from toolchemy.utils.cacher.common import BaseCacher, CacheEntryDoesNotExistError, CacheEntryHasNotBeenSetError, CacherInitializationError, ICacher
from toolchemy.utils.logger import get_logger
from toolchemy.utils.locations import get_external_caller_path
from toolchemy.utils.utils import _caller_module_name


class CacherDiskcache(BaseCacher):
    def __init__(self, name: str | None = None, cache_base_dir: Optional[str] = None, disabled: bool = False,
                 log_level: int = logging.INFO):
        super().__init__()
        self._disabled = disabled

        self._log_level = log_level
        self._logger = get_logger(level=self._log_level)

        self._name = name
        if not self._name:
            self._name = _caller_module_name()

        self._cache_base_dir = cache_base_dir
        if self._cache_base_dir is None:
            self._cache_base_dir = get_external_caller_path()

        self._cache_dir = os.path.join(self._cache_base_dir, self.CACHER_MAIN_NAME, self._name)

        if self._disabled:
            return

        try:
            self._cache = Cache(self._cache_dir, cull_limit=0, size_limit=2**38)
        except Exception as e:
            raise CacherInitializationError(f"Failed to initialize disk cache for name '{self._name}' (cache dir: '{self._cache_dir}')") from e

        self._logger.debug(
            f"Cacher '{self._name}' initialized (cache path: '{self._cache_dir}', log_level: '{logging.getLevelName(log_level)}')")
        self._logger.debug("Cacher logging DEBUG level enabled")

    @property
    def cache_location(self) -> str:
        return self._cache_dir

    def sub_cacher(self, log_level: int | None = None, suffix: str | None = None) -> "ICacher":
        name = _caller_module_name()
        if suffix:
            name += f"__{suffix}"
        if log_level is None:
            log_level = self._log_level
        self._logger.debug("Creating sub cacher")
        self._logger.debug(f"> base cache dir: {self._cache_dir}")
        self._logger.debug(f"> name: {name}")
        self._logger.debug(f"> log level: {log_level} ({logging.getLevelName(log_level)})")
        self._logger.debug(f"> is disabled: {self._disabled}")

        return CacherDiskcache(name=os.path.join(self._name, name).strip("/"),
                               cache_base_dir=self._cache_base_dir,
                               log_level=log_level, disabled=self._disabled)

    def _exists(self, name: str) -> bool:
        if self._disabled:
            self._logger.debug("Cacher disabled")
            return False

        if name in self._cache:
            return True
        self._logger.debug("Cache entry %s::%s does not exist", self._cache_dir, name)
        return False

    def set(self, name: str, content: Any, ttl_s: int | None = None):
        """
        Dumps a given object under a given cache entry name. The object must be pickleable.
        """
        if self._disabled:
            return

        result = self._cache.set(name, content, expire=ttl_s)
        does_exist = self.exists(name)
        if not result or not does_exist:
            self._logger.error(f"Cache entry '{name}' not set for name '{self._name}' ({result}, {does_exist})")
            self._logger.error(f"> cache dir: {self._cache_dir}")
            self._logger.error(f"> type of the content: {type(content)}")
            self._logger.error(f"> size of the content: {sys.getsizeof(content)}")
            raise CacheEntryHasNotBeenSetError()

        self._logger.debug("Cache set %s::%s", self._cache_dir, name)

    def get(self, name: str) -> Any:
        """
        Loads an object for a given cache entry name. If it doesn't exist, an exception is raised.
        """
        if self._disabled:
            raise CacheEntryDoesNotExistError("Caching is disabled...")

        self._logger.debug("Cache get: %s::%s", self._cache_dir, name)

        if name in self._cache:
            return self._cache.get(name)

        raise CacheEntryDoesNotExistError(f"Cache does not exist: {self._cache_dir}::{name}.")

    def unset(self, name: str):
        """
        Removes a cache entry for a given name
        """
        self._logger.debug("Cache unset: %s::%s", self._cache_dir, name)
        if self._disabled:
            return

        if name in self._cache:
            del self._cache[name]
            self._logger.debug("Cache entry %s::%s removed", self._name, name)
        else:
            self._logger.warning("Cache entry %s::%s does not exist, nothing to remove", self._name, name)

    def persist(self):
        self._cache.close()
```
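A get-or-compute sketch against this API. `exists()` is assumed to live on `BaseCacher` in common.py (not shown in this diff) and delegate to `_exists()`; `expensive_compute` is a hypothetical helper:

```python
from toolchemy.utils.cacher import CacherDiskcache

cache = CacherDiskcache(name="embeddings", cache_base_dir="/tmp/toolchemy-demo")


def expensive_compute(key: str) -> list[float]:
    # Stand-in for real work (model call, heavy query, ...).
    return [float(ord(ch)) for ch in key]


def get_or_compute(key: str) -> list[float]:
    if cache.exists(key):              # assumed public wrapper around _exists()
        return cache.get(key)
    value = expensive_compute(key)
    cache.set(key, value, ttl_s=3600)  # diskcache enforces the TTL natively via expire=
    return value


print(get_or_compute("hello"))
```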
toolchemy/utils/cacher/cacher_pickle.py (new file, @@ -0,0 +1,152 @@)

```python
import logging
import os
import pickle
import threading
from pathlib import Path
from typing import Optional, Any

from toolchemy.utils.cacher.common import BaseCacher, DummyLock, CacheEntryDoesNotExistError, ICacher
from toolchemy.utils.logger import get_logger
from toolchemy.utils.locations import get_external_caller_path
from toolchemy.utils.utils import _caller_module_name
from toolchemy.utils.datestimes import current_unix_timestamp


class CacherPickle(BaseCacher):
    """
    Cacher implementation where cache is stored as a pickled local file
    """

    CACHER_MAIN_NAME = ".cache"

    def __init__(self, name: str | None = None, cache_base_dir: Optional[str] = None, disabled: bool = False,
                 log_level: int = logging.INFO, enable_thread_safeness: bool = False):
        """
        Initialize cache with its name. It creates a .cache/<name> subdir in the cache_base_dir directory.
        """
        super().__init__()
        self._disabled = disabled
        if enable_thread_safeness:
            self._lock = threading.Lock()
        else:
            self._lock = DummyLock()
        self._log_level = log_level
        self._logger = get_logger(level=self._log_level)

        self._name = name
        if not self._name:
            self._name = _caller_module_name()

        self._cache_base_dir = cache_base_dir
        if self._cache_base_dir is None:
            self._cache_base_dir = get_external_caller_path()

        self._cache_dir = os.path.join(self._cache_base_dir, self.CACHER_MAIN_NAME, self._name)

        with self._lock:
            os.makedirs(self._cache_dir, exist_ok=True)

        self._logger.debug(f"Cacher '{self._name}' initialized (cache dir: '{self._cache_dir}', log_level: '{logging.getLevelName(log_level)}')")
        self._logger.debug("Cacher logging DEBUG level enabled")

    @property
    def cache_location(self) -> str:
        return self._cache_dir

    def sub_cacher(self, log_level: int | None = None, suffix: str | None = None) -> "ICacher":
        name = _caller_module_name()
        if suffix:
            name += f"__{suffix}"
        if log_level is None:
            log_level = self._log_level
        self._logger.debug("Creating sub cacher")
        self._logger.debug(f"> base name: {self._name}")
        self._logger.debug(f"> name: {name}")
        self._logger.debug(f"> log level: {log_level} ({logging.getLevelName(log_level)})")
        self._logger.debug(f"> is disabled: {self._disabled}")
        return CacherPickle(name=os.path.join(self._name, name).strip("/"), cache_base_dir=self._cache_base_dir,
                            log_level=log_level, disabled=self._disabled)

    def _exists(self, name: str) -> bool:
        if self._disabled:
            self._logger.debug("Cacher disabled")
            return False
        target_filename = self._cache_name(name)
        target_file = Path(target_filename)

        ret_val = False
        if target_file.is_file() or target_file.is_symlink():
            try:
                entry = self._get(name)
                if entry['ttl_s'] is None:
                    ret_val = True
                else:
                    current_time = current_unix_timestamp()
                    if current_time - entry['timestamp'] < entry['ttl_s']:
                        ret_val = True
                    else:
                        self.unset(name)
                        ret_val = False
            except CacheEntryDoesNotExistError:
                return False

        if ret_val:
            self._logger.debug("Cache entry %s::%s (%s) exists", self._name, name, target_filename)
        else:
            self._logger.debug("Cache entry %s::%s does not exist", self._name, name)
        return ret_val

    def set(self, name: str, content: Any, ttl_s: int | None = None):
        if self._disabled:
            return
        target_filename = self._cache_name(name)
        with self._lock:
            with open(target_filename, "wb") as file:
                try:
                    pickle.dump(self._envelop(content, ttl_s=ttl_s), file)  # type: ignore
                except TypeError as e:
                    self._logger.error(f"Wrong type of the serialized content: {type(content)}. Target: {target_filename}. Content:\n{content}")
                    os.remove(target_filename)  # drop the partially written cache file
                    raise e
        self._logger.debug("Cache set %s::%s (file: %s)", self._name, name, target_filename)

    def get(self, name: str) -> Any:
        entry = self._get(name)
        return entry["data"]

    def _get(self, name: str) -> Any:
        if self._disabled:
            raise CacheEntryDoesNotExistError("Caching is disabled...")
        target_filename = self._cache_name(name)
        self._logger.debug("Cache get: %s::%s (file: %s)", self._name, name, target_filename)
        target_file = Path(target_filename)
        with self._lock:
            if target_file.is_file() or target_file.is_symlink():
                with open(target_filename, "rb") as file:
                    try:
                        restored_object = pickle.load(file, encoding="utf-8")
                    except ModuleNotFoundError as e:
                        self._logger.error(f"{e} while loading from file: '{target_filename}'")
                        raise e
                if restored_object['ttl_s'] is not None:
                    current_time = current_unix_timestamp()
                    if current_time - restored_object['timestamp'] >= restored_object['ttl_s']:
                        self.unset(name)
                        raise CacheEntryDoesNotExistError(f"Cache does not exist: {self._name}::{name}. Path: {target_filename}")
                return restored_object
        raise CacheEntryDoesNotExistError(f"Cache does not exist: {self._name}::{name}. Path: {target_filename}")

    def unset(self, name: str):
        """
        Removes a cache entry for a given name
        """
        target_filename = self._cache_name(name)
        self._logger.debug("Cache unset: %s::%s (file: %s)", self._name, name, target_filename)
        target_file = Path(target_filename)
        if target_file.is_file() or target_file.is_symlink():
            os.remove(target_filename)
            self._logger.debug("Cache entry %s::%s removed", self._name, name)
        else:
            self._logger.warning("Cache entry %s::%s does not exist, nothing to remove", self._name, name)

    def _cache_name(self, name: str):
        return os.path.join(self._cache_dir, f"{name}.pkl")
```
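Both file-based backends wrap values with `BaseCacher._envelop()` from common.py, which this diff does not show. Judging by the keys read back in `_get()` and `_exists()`, the envelope presumably looks like this sketch:

```python
# Hypothetical reconstruction of BaseCacher._envelop(); the real
# implementation lives in toolchemy/utils/cacher/common.py (not shown).
from typing import Any

from toolchemy.utils.datestimes import current_unix_timestamp


def envelop(content: Any, ttl_s: int | None = None) -> dict[str, Any]:
    return {
        "data": content,                        # the cached object itself
        "timestamp": current_unix_timestamp(),  # write time used for TTL checks
        "ttl_s": ttl_s,                         # None means the entry never expires
    }
```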
toolchemy/utils/cacher/cacher_shelve.py (new file, @@ -0,0 +1,196 @@)

```python
import logging
import os
import threading
from typing import Optional, Any
import shelve

from toolchemy.utils.cacher.common import BaseCacher, DummyLock, CacheEntryDoesNotExistError, ICacher
from toolchemy.utils.logger import get_logger
from toolchemy.utils.locations import get_external_caller_path
from toolchemy.utils.utils import _caller_module_name
from toolchemy.utils.datestimes import current_unix_timestamp


class CacherShelve(BaseCacher):
    def __init__(self, name: str | None = None, cache_base_dir: Optional[str] = None, disabled: bool = False,
                 log_level: int = logging.INFO, enable_thread_safeness: bool = False):
        super().__init__()
        self._disabled = disabled
        self._enable_thread_safeness = enable_thread_safeness
        if enable_thread_safeness:
            self._lock = threading.Lock()
        else:
            self._lock = DummyLock()
        self._log_level = log_level
        self._logger = get_logger(level=self._log_level)

        self._name = name
        if not self._name:
            self._name = _caller_module_name()

        self._cache_base_dir = cache_base_dir
        if self._cache_base_dir is None:
            self._cache_base_dir = get_external_caller_path()

        self._cache_dir = os.path.join(self._cache_base_dir, self.CACHER_MAIN_NAME, self._name)

        if self._disabled:
            return

        self._cache_path = os.path.join(self._cache_dir, "cache")

        with self._lock:
            os.makedirs(self._cache_dir, exist_ok=True)
            if not self._enable_thread_safeness:
                self._open()

        self._logger.debug(f"Cacher '{self._name}' initialized (cache path: '{self._cache_dir}', log_level: '{logging.getLevelName(log_level)}')")
        self._logger.debug("Cacher logging DEBUG level enabled")

    def _open(self):
        self._cache = shelve.open(self._cache_path, writeback=False)

    @property
    def cache_location(self) -> str:
        return self._cache_dir

    def sub_cacher(self, log_level: int | None = None, suffix: str | None = None) -> "ICacher":
        name = _caller_module_name()
        if suffix:
            name += f"__{suffix}"
        if log_level is None:
            log_level = self._log_level
        self._logger.debug("Creating sub cacher")
        self._logger.debug(f"> base cache dir: {self._cache_dir}")
        self._logger.debug(f"> name: {name}")
        self._logger.debug(f"> log level: {log_level} ({logging.getLevelName(log_level)})")
        self._logger.debug(f"> is disabled: {self._disabled}")

        return CacherShelve(name=os.path.join(self._name, name).strip("/"),
                            cache_base_dir=self._cache_base_dir,
                            log_level=log_level, disabled=self._disabled)

    def _exists(self, name: str) -> bool:
        if self._disabled:
            self._logger.debug("Cacher disabled")
            return False

        ret_val = False
        with self._lock:
            if self._enable_thread_safeness:
                self._open()
            if name in self._cache:
                self._logger.debug("Cache entry %s::%s exists", self._cache_dir, name)
                try:
                    existing_entry = self._cache[name]
                except Exception as e:
                    self._logger.error(f"Cache entry failed to fetch cached entry: {e}")
                    self._logger.error(f"Existing keys: {self._cache.keys()}")
                    if self._enable_thread_safeness:
                        self._close()
                    raise e
                self._logger.debug(f"Cache existing entry: {existing_entry}")
                entry = self._migrate(name, existing_entry)
                if entry['ttl_s'] is None:
                    ret_val = True
                else:
                    current_time = current_unix_timestamp()
                    if current_time - entry['timestamp'] < entry['ttl_s']:
                        ret_val = True
                    else:
                        del self._cache[name]
            if self._enable_thread_safeness:
                self._close()
        if not ret_val:
            self._logger.debug("Cache entry %s::%s does not exist", self._cache_dir, name)
        return ret_val

    def set(self, name: str, content: Any, ttl_s: int | None = None):
        """
        Dumps a given object under a given cache entry name. The object must be pickleable.
        """
        if self._disabled:
            return

        with self._lock:
            if self._enable_thread_safeness:
                self._open()
            self._cache[name] = self._envelop(content, ttl_s=ttl_s)
            if self._enable_thread_safeness:
                self._close()

        self._logger.debug("Cache set %s::%s", self._cache_dir, name)

    def get(self, name: str) -> Any:
        """
        Loads an object for a given cache entry name. If it doesn't exist, an exception is raised.
        """
        if self._disabled:
            raise CacheEntryDoesNotExistError("Caching is disabled...")

        self._logger.debug("Cache get: %s::%s", self._cache_dir, name)

        found = False
        ret_val = None

        with self._lock:
            if self._enable_thread_safeness:
                self._open()
            if name in self._cache:
                entry = self._migrate(name, self._cache[name])
                if entry['ttl_s'] is None:
                    ret_val = entry['data']
                    found = True
                else:
                    current_time = current_unix_timestamp()
                    if current_time - entry['timestamp'] < entry['ttl_s']:
                        ret_val = entry['data']
                        found = True
                    else:
                        del self._cache[name]
            if self._enable_thread_safeness:
                self._close()
        if found:
            return ret_val
        raise CacheEntryDoesNotExistError(f"Cache does not exist: {self._cache_dir}::{name}.")

    def unset(self, name: str):
        """
        Removes a cache entry for a given name
        """
        self._logger.debug("Cache unset: %s::%s", self._cache_dir, name)
        if self._disabled:
            return

        with self._lock:
            if self._enable_thread_safeness:
                self._open()
            if name in self._cache:
                del self._cache[name]
                self._logger.debug("Cache entry %s::%s removed", self._name, name)
            else:
                self._logger.warning("Cache entry %s::%s does not exist, nothing to remove", self._name, name)
            if self._enable_thread_safeness:
                self._close()

    def _close(self):
        self._cache.close()

    def persist(self):
        if not self._enable_thread_safeness:
            with self._lock:
                self._close()

    def _migrate(self, name: str, entry: Any) -> dict[str, Any]:
        if not isinstance(entry, dict) or ("data" not in entry and "timestamp" not in entry and "ttl_s" not in entry):
            self._logger.info("Migrating data entry to handle TTL")
            self._logger.info(f"> entry: {entry} (type: {type(entry)})")
            self.set(name, entry)
            entry = self._cache[name]
        return entry


def testing():
    cacher = CacherShelve()
    print(cacher._name)


if __name__ == "__main__":
    testing()
```
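A concurrency sketch: with `enable_thread_safeness=True`, every operation reopens and closes the shelf under a `threading.Lock`, serializing access at the cost of throughput. The key names below are illustrative:

```python
# Sketch: concurrent writers against a thread-safe CacherShelve; the
# lock serializes each set(), which opens and closes the shelf itself.
import threading

from toolchemy.utils.cacher import CacherShelve

cache = CacherShelve(name="shared", enable_thread_safeness=True)


def worker(i: int) -> None:
    cache.set(f"item_{i}", i * i)


threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(cache.get("item_3"))  # -> 9
```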