thds.core 0.0.1__py3-none-any.whl → 1.31.20250116223856__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thds/core/__init__.py +48 -0
- thds/core/ansi_esc.py +46 -0
- thds/core/cache.py +201 -0
- thds/core/calgitver.py +82 -0
- thds/core/concurrency.py +100 -0
- thds/core/config.py +250 -0
- thds/core/decos.py +55 -0
- thds/core/dict_utils.py +188 -0
- thds/core/env.py +40 -0
- thds/core/exit_after.py +121 -0
- thds/core/files.py +125 -0
- thds/core/fretry.py +115 -0
- thds/core/generators.py +56 -0
- thds/core/git.py +81 -0
- thds/core/hash_cache.py +86 -0
- thds/core/hashing.py +106 -0
- thds/core/home.py +15 -0
- thds/core/hostname.py +10 -0
- thds/core/imports.py +17 -0
- thds/core/inspect.py +58 -0
- thds/core/iterators.py +9 -0
- thds/core/lazy.py +83 -0
- thds/core/link.py +153 -0
- thds/core/log/__init__.py +29 -0
- thds/core/log/basic_config.py +171 -0
- thds/core/log/json_formatter.py +43 -0
- thds/core/log/kw_formatter.py +84 -0
- thds/core/log/kw_logger.py +93 -0
- thds/core/log/logfmt.py +302 -0
- thds/core/merge_args.py +168 -0
- thds/core/meta.json +8 -0
- thds/core/meta.py +518 -0
- thds/core/parallel.py +200 -0
- thds/core/pickle_visit.py +24 -0
- thds/core/prof.py +276 -0
- thds/core/progress.py +112 -0
- thds/core/protocols.py +17 -0
- thds/core/py.typed +0 -0
- thds/core/scaling.py +39 -0
- thds/core/scope.py +199 -0
- thds/core/source.py +238 -0
- thds/core/source_serde.py +104 -0
- thds/core/sqlite/__init__.py +21 -0
- thds/core/sqlite/connect.py +33 -0
- thds/core/sqlite/copy.py +35 -0
- thds/core/sqlite/ddl.py +4 -0
- thds/core/sqlite/functions.py +63 -0
- thds/core/sqlite/index.py +22 -0
- thds/core/sqlite/insert_utils.py +23 -0
- thds/core/sqlite/merge.py +84 -0
- thds/core/sqlite/meta.py +190 -0
- thds/core/sqlite/read.py +66 -0
- thds/core/sqlite/sqlmap.py +179 -0
- thds/core/sqlite/structured.py +138 -0
- thds/core/sqlite/types.py +64 -0
- thds/core/sqlite/upsert.py +139 -0
- thds/core/sqlite/write.py +99 -0
- thds/core/stack_context.py +41 -0
- thds/core/thunks.py +40 -0
- thds/core/timer.py +214 -0
- thds/core/tmp.py +85 -0
- thds/core/types.py +4 -0
- thds.core-1.31.20250116223856.dist-info/METADATA +68 -0
- thds.core-1.31.20250116223856.dist-info/RECORD +67 -0
- {thds.core-0.0.1.dist-info → thds.core-1.31.20250116223856.dist-info}/WHEEL +1 -1
- thds.core-1.31.20250116223856.dist-info/entry_points.txt +4 -0
- thds.core-1.31.20250116223856.dist-info/top_level.txt +1 -0
- thds.core-0.0.1.dist-info/METADATA +0 -8
- thds.core-0.0.1.dist-info/RECORD +0 -4
- thds.core-0.0.1.dist-info/top_level.txt +0 -1
thds/core/lazy.py
ADDED
@@ -0,0 +1,83 @@
"""A thread-safe lazy callable."""

import typing as ty
from threading import Lock, local

R = ty.TypeVar("R")
_LOCK_LOCK = Lock()  # for thread local storage, you need to create the lock on each thread.


def _get_or_create_lock(storage) -> Lock:
    """Ensures a lock is available on this storage object. Holds a global lock to make
    sure there is only ever one lock created for this storage object.

    Storage can be any object that can have an attribute assigned with __setattr__.
    """
    if hasattr(storage, "lock"):
        return storage.lock
    with _LOCK_LOCK:
        if hasattr(storage, "lock"):
            return storage.lock
        # creating a lock is itself very fast, whereas the source() callable may be slow.
        storage.lock = Lock()
        return storage.lock


class Lazy(ty.Generic[R]):
    """Ensures that the zero-argument callable (thunk) is called either 0 or 1 times for
    the lifetime of this wrapper and its internal storage.

    Most commonly, this wraps a singleton defined at module scope, but it could also be
    used for shorter-lifetime singletons.

    If thread-local storage is provided, then the wrapper will be called 0 or 1 times per
    thread.
    """

    def __init__(self, source: ty.Callable[[], R], storage=None):
        self._source = source
        self._storage = storage if storage is not None else lambda: 0
        self._storage.lock = Lock()
        # we store the Lock on the storage, because in some cases the storage may be
        # thread-local, and we need a separate lock per thread. However, we also create
        # the first lock in the constructor so that in most cases, we never need to use
        # the global _LOCK_LOCK, which will cause some very minor contention.

    def __call__(self) -> R:
        if hasattr(self._storage, "cached"):
            return self._storage.cached
        with _get_or_create_lock(self._storage):
            if hasattr(self._storage, "cached"):
                return self._storage.cached
            self._storage.cached = self._source()
            return self._storage.cached

    def __repr__(self) -> str:
        return f"Lazy({self._source})"

    if not ty.TYPE_CHECKING:
        # if I don't 'guard' it this way, mypy (unhelpfully) allows all attribute access (as Any)
        def __getattr__(self, name: str) -> ty.NoReturn:
            raise AttributeError(
                f"{self} has no attribute '{name}' -"
                f" did you mean to instantiate the object before access, i.e. `().{name}`?"
            )


class ThreadLocalLazy(Lazy[R]):
    """A Lazy (see docs above), but with thread-local storage."""

    def __init__(self, source: ty.Callable[[], R]):
        # local() creates a brand new instance every time it is called,
        # so this does not cause issues with storage being shared across multiple TTLazies
        super().__init__(source, storage=local())


def lazy(source: ty.Callable[[], R]) -> ty.Callable[[], R]:
    """Wraps a thunk so that it is called at most once, and the result is cached."""
    return Lazy(source)


def threadlocal_lazy(source: ty.Callable[[], R]) -> ty.Callable[[], R]:
    """Wraps a thunk so that it is called at most once per thread, and the result is cached."""
    return ThreadLocalLazy(source)
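A minimal usage sketch for this module (an editor's illustration, not part of the diff; assumes the `thds.core` package above is installed):

```
# Editor's sketch: the wrapped thunk runs exactly once, even when many
# threads race to call it first; threadlocal_lazy gives one value per thread.
from concurrent.futures import ThreadPoolExecutor
from itertools import count

from thds.core.lazy import lazy, threadlocal_lazy

_counter = count()
get_config = lazy(lambda: f"loaded-{next(_counter)}")  # pretend this load is expensive

with ThreadPoolExecutor(max_workers=8) as pool:
    results = set(pool.map(lambda _: get_config(), range(100)))
assert results == {"loaded-0"}  # one call, one cached value, shared by all threads

get_conn = threadlocal_lazy(lambda: f"conn-{next(_counter)}")
# get_conn() yields a distinct cached value per calling thread,
# which is handy for clients that are not thread-safe.
```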
thds/core/link.py
ADDED
@@ -0,0 +1,153 @@
"""Best-effort to link a destination to a source depending on file system support."""

import os
import platform
import shutil
import subprocess
import typing as ty
from pathlib import Path

from . import log, tmp
from . import types as ct

_IS_MAC = platform.system() == "Darwin"
logger = log.getLogger(__name__)


LinkType = ty.Literal["same", "ref", "hard", "soft", ""]


def _dest_parent(dest: ct.StrOrPath) -> Path:
    """Returns the parent directory that exists.

    If it does not exist, raises an exception.
    """
    dest_parent = Path(dest).parent
    if not dest_parent.exists():
        raise FileNotFoundError(f"Destination directory {dest_parent} does not exist")
    if not dest_parent.is_dir():
        raise NotADirectoryError(f"Destination {dest_parent} is not a directory")
    return dest_parent


def link(
    src: ct.StrOrPath,
    dest: ct.StrOrPath,
    *attempt_types: LinkType,
) -> LinkType:
    """Attempt reflink, then hardlink, then softlink.

    The destination directory must already exist.

    Return a non-empty string of type LinkType if a link was successful.

    Return empty string if no link could be created.
    """
    if not attempt_types:
        attempt_types = ("ref", "hard", "soft")
    src = Path(src).resolve()
    if src == Path(dest).resolve():
        return "same"
    assert os.path.exists(src), f"Source {src} does not exist"

    dest_parent = _dest_parent(dest)
    with tmp.temppath_same_fs(dest_parent) as tmp_link_dest:
        # links will _fail_ if the destination already exists.
        # Therefore, instead of linking directly to the destination,
        # we always create the link at a temporary file on the same filesystem
        # as the true destination. Then, we take advantage of atomic moves
        # within the same filesystem, because moves of links are themselves atomic!
        # https://unix.stackexchange.com/a/81900
        assert not tmp_link_dest.exists(), tmp_link_dest
        if _IS_MAC and "ref" in attempt_types:
            try:
                subprocess.check_output(["cp", "-c", str(src), str(tmp_link_dest)])
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a copy-on-write reflink from {src} to {dest}")
                return "ref"
            except subprocess.CalledProcessError:
                pass
        if "hard" in attempt_types:
            try:
                os.link(src, tmp_link_dest)
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a hardlink from {src} to {dest}")
                return "hard"
            except OSError as oserr:
                logger.warning(f"Unable to hard-link {src} to {dest} ({oserr})")
        if "soft" in attempt_types:
            try:
                os.symlink(src, tmp_link_dest)
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a softlink from {src} to {dest}")
                return "soft"
            except OSError as oserr:
                logger.warning(f"Unable to soft-link {src} to {dest} ({oserr})")

    return ""


def reify_if_link(path: Path):
    """Turn a softlink to a target file into a copy of the target file at the link location.

    Useful for cases where a symlink crossing filesystems may not work
    as expected, e.g. a Docker build.

    No-op for anything that is not a symlink to a file.
    """
    if not path.is_symlink() or not path.is_file():
        return
    logger.info(f'Reifying softlink "{path}"')
    dest = path.absolute()
    src = path.resolve()
    dest.unlink()
    shutil.copy(src, dest)


def link_or_copy(src: ct.StrOrPath, dest: ct.StrOrPath, *link_types: LinkType) -> LinkType:
    """If you absolutely have to get your file to its destination, you should use this
    over link(), which could theoretically fail under certain conditions.
    """
    if link_types:
        link_success_type = link(src, dest, *link_types)
        if link_success_type:
            return link_success_type
        logger.info(f"Unable to link {src} to {dest}; falling back to copy.")

    logger.debug("Copying %s to %s", src, dest)
    with tmp.temppath_same_fs(dest) as tmpfile:
        # atomic to the final destination since we're on the same filesystem.
        shutil.copyfile(src, tmpfile)
        shutil.move(str(tmpfile), dest)
    return ""


def cheap_copy(
    src: ct.StrOrPath,
    dest: ct.StrOrPath,
    *,
    permissions: ty.Optional[int] = None,
) -> Path:
    """Make a copy of the file, but first attempt Mac COW semantics if available.

    The copy will be done via a temporary file on the same filesystem as the destination,
    so it will 'appear' atomic at the destination.

    If provided, the given permissions will be applied to the destination prior to the
    atomic move.
    """
    dest_parent = _dest_parent(dest)
    with tmp.temppath_same_fs(dest_parent) as tmp_link_dest:
        cow_success = False
        if _IS_MAC:
            try:
                subprocess.check_output(["cp", "-c", os.fspath(src), str(tmp_link_dest)])
                cow_success = True
            except subprocess.CalledProcessError:
                pass
        if not cow_success:
            shutil.copyfile(src, tmp_link_dest)
        if permissions is not None:
            os.chmod(tmp_link_dest, permissions)
        os.rename(tmp_link_dest, dest)
    return Path(dest)
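A usage sketch (an editor's illustration, not part of the diff; assumes `thds.core` is installed): `link()` reports how the destination was materialized, and `link_or_copy()` guarantees the file arrives even when no link type works.

```
# Editor's sketch: try the cheapest link first, fall back to a real copy.
import tempfile
from pathlib import Path

from thds.core.link import link, link_or_copy

with tempfile.TemporaryDirectory() as d:
    src = Path(d) / "src.txt"
    src.write_text("hello")

    how = link(src, Path(d) / "dest.txt")  # defaults to trying "ref", "hard", "soft"
    print(how)  # e.g. "hard" on a typical local filesystem; "" if nothing worked

    # With explicit link types; an empty return here means it fell back to a copy.
    how2 = link_or_copy(src, Path(d) / "dest2.txt", "ref", "hard", "soft")
```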
thds/core/log/__init__.py
ADDED
@@ -0,0 +1,29 @@
"""New and improved logger for Trilliant.
Now you can add keyword arguments to your log statements and they will
get formatted nicely in the logging message. If we ever move to
structured/JSON logging, we can write a useful Formatter for that
scenario as well.
Additionally, you can add context (via keyword arguments)
to logs at any time by inserting a logger_context, and this context
will accompany all future logging statements further down the stack,
but not once it has been exited.
Observe:
```
logger = getLogger("FooF")
logger.warning("testing 1")
# 2022-02-18 10:01:16,825 WARNING FooF () testing 1
logger.info("testing 2", two=3, eight="nine")
# 2022-02-18 10:01:16,826 info    FooF (two=3,eight=nine) testing 2
with logger_context(App='bat', override='me'):
    logger.info("testing 3", yes='no')
    # 2022-02-18 10:01:16,827 info    FooF (App=bat,override=me,yes=no) testing 3
    logger.info("testing 4", override='you')
    # 2022-02-18 10:01:16,828 info    FooF (App=bat,override=you) testing 4
logger.info("testing 5")
# 2022-02-18 10:01:16,829 info    FooF () testing 5
```
"""

from .basic_config import DuplicateFilter, set_logger_to_console_level  # noqa: F401
from .kw_formatter import ThdsCompactFormatter  # noqa: F401
from .kw_logger import KwLogger, getLogger, logger_context, make_th_formatters_safe  # noqa: F401
thds/core/log/basic_config.py
ADDED
@@ -0,0 +1,171 @@
"""Contains the basic configuration for our logger. By importing thds.core.log, you import
and 'use' this configuration.
"""

import logging
import logging.config
import os
import sys
import typing as ty
from datetime import datetime
from pathlib import Path
from typing import Iterator, Tuple

from .. import config, home
from .json_formatter import ThdsJsonFormatter
from .kw_formatter import ThdsCompactFormatter
from .kw_logger import getLogger, make_th_formatters_safe
from .logfmt import mk_default_logfmter

_LOG_FILEPATH = os.getenv(
    "THDS_CORE_LOG_FILEPATH",
    str(
        # we're logging to a file by default now. Set this to empty string to turn off.
        # It's not a config item because it can't usefully be set after startup.
        home.HOMEDIR()
        / ".thds-logs"
        / "-".join(
            [
                datetime.now().isoformat(),
                f"ppid_{os.getppid()}",
                f"pid_{os.getpid()}",
                f"{'_'.join(sys.argv)[:150]}.log",
            ]
        ).replace("/", "_")
    ),
)


_LOGLEVEL = config.item("thds.core.log.level", logging.INFO, parse=logging.getLevelName)
_LOGLEVELS_FILEPATH = config.item("thds.core.log.levels_file", "", parse=lambda s: s.strip())
# see _parse_thds_loglevels_file for the format of this file.

FORMAT = config.item("thds.core.log.format", "")  # valid options are 'logfmt', 'json', and ''.


def _pick_formatter() -> ty.Callable[[], logging.Formatter]:
    if FORMAT() == "logfmt":
        return mk_default_logfmter
    if FORMAT() == "json":
        return ThdsJsonFormatter
    return ThdsCompactFormatter


# this is the base of what gets passed to logging.dictConfig.
_BASE_LOG_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {"default": {"()": _pick_formatter()}},
    "handlers": {"console": {"class": "logging.StreamHandler", "formatter": "default"}},
    "root": {"handlers": ["console"], "level": _LOGLEVEL()},
}


def set_logger_to_console_level(config: dict, logger_name: str, level: int) -> dict:
    if logger_name == "*":
        if level != _LOGLEVEL():
            getLogger(__name__).warning(f"Setting root logger to {logging.getLevelName(level)}")
        return dict(config, root=dict(config["root"], level=level))
    loggers = config.get("loggers") or dict()
    loggers = {**loggers, logger_name: {"level": level, "handlers": ["console"], "propagate": False}}
    # propagate=False means: don't pass this up the chain to loggers
    # matching a subset of our name. The level is set on the logger,
    # not the handler, but if a logger is set to propagate, then it
    # will pass its message up the chain until it hits propagate=False
    # or the root. And any loggers with the appropriate logging level
    # will emit to any handlers they have configured. So, generally,
    # you want to put handlers at the same level where
    # propagate=False, which is what we do here.
    return dict(config, loggers=loggers)


def _parse_thds_loglevels_file(filepath: str) -> Iterator[Tuple[str, int]]:
    """Example loglevels file:

    ```
    [debug]
    thds.adls.download
    thds.mops.pure.pickle_runner
    thds.nppes.intake.parquet_from_csv

    [warning]
    *
    # the * sets the root logger to warning-and-above. INFO is the default.
    ```

    The last value encountered for any given logger (or the root) will
    override any previous values.
    """
    current_level = _LOGLEVEL()
    if not os.path.exists(filepath):
        return
    with open(filepath) as f:
        for line in f.readlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("[") and line.endswith("]"):
                current_level = getattr(
                    logging, line[1:-1].upper()
                )  # AttributeError means invalid level
                continue
            logger_name = line
            yield logger_name, current_level


class DuplicateFilter:
    """Filters away duplicate log messages.

    Taken from @erb's answer on SO: https://stackoverflow.com/questions/31953272/logging-print-message-only-once
    """

    def __init__(self, logger: ty.Union[logging.Logger, logging.LoggerAdapter]):
        self.msgs: ty.Set[str] = set()
        self.logger = logger.logger if isinstance(logger, logging.LoggerAdapter) else logger

    def filter(self, record: logging.LogRecord):
        msg = str(record.msg)
        is_duplicate = msg in self.msgs
        if not is_duplicate:
            self.msgs.add(msg)
        return not is_duplicate

    def __enter__(self):
        self.logger.addFilter(self)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.logger.removeFilter(self)


if not logging.getLogger().hasHandlers():
    live_config = _BASE_LOG_CONFIG
    for logger_name, level in _parse_thds_loglevels_file(_LOGLEVELS_FILEPATH()):
        live_config = set_logger_to_console_level(live_config, logger_name, level)

    if _LOG_FILEPATH:
        log_path = Path(_LOG_FILEPATH)
        try:
            log_path.parent.mkdir(parents=True, exist_ok=True)
            # ^ I hate doing IO in module scope, but we've waited until the last possible moment...
            live_config["handlers"]["file"] = {  # type: ignore
                "class": "logging.FileHandler",
                "formatter": "default",
                "filename": _LOG_FILEPATH,
                "delay": True,  # no need to have empty logfiles sitting around
            }
            live_config["root"]["handlers"].append("file")  # type: ignore
        except Exception as err:
            print(f"Unable to create log directory at '{log_path.parent}' - ERROR: {err}")

    logging.config.dictConfig(live_config)
    make_th_formatters_safe(logging.getLogger())


class StartsWithFilter(logging.Filter):
    def __init__(self, startswith: str):
        self.startswith = startswith

    def filter(self, record):
        return not record.name.startswith(self.startswith)


for noisy_logger in ("py4j.java_gateway", "py4j.clientserver"):  # 11.3, 9.1
    logging.getLogger(noisy_logger).addFilter(StartsWithFilter(noisy_logger))
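A short sketch of `DuplicateFilter` as a context manager (an editor's illustration, not part of the diff; assumes `thds.core` is installed): inside the block, repeated messages on the logger are emitted only once.

```
# Editor's sketch: suppress repeats of the same message within a noisy loop.
from thds.core import log

logger = log.getLogger("noisy.retry.loop")

with log.DuplicateFilter(logger):  # accepts a Logger or a LoggerAdapter
    for attempt in range(1000):
        logger.info("retry scheduled")  # emitted a single time inside this block
logger.info("retry scheduled")  # emitted again; the filter was removed on exit
```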
thds/core/log/json_formatter.py
ADDED
@@ -0,0 +1,43 @@
"""A JSON formatter that understands what to do with our keyword logger things."""

import json
import logging

from .kw_logger import th_keyvals_from_record


class ThdsJsonFormatter(logging.Formatter):
    def _format_exception_and_trace(self, record: logging.LogRecord):
        # without the following boilerplate, exceptions and stack traces would not
        # be formatted as part of the log output at all.
        formatted = ""
        if record.exc_info:
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
            if record.exc_text:
                formatted += "\n" + record.exc_text
        if record.stack_info:
            formatted += "\n" + self.formatStack(record.stack_info)
        return formatted

    def format(self, record: logging.LogRecord) -> str:
        """Format the record as a JSON string."""
        # We use a dictionary to hold the record data and then convert it to JSON,
        # because we want to be able to add arbitrary key-value pairs to the log record
        # and have them show up in the JSON output.
        record_dict = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "module": record.module,
            "msg": record.getMessage(),
        }
        # Add the extra data, if it exists.
        if record.__dict__.get("extra"):
            record_dict.update(record.__dict__["extra"])
        record_dict.update(th_keyvals_from_record(record) or {})

        # Convert the dictionary to a JSON string.
        formatted = json.dumps(record_dict)
        if exc_text := self._format_exception_and_trace(record):
            formatted += exc_text
        return formatted
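A wiring sketch (an editor's illustration, not part of the diff; assumes `thds.core` is installed): attaching `ThdsJsonFormatter` to a handler by hand, rather than selecting it via the `thds.core.log.format` config item.

```
# Editor's sketch: one JSON object per log line, keyword context included.
import logging

from thds.core.log import getLogger
from thds.core.log.json_formatter import ThdsJsonFormatter

handler = logging.StreamHandler()
handler.setFormatter(ThdsJsonFormatter())
etl = logging.getLogger("etl")
etl.addHandler(handler)
etl.propagate = False  # keep the default console handler from double-printing

getLogger("etl.jobs").info("ingest finished", rows=1234, table="claims")
# prints something like (timestamp/module elided here):
# {"timestamp": "...", "level": "INFO", "module": "...",
#  "msg": "ingest finished", "rows": 1234, "table": "claims"}
```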
thds/core/log/kw_formatter.py
ADDED
@@ -0,0 +1,84 @@
"""This is the 'standard' keyword-formatting logger formatter for our logs.

It is enabled by default via basic_config.py, but is not required.
"""

import logging
import typing as ty

from .. import ansi_esc, config
from .kw_logger import th_keyvals_from_record

MAX_MODULE_NAME_LEN = config.item("max_module_name_len", 40, parse=int)
_MODULE_NAME_FMT_STR = "{compressed_name:" + str(MAX_MODULE_NAME_LEN()) + "}"

_COLOR_LEVEL_MAP = {
    "low": f"{ansi_esc.fg.BLUE}{{}}{ansi_esc.fg.RESET}",
    "info": f"{ansi_esc.fg.GREEN}{{}}{ansi_esc.fg.RESET}",
    "warning": (
        f"{ansi_esc.fg.YELLOW}{ansi_esc.style.BRIGHT}" "{}" f"{ansi_esc.style.NORMAL}{ansi_esc.fg.RESET}"
    ),
    "error": (
        f"{ansi_esc.bg.ERROR_RED}{ansi_esc.style.BRIGHT}"
        "{}"
        f"{ansi_esc.style.NORMAL}{ansi_esc.bg.RESET}"
    ),
    "critical": (
        f"{ansi_esc.bg.MAGENTA}{ansi_esc.style.BRIGHT}{ansi_esc.style.BLINK}"  # 😂
        "{}"
        f"{ansi_esc.bg.RESET}{ansi_esc.style.NORMAL}{ansi_esc.style.NO_BLINK}"
    ),
}


def log_level_color(levelno: int, base_levelname: str) -> str:
    if levelno < logging.INFO:
        return _COLOR_LEVEL_MAP["low"].format(base_levelname.lower())
    elif levelno < logging.WARNING:
        return _COLOR_LEVEL_MAP["info"].format(base_levelname.lower())
    elif levelno < logging.ERROR:
        return _COLOR_LEVEL_MAP["warning"].format(base_levelname)
    elif levelno < logging.CRITICAL:
        return _COLOR_LEVEL_MAP["error"].format(base_levelname)
    return _COLOR_LEVEL_MAP["critical"].format(base_levelname)


class ThdsCompactFormatter(logging.Formatter):
    """This new formatter is more compact than what we had before, and hopefully makes logs a bit more readable overall."""

    @staticmethod
    def format_module_name(name: str) -> str:
        max_module_name_len = MAX_MODULE_NAME_LEN()
        compressed_name = (
            name
            if len(name) <= max_module_name_len
            else name[: max_module_name_len // 2 - 2] + "..." + name[-max_module_name_len // 2 + 1 :]
        )
        assert len(compressed_name) <= max_module_name_len
        return _MODULE_NAME_FMT_STR.format(compressed_name=compressed_name)

    def _format_exception_and_trace(self, record: logging.LogRecord):
        # without the following boilerplate, exceptions and stack traces would not
        # be formatted as part of the log output at all.
        formatted = ""
        if record.exc_info:
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
            if record.exc_text:
                formatted += "\n" + record.exc_text
        if record.stack_info:
            formatted += "\n" + self.formatStack(record.stack_info)
        return formatted

    def format(self, record: logging.LogRecord):
        record.message = record.getMessage()

        base_levelname = f"{record.levelname:7}"  # 7 is the length of the string 'WARNING'
        levelname = log_level_color(record.levelno, base_levelname)

        th_ctx: ty.Any = th_keyvals_from_record(record) or tuple()
        short_name = self.format_module_name(record.name)
        formatted = f"{self.formatTime(record)} {levelname} {short_name} {th_ctx} {record.message}"
        if exc_text := self._format_exception_and_trace(record):
            formatted += exc_text
        return formatted
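A quick check of the module-name compression (an editor's illustration, not part of the diff; assumes `thds.core` is installed):

```
# Editor's sketch: long dotted logger names are squeezed to at most
# MAX_MODULE_NAME_LEN (default 40) characters for the compact log layout.
from thds.core.log.kw_formatter import ThdsCompactFormatter

print(ThdsCompactFormatter.format_module_name("thds.core.log"))
# short names pass through unchanged, right-padded to the configured width

print(ThdsCompactFormatter.format_module_name("thds.some.extremely.deeply.nested.module.name"))
# long names keep their head and tail, with the middle elided by '...'
```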
thds/core/log/kw_logger.py
ADDED
@@ -0,0 +1,93 @@
"""A logger which allows passing of arbitrary keyword arguments at the end of a logger call,
so that that context gets embedded directly into the output in one way or another.
"""

import contextlib
import logging
import logging.config
from copy import copy
from typing import Any, Dict, MutableMapping, Optional

from ..stack_context import StackContext

_LOGGING_KWARGS = ("exc_info", "stack_info", "stacklevel", "extra")
# These are the officially accepted keyword arguments for a call to
# log something with the logger. Anything passed with these names
# should be passed through directly - anything else can be passed through
# to the keyword formatter.

TH_REC_CTXT = "th_context"
# this names a nested dict on some LogRecords that contains things we
# want to log. It is usable as a field specifier in log format strings.


class _THContext(Dict[str, Any]):
    def __str__(self):
        return ",".join(map("(%s=%s)".__mod__, self.items())) if self else "()"


_LOG_CONTEXT: StackContext[_THContext] = StackContext("TH_LOG_CONTEXT", _THContext())


@contextlib.contextmanager
def logger_context(**kwargs):
    """Put some key-value pairs into the keyword-based logger context."""
    with _LOG_CONTEXT.set(_THContext(_LOG_CONTEXT(), **kwargs)):
        yield


def _embed_th_context_in_extra_kw(kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Extracts the key-value pairs embedded via `logger_context`, overlays those with
    keyword arguments to the logger, and embeds them all in the logger's "extra" dictionary.
    """
    th_context = _LOG_CONTEXT()
    th_kwargs = [k for k in kwargs if k not in _LOGGING_KWARGS]
    if th_kwargs:
        th_context = copy(th_context)
        th_context.update((k, kwargs.pop(k)) for k in th_kwargs)
    extra = kwargs["extra"] = kwargs.get("extra", dict())
    extra[TH_REC_CTXT] = th_context
    return kwargs


class KwLogger(logging.LoggerAdapter):
    """Allows logging of extra keyword arguments straight through without
    needing an "extra" dictionary.
    """

    def process(self, msg, kwargs):
        return msg, _embed_th_context_in_extra_kw(kwargs)


def th_keyvals_from_record(record: logging.LogRecord) -> Optional[Dict[str, Any]]:
    """Extracts the key-value pairs embedded via `logger_context` or keyword arguments from a LogRecord."""
    return getattr(record, TH_REC_CTXT, None)


def getLogger(name: Optional[str] = None) -> logging.LoggerAdapter:
    """Using this LoggerAdapter will allow you to pass key/value context at the end of
    your logging statements, e.g. `logger.info("my message", key1=value1, key2=value2)`.

    Provided that you haven't configured your own logging format, this module will do so for you,
    ensuring that these contextual key-value pairs render in your log messages. To ensure their presence
    when configuring logging yourself, just put a "%(th_context)s" format specifier somewhere in your
    log message format.
    """
    return KwLogger(logging.getLogger(name), dict())


def make_th_formatters_safe(logger: logging.Logger):
    """Non-adapted loggers may still run into our root format string,
    which expects "%(th_context)s" to be present on every LogRecord.

    This will patch one in to any logs making it to our configured formatter.
    """
    for handler in logger.handlers:
        formatter = handler.formatter
        if formatter and hasattr(formatter, "_style") and TH_REC_CTXT in formatter._style._fmt:
            fmt_msg = formatter.formatMessage

            def wrapper_formatMessage(record: logging.LogRecord):
                if None is getattr(record, TH_REC_CTXT, None):
                    setattr(record, TH_REC_CTXT, _LOG_CONTEXT())
                return fmt_msg(record)  # noqa: B023

            setattr(formatter, "formatMessage", wrapper_formatMessage)  # noqa: B010