thds.core 0.0.1__py3-none-any.whl → 1.31.20250116223856__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.core might be problematic. Click here for more details.

Files changed (70) hide show
  1. thds/core/__init__.py +48 -0
  2. thds/core/ansi_esc.py +46 -0
  3. thds/core/cache.py +201 -0
  4. thds/core/calgitver.py +82 -0
  5. thds/core/concurrency.py +100 -0
  6. thds/core/config.py +250 -0
  7. thds/core/decos.py +55 -0
  8. thds/core/dict_utils.py +188 -0
  9. thds/core/env.py +40 -0
  10. thds/core/exit_after.py +121 -0
  11. thds/core/files.py +125 -0
  12. thds/core/fretry.py +115 -0
  13. thds/core/generators.py +56 -0
  14. thds/core/git.py +81 -0
  15. thds/core/hash_cache.py +86 -0
  16. thds/core/hashing.py +106 -0
  17. thds/core/home.py +15 -0
  18. thds/core/hostname.py +10 -0
  19. thds/core/imports.py +17 -0
  20. thds/core/inspect.py +58 -0
  21. thds/core/iterators.py +9 -0
  22. thds/core/lazy.py +83 -0
  23. thds/core/link.py +153 -0
  24. thds/core/log/__init__.py +29 -0
  25. thds/core/log/basic_config.py +171 -0
  26. thds/core/log/json_formatter.py +43 -0
  27. thds/core/log/kw_formatter.py +84 -0
  28. thds/core/log/kw_logger.py +93 -0
  29. thds/core/log/logfmt.py +302 -0
  30. thds/core/merge_args.py +168 -0
  31. thds/core/meta.json +8 -0
  32. thds/core/meta.py +518 -0
  33. thds/core/parallel.py +200 -0
  34. thds/core/pickle_visit.py +24 -0
  35. thds/core/prof.py +276 -0
  36. thds/core/progress.py +112 -0
  37. thds/core/protocols.py +17 -0
  38. thds/core/py.typed +0 -0
  39. thds/core/scaling.py +39 -0
  40. thds/core/scope.py +199 -0
  41. thds/core/source.py +238 -0
  42. thds/core/source_serde.py +104 -0
  43. thds/core/sqlite/__init__.py +21 -0
  44. thds/core/sqlite/connect.py +33 -0
  45. thds/core/sqlite/copy.py +35 -0
  46. thds/core/sqlite/ddl.py +4 -0
  47. thds/core/sqlite/functions.py +63 -0
  48. thds/core/sqlite/index.py +22 -0
  49. thds/core/sqlite/insert_utils.py +23 -0
  50. thds/core/sqlite/merge.py +84 -0
  51. thds/core/sqlite/meta.py +190 -0
  52. thds/core/sqlite/read.py +66 -0
  53. thds/core/sqlite/sqlmap.py +179 -0
  54. thds/core/sqlite/structured.py +138 -0
  55. thds/core/sqlite/types.py +64 -0
  56. thds/core/sqlite/upsert.py +139 -0
  57. thds/core/sqlite/write.py +99 -0
  58. thds/core/stack_context.py +41 -0
  59. thds/core/thunks.py +40 -0
  60. thds/core/timer.py +214 -0
  61. thds/core/tmp.py +85 -0
  62. thds/core/types.py +4 -0
  63. thds.core-1.31.20250116223856.dist-info/METADATA +68 -0
  64. thds.core-1.31.20250116223856.dist-info/RECORD +67 -0
  65. {thds.core-0.0.1.dist-info → thds.core-1.31.20250116223856.dist-info}/WHEEL +1 -1
  66. thds.core-1.31.20250116223856.dist-info/entry_points.txt +4 -0
  67. thds.core-1.31.20250116223856.dist-info/top_level.txt +1 -0
  68. thds.core-0.0.1.dist-info/METADATA +0 -8
  69. thds.core-0.0.1.dist-info/RECORD +0 -4
  70. thds.core-0.0.1.dist-info/top_level.txt +0 -1
thds/core/lazy.py ADDED
@@ -0,0 +1,83 @@
1
+ """A thread-safe lazy callable."""
2
+
3
+ import typing as ty
4
+ from threading import Lock, local
5
+
6
+ R = ty.TypeVar("R")
7
+ _LOCK_LOCK = Lock() # for thread local storage, you need to create the lock on each thread.
8
+
9
+
10
+ def _get_or_create_lock(storage) -> Lock:
11
+ """Ensures a lock is available on this storage object. Holds a global lock to make
12
+ sure there is only ever one lock created for this storage object.
13
+
14
+ Storage can be any object that can have an attribute assigned with __setattr__.
15
+ """
16
+ if hasattr(storage, "lock"):
17
+ return storage.lock
18
+ with _LOCK_LOCK:
19
+ if hasattr(storage, "lock"):
20
+ return storage.lock
21
+ # creating a lock is itself very fast, whereas the source() callable may be slow.
22
+ storage.lock = Lock()
23
+ return storage.lock
24
+
25
+
26
class Lazy(ty.Generic[R]):
    """Caches the result of a zero-argument callable (thunk).

    The wrapped ``source`` runs at most once for the lifetime of this wrapper
    and its internal storage — typically a module-scope singleton, though
    shorter-lived singletons work too. With thread-local storage (see
    ThreadLocalLazy) it runs at most once per thread instead.
    """

    def __init__(self, source: ty.Callable[[], R], storage=None):
        self._source = source
        if storage is None:
            # any attribute-assignable object will do; a bare lambda is the
            # cheapest such object when the caller supplies no storage.
            storage = lambda: 0  # noqa: E731
        self._storage = storage
        # Pre-create the per-storage lock so the common case never touches the
        # global _LOCK_LOCK (minor contention). Thread-local storage still gets
        # fresh locks lazily on other threads via _get_or_create_lock.
        self._storage.lock = Lock()

    def __call__(self) -> R:
        storage = self._storage
        try:
            return storage.cached
        except AttributeError:
            pass  # not computed yet (on this storage) — fall through to the lock
        with _get_or_create_lock(storage):
            try:
                # another thread may have filled the cache while we waited
                return storage.cached
            except AttributeError:
                storage.cached = self._source()
                return storage.cached

    def __repr__(self) -> str:
        return f"Lazy({self._source})"

    if not ty.TYPE_CHECKING:
        # if I don't 'guard' it this way, mypy (unhelpfully) allows all attribute access (as Any)
        def __getattr__(self, name: str) -> ty.NoReturn:
            raise AttributeError(
                f"{self} has no attribute '{name}' -"
                f" did you mean to instantiate the object before access, i.e. `().{name}`?"
            )
65
+
66
+
67
class ThreadLocalLazy(Lazy[R]):
    """A Lazy (see docs above) whose cache lives in thread-local storage, so
    the thunk runs at most once per thread rather than once per process."""

    def __init__(self, source: ty.Callable[[], R]):
        # Every call to local() builds a brand-new instance, so storage is
        # never shared across multiple ThreadLocalLazy wrappers.
        super().__init__(source, storage=local())
74
+
75
+
76
def lazy(source: ty.Callable[[], R]) -> ty.Callable[[], R]:
    """Return a callable that invokes ``source`` at most once and caches the result."""
    wrapper: Lazy[R] = Lazy(source)
    return wrapper
79
+
80
+
81
def threadlocal_lazy(source: ty.Callable[[], R]) -> ty.Callable[[], R]:
    """Return a callable that invokes ``source`` at most once per thread, caching per-thread."""
    wrapper: ThreadLocalLazy[R] = ThreadLocalLazy(source)
    return wrapper
thds/core/link.py ADDED
@@ -0,0 +1,153 @@
1
+ """Best-effort to link a destination to a source depending on file system support."""
2
+
3
+ import os
4
+ import platform
5
+ import shutil
6
+ import subprocess
7
+ import typing as ty
8
+ from pathlib import Path
9
+
10
+ from . import log, tmp
11
+ from . import types as ct
12
+
13
+ _IS_MAC = platform.system() == "Darwin"
14
+ logger = log.getLogger(__name__)
15
+
16
+
17
+ LinkType = ty.Literal["same", "ref", "hard", "soft", ""]
18
+
19
+
20
def _dest_parent(dest: ct.StrOrPath) -> Path:
    """Return ``dest``'s parent directory, validating that it already exists.

    Raises:
        FileNotFoundError: the parent path does not exist.
        NotADirectoryError: the parent path exists but is not a directory.
    """
    parent = Path(dest).parent
    if not parent.exists():
        raise FileNotFoundError(f"Destination directory {parent} does not exist")
    if not parent.is_dir():
        raise NotADirectoryError(f"Destination {parent} is not a directory")
    return parent
31
+
32
+
33
def link(
    src: ct.StrOrPath,
    dest: ct.StrOrPath,
    *attempt_types: LinkType,
) -> LinkType:
    """Attempt reflink, then hardlink, then softlink.

    The destination directory must already exist.

    Args:
        src: existing file to link from.
        dest: path to create; its parent directory must already exist.
        attempt_types: the link types to try, in priority order.
            Defaults to ("ref", "hard", "soft").

    Returns:
        A non-empty LinkType if a link was created, "same" if src and dest
        already resolve to the same path, or "" if no link could be created.

    Raises:
        FileNotFoundError: if src does not exist.
    """
    if not attempt_types:
        attempt_types = ("ref", "hard", "soft")
    src = Path(src).resolve()
    if src == Path(dest).resolve():
        return "same"
    if not os.path.exists(src):
        # explicit raise instead of `assert`: assertions are stripped under
        # `python -O`, and a missing source is a runtime condition, not a bug.
        raise FileNotFoundError(f"Source {src} does not exist")

    dest_parent = _dest_parent(dest)
    with tmp.temppath_same_fs(dest_parent) as tmp_link_dest:
        # links will _fail_ if the destination already exists.
        # Therefore, instead of linking directly to the destination,
        # we always create the link at a temporary file on the same filesystem
        # as the true destination. Then, we take advantage of atomic moves
        # within the same filesystem, because moves of links are themselves atomic!
        # https://unix.stackexchange.com/a/81900
        assert not tmp_link_dest.exists(), tmp_link_dest
        if _IS_MAC and "ref" in attempt_types:
            try:
                # `cp -c` requests a copy-on-write clone (APFS).
                subprocess.check_output(["cp", "-c", str(src), str(tmp_link_dest)])
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a copy-on-write reflink from {src} to {dest}")
                return "ref"
            except subprocess.CalledProcessError:
                pass  # fall through to the next link type
        if "hard" in attempt_types:
            try:
                os.link(src, tmp_link_dest)
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a hardlink from {src} to {dest}")
                return "hard"
            except OSError as oserr:
                logger.warning(f"Unable to hard-link {src} to {dest} ({oserr})")
        if "soft" in attempt_types:
            try:
                os.symlink(src, tmp_link_dest)
                os.rename(tmp_link_dest, dest)
                logger.debug(f"Created a softlink from {src} to {dest}")
                return "soft"
            except OSError as oserr:
                logger.warning(f"Unable to soft-link {src} to {dest}" f" ({oserr})")

    return ""
88
+
89
+
90
def reify_if_link(path: Path):
    """Replace a symlink to a file with a real copy of its target, in place.

    Useful where a symlink crossing filesystems may not behave as expected,
    e.g. inside a Docker build. Anything that is not a symlink pointing at a
    regular file is left untouched.
    """
    is_file_symlink = path.is_symlink() and path.is_file()
    if not is_file_symlink:
        return
    logger.info(f'Reifying softlink "{path}"')
    link_location = path.absolute()
    target = path.resolve()  # the real file the link points at
    link_location.unlink()
    shutil.copy(target, link_location)
105
+
106
+
107
def link_or_copy(src: ct.StrOrPath, dest: ct.StrOrPath, *link_types: LinkType) -> LinkType:
    """Link src to dest if possible, otherwise copy it there atomically.

    Prefer this over link() when the file absolutely must arrive at its
    destination: link() can legitimately fail, while the copy fallback here
    will not. Returns the successful LinkType, or "" when a plain copy was made.
    """
    if link_types:
        how = link(src, dest, *link_types)
        if how:
            return how
        logger.info(f"Unable to link {src} to {dest}; falling back to copy.")

    logger.debug("Copying %s to %s", src, dest)
    with tmp.temppath_same_fs(dest) as staging:
        # Copy to a temp file on dest's filesystem first; the final move is
        # then atomic because both paths share a filesystem.
        shutil.copyfile(src, staging)
        shutil.move(str(staging), dest)
    return ""
123
+
124
+
125
def cheap_copy(
    src: ct.StrOrPath,
    dest: ct.StrOrPath,
    *,
    permissions: ty.Optional[int] = None,
) -> Path:
    """Copy src to dest, preferring Mac copy-on-write semantics when available.

    The data lands in a temporary file on the destination's filesystem first,
    so the final rename makes the copy 'appear' atomically at dest. When
    given, ``permissions`` are applied before that atomic move.
    """
    dest_parent = _dest_parent(dest)
    with tmp.temppath_same_fs(dest_parent) as staging:
        cloned = False
        if _IS_MAC:
            try:
                # `cp -c` asks APFS for a copy-on-write clone.
                subprocess.check_output(["cp", "-c", os.fspath(src), str(staging)])
                cloned = True
            except subprocess.CalledProcessError:
                pass  # fall back to a byte-for-byte copy below
        if not cloned:
            shutil.copyfile(src, staging)
        if permissions is not None:
            os.chmod(staging, permissions)
        os.rename(staging, dest)
    return Path(dest)
@@ -0,0 +1,29 @@
1
+ """New and improved logger for Trilliant.
2
+ Now you can add keyword arguments to your log statements and they will
3
+ get formatted nicely in the logging message. If we ever move to
4
+ structured/JSON logging, we can write a useful Formatter for that
5
+ scenario as well.
6
+ Additionally, you can add additional context (via keyword arguments)
7
+ to logs at any time by inserting a logger_context, and this context
8
+ will accompany all future logging statements further down the stack,
9
+ but not once it has been exited.
10
+ Observe:
11
+ ```
12
+ logger = getLogger("FooF")
13
+ logger.warning("testing")
14
+ # 2022-02-18 10:01:16,825 WARNING FooF () testing 1
15
+ logger.info("testing 2", two=3, eight="nine")
16
+ # 2022-02-18 10:01:16,826 info FooF (two=3,eight=nine) testing 2
17
+ with logger_context(App='bat', override='me'):
18
+ logger.info("testing 3", yes='no')
19
+ # 2022-02-18 10:01:16,827 info FooF (App=bat,override=me,yes=no) testing 3
20
+ logger.info("testing 4", override='you')
21
+ # 2022-02-18 10:01:16,828 info FooF (App=bat,override=you) testing 4
22
+ logger.info("testing 5")
23
+ # 2022-02-18 10:01:16,829 info FooF () testing 5
24
+ ```
25
+ """
26
+
27
+ from .basic_config import DuplicateFilter, set_logger_to_console_level # noqa: F401
28
+ from .kw_formatter import ThdsCompactFormatter # noqa: F401
29
+ from .kw_logger import KwLogger, getLogger, logger_context, make_th_formatters_safe # noqa: F401
@@ -0,0 +1,171 @@
1
+ """Contains the basic configuration for our logger. By importing thds.core.log, you import
2
+ and 'use' this configuration.
3
+ """
4
+
5
+ import logging
6
+ import logging.config
7
+ import os
8
+ import sys
9
+ import typing as ty
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Iterator, Tuple
13
+
14
+ from .. import config, home
15
+ from .json_formatter import ThdsJsonFormatter
16
+ from .kw_formatter import ThdsCompactFormatter
17
+ from .kw_logger import getLogger, make_th_formatters_safe
18
+ from .logfmt import mk_default_logfmter
19
+
20
# Default log-file path: ~/.thds-logs/<timestamp>-ppid_<N>-pid_<N>-<argv>.log
# Read via os.getenv (not config.item) because it cannot usefully change after startup.
_LOG_FILEPATH = os.getenv(
    "THDS_CORE_LOG_FILEPATH",
    str(
        # we're logging to a file by default now. Set this to empty string to turn off.
        # It's not a config item because it can't usefully be set after startup.
        home.HOMEDIR()
        / ".thds-logs"
        / "-".join(
            [
                datetime.now().isoformat(),
                f"ppid_{os.getppid()}",
                f"pid_{os.getpid()}",
                f"{'_'.join(sys.argv)[:150]}.log",  # truncate: argv can be very long
            ]
        ).replace("/", "_")  # argv may contain slashes; keep it a single filename
    ),
)


# Root log level; parsed by name (e.g. "DEBUG") via logging.getLevelName.
_LOGLEVEL = config.item("thds.core.log.level", logging.INFO, parse=logging.getLevelName)
_LOGLEVELS_FILEPATH = config.item("thds.core.log.levels_file", "", parse=lambda s: s.strip())
# see _parse_thds_loglevels_file for format of this file.

FORMAT = config.item("thds.core.log.format", "")  # valid options are 'logfmt', 'json', and ''.
44
+
45
+
46
def _pick_formatter() -> ty.Callable[[], logging.Formatter]:
    """Select the formatter factory matching the configured FORMAT value."""
    factories: ty.Dict[str, ty.Callable[[], logging.Formatter]] = {
        "logfmt": mk_default_logfmter,
        "json": ThdsJsonFormatter,
    }
    # Any unrecognized (or empty) format falls back to the compact formatter.
    return factories.get(FORMAT(), ThdsCompactFormatter)
52
+
53
+
54
# this is the base of what gets passed to logging.dictConfig.
# One "default" formatter (factory chosen by _pick_formatter via the "()" key)
# feeding a single console StreamHandler on the root logger at the configured level.
_BASE_LOG_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {"default": {"()": _pick_formatter()}},
    "handlers": {"console": {"class": "logging.StreamHandler", "formatter": "default"}},
    "root": {"handlers": ["console"], "level": _LOGLEVEL()},
}
62
+
63
+
64
def set_logger_to_console_level(config: dict, logger_name: str, level: int) -> dict:
    """Return a copy of the dictConfig with ``logger_name`` pinned to ``level``.

    "*" targets the root logger (warning if that differs from the configured
    default). Any other name gets its own console handler with
    propagate=False: records for that logger stop there instead of climbing
    to ancestor loggers, so the level on the named logger is authoritative.
    Pairing an explicit handler with propagate=False at the same level is the
    standard pattern for that.
    """
    if logger_name == "*":
        if level != _LOGLEVEL():
            getLogger(__name__).warning(f"Setting root logger to {logging.getLevelName(level)}")
        return dict(config, root=dict(config["root"], level=level))
    named_logger = {"level": level, "handlers": ["console"], "propagate": False}
    existing_loggers = config.get("loggers") or dict()
    return dict(config, loggers={**existing_loggers, logger_name: named_logger})
80
+
81
+
82
def _parse_thds_loglevels_file(filepath: str) -> Iterator[Tuple[str, int]]:
    """Yield (logger_name, level) pairs parsed from a loglevels file.

    Example loglevels file:

    ```
    [debug]
    thds.adls.download
    thds.mops.pure.pickle_runner

    [warning]
    *
    # the * sets the root logger to warning-and-above. INFO is the default.
    ```

    Section headers like `[debug]` set the level for the names that follow;
    `#` starts a comment. The last value encountered for any given logger
    (or the root, via `*`) overrides previous ones. A missing file yields
    nothing at all.
    """
    level = _LOGLEVEL()  # names before any section header get the default level
    if not os.path.exists(filepath):
        return
    with open(filepath) as fh:
        for raw_line in fh:
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            if entry.startswith("[") and entry.endswith("]"):
                # an invalid section name raises AttributeError here
                level = getattr(logging, entry[1:-1].upper())
                continue
            yield entry, level
114
+
115
+
116
class DuplicateFilter:
    """Context manager that suppresses duplicate log messages on a logger.

    Taken from @erb's answer on SO: https://stackoverflow.com/questions/31953272/logging-print-message-only-once
    """

    def __init__(self, logger: ty.Union[logging.Logger, logging.LoggerAdapter]):
        # LoggerAdapters cannot carry filters; unwrap to the underlying Logger.
        if isinstance(logger, logging.LoggerAdapter):
            logger = logger.logger
        self.logger = logger
        self.msgs: ty.Set[str] = set()

    def filter(self, record: logging.LogRecord):
        """Return True (emit) only the first time a given message text is seen."""
        msg = str(record.msg)
        seen_before = msg in self.msgs
        self.msgs.add(msg)
        return not seen_before

    def __enter__(self):
        self.logger.addFilter(self)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.logger.removeFilter(self)
138
+
139
+
140
# Module-scope side effect: install our logging configuration, but ONLY if
# nothing else has configured a handler yet — never clobber an application's
# own logging setup.
if not logging.getLogger().hasHandlers():
    live_config = _BASE_LOG_CONFIG
    # apply per-logger overrides from the (optional) loglevels file
    for logger_name, level in _parse_thds_loglevels_file(_LOGLEVELS_FILEPATH()):
        live_config = set_logger_to_console_level(live_config, logger_name, level)

    if _LOG_FILEPATH:
        log_path = Path(_LOG_FILEPATH)
        try:
            log_path.parent.mkdir(parents=True, exist_ok=True)
            # ^ I hate doing IO in module scope, but we've waited until the last possible moment...
            live_config["handlers"]["file"] = {  # type: ignore
                "class": "logging.FileHandler",
                "formatter": "default",
                "filename": _LOG_FILEPATH,
                "delay": True,  # no need to have empty logfiles sitting around
            }
            live_config["root"]["handlers"].append("file")  # type: ignore
        except Exception as err:
            # best-effort: file logging is optional; console logging still works
            print(f"Unable to create log directory at '{log_path.parent}' - ERROR: {err}")

    logging.config.dictConfig(live_config)
    make_th_formatters_safe(logging.getLogger())

    class StartsWithFilter(logging.Filter):
        # Drops records whose logger name starts with the given prefix.
        def __init__(self, startswith: str):
            self.startswith = startswith

        def filter(self, record):
            return not record.name.startswith(self.startswith)

    # silence known-noisy third-party loggers
    for noisy_logger in ("py4j.java_gateway", "py4j.clientserver"):  # 11.3, 9.1
        logging.getLogger(noisy_logger).addFilter(StartsWithFilter(noisy_logger))
@@ -0,0 +1,43 @@
1
+ """A JSON formatter that understands what to do with our keyword logger things."""
2
+
3
+ import json
4
+ import logging
5
+
6
+ from .kw_logger import th_keyvals_from_record
7
+
8
+
9
class ThdsJsonFormatter(logging.Formatter):
    """Formats each LogRecord as a JSON object: timestamp, level, module, msg,
    merged with the record's `extra` dict and any th_context keyword pairs.

    NOTE(review): when a record carries an exception or stack trace, the raw
    traceback text is appended AFTER the JSON object, so such lines are not
    valid JSON on their own — confirm downstream consumers tolerate this.
    """

    def _format_exception_and_trace(self, record: logging.LogRecord):
        # without the following boilerplate, we would not see exceptions or stack traces
        # get formatted as part of the log output at all.
        formatted = ""
        if record.exc_info:
            if not record.exc_text:
                # cache the rendered traceback on the record, as stdlib Formatter does
                record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            formatted += "\n" + record.exc_text
        if record.stack_info:
            formatted += "\n" + self.formatStack(record.stack_info)
        return formatted

    def format(self, record: logging.LogRecord) -> str:
        """Format the record as a JSON string."""
        # We're going to use a dictionary to hold the record data, and then convert it to JSON.
        # This is because we want to be able to add arbitrary key-value pairs to the log record
        # and have them show up in the JSON output.
        record_dict = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "module": record.module,
            "msg": record.getMessage(),
        }
        # Add the extra data, if it exists.
        if record.__dict__.get("extra"):
            record_dict.update(record.__dict__["extra"])
        # keyword-logger context pairs (logger_context / kwargs) win last
        record_dict.update(th_keyvals_from_record(record) or {})

        # Convert the dictionary to a JSON string.
        formatted = json.dumps(record_dict)
        if exc_text := self._format_exception_and_trace(record):
            formatted += exc_text
        return formatted
@@ -0,0 +1,84 @@
1
+ """This is the 'standard' keyword-formatting logger formatter for our logs.
2
+
3
+ It is enabled by default via basic_config.py, but is not required.
4
+ """
5
+
6
+ import logging
7
+ import typing as ty
8
+
9
+ from .. import ansi_esc, config
10
+ from .kw_logger import th_keyvals_from_record
11
+
12
# Logger names longer than this are middle-truncated (see
# ThdsCompactFormatter.format_module_name) so log columns stay aligned.
MAX_MODULE_NAME_LEN = config.item("max_module_name_len", 40, parse=int)
_MODULE_NAME_FMT_STR = "{compressed_name:" + str(MAX_MODULE_NAME_LEN()) + "}"

# ANSI color templates per severity bucket; the "{}" placeholder receives the level name.
_COLOR_LEVEL_MAP = {
    "low": f"{ansi_esc.fg.BLUE}{{}}{ansi_esc.fg.RESET}",
    "info": f"{ansi_esc.fg.GREEN}{{}}{ansi_esc.fg.RESET}",
    "warning": (
        f"{ansi_esc.fg.YELLOW}{ansi_esc.style.BRIGHT}" "{}" f"{ansi_esc.style.NORMAL}{ansi_esc.fg.RESET}"
    ),
    "error": (
        f"{ansi_esc.bg.ERROR_RED}{ansi_esc.style.BRIGHT}"
        "{}"
        f"{ansi_esc.style.NORMAL}{ansi_esc.bg.RESET}"
    ),
    "critical": (
        f"{ansi_esc.bg.MAGENTA}{ansi_esc.style.BRIGHT}{ansi_esc.style.BLINK}"  # 😂
        "{}"
        f"{ansi_esc.bg.RESET}{ansi_esc.style.NORMAL}{ansi_esc.style.NO_BLINK}"
    ),
}
32
+
33
+
34
def log_level_color(levelno: int, base_levelname: str) -> str:
    """Wrap the level name in the ANSI color template for its severity bucket.

    Levels below WARNING are rendered lowercase; WARNING and above keep the
    name's original casing for emphasis.
    """
    if levelno < logging.INFO:
        bucket, text = "low", base_levelname.lower()
    elif levelno < logging.WARNING:
        bucket, text = "info", base_levelname.lower()
    elif levelno < logging.ERROR:
        bucket, text = "warning", base_levelname
    elif levelno < logging.CRITICAL:
        bucket, text = "error", base_levelname
    else:
        bucket, text = "critical", base_levelname
    return _COLOR_LEVEL_MAP[bucket].format(text)
44
+
45
+
46
class ThdsCompactFormatter(logging.Formatter):
    """This new formatter is more compact than what we had before, and hopefully makes logs a bit more readable overall."""

    @staticmethod
    def format_module_name(name: str) -> str:
        """Middle-truncate long logger names to MAX_MODULE_NAME_LEN characters,
        then pad to that fixed width so columns line up across log lines."""
        max_module_name_len = MAX_MODULE_NAME_LEN()
        # Keep the head and tail of the dotted name, eliding the middle with "...";
        # the head/tail split keeps total length exactly at the maximum.
        compressed_name = (
            name
            if len(name) <= max_module_name_len
            else name[: max_module_name_len // 2 - 2] + "..." + name[-max_module_name_len // 2 + 1 :]
        )
        assert len(compressed_name) <= max_module_name_len
        return _MODULE_NAME_FMT_STR.format(compressed_name=compressed_name)

    def _format_exception_and_trace(self, record: logging.LogRecord):
        # without the following boilerplate, we would not see exceptions or stack traces
        # get formatted as part of the log output at all.
        formatted = ""
        if record.exc_info:
            if not record.exc_text:
                # cache the rendered traceback on the record, as stdlib Formatter does
                record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            formatted += "\n" + record.exc_text
        if record.stack_info:
            formatted += "\n" + self.formatStack(record.stack_info)
        return formatted

    def format(self, record: logging.LogRecord):
        """Render: `<time> <colored level> <name> <th_context> <message>` plus
        any exception/stack text."""
        record.message = record.getMessage()

        base_levelname = f"{record.levelname:7}"  # the length of the string 'WARNING'
        levelname = log_level_color(record.levelno, base_levelname)

        # an empty tuple renders as "()" when there is no keyword context
        th_ctx: ty.Any = th_keyvals_from_record(record) or tuple()
        short_name = self.format_module_name(record.name)
        formatted = f"{self.formatTime(record)} {levelname} {short_name} {th_ctx} {record.message}"
        if exc_text := self._format_exception_and_trace(record):
            formatted += exc_text
        return formatted
@@ -0,0 +1,93 @@
1
+ """A logger which allows passing of arbitrary keyword arguments to the end of a logger call,
2
+ such that that context gets embedded directly into the output in one way or another.
3
+ """
4
+
5
+ import contextlib
6
+ import logging
7
+ import logging.config
8
+ from copy import copy
9
+ from typing import Any, Dict, MutableMapping, Optional
10
+
11
+ from ..stack_context import StackContext
12
+
13
_LOGGING_KWARGS = ("exc_info", "stack_info", "stacklevel", "extra")
# These are the officially accepted keyword-arguments for a call to
# log something with the logger. Anything passed with these names
# should be passed through directly - anything else can be passed through
# to the keyword formatter.

TH_REC_CTXT = "th_context"
# this names a nested dict on some LogRecords that contains things we
# want to log. It is usable as a field specifier in log format strings
# (e.g. "%(th_context)s").
+
23
+
24
+ class _THContext(Dict[str, Any]):
25
+ def __str__(self):
26
+ return ",".join(map("(%s=%s)".__mod__, self.items())) if self else "()"
27
+
28
+
29
# Stack-scoped ambient context; logger_context() overlays key-value pairs onto
# it for the duration of a `with` block.
_LOG_CONTEXT: StackContext[_THContext] = StackContext("TH_LOG_CONTEXT", _THContext())
30
+
31
+
32
@contextlib.contextmanager
def logger_context(**kwargs):
    """Overlay key-value pairs onto the keyword-based logger context for the
    duration of the `with` block."""
    overlaid = _THContext(_LOG_CONTEXT(), **kwargs)
    with _LOG_CONTEXT.set(overlaid):
        yield
37
+
38
+
39
def _embed_th_context_in_extra_kw(kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Fold non-logging keyword arguments into the record's "extra" dict.

    The ambient logger_context pairs are overlaid with any keyword arguments
    not in _LOGGING_KWARGS (those are popped from kwargs), and the merged
    mapping is stored under TH_REC_CTXT inside kwargs["extra"].
    """
    th_context = _LOG_CONTEXT()
    custom_keys = [key for key in kwargs if key not in _LOGGING_KWARGS]
    if custom_keys:
        # copy first so we never mutate the shared ambient context
        th_context = copy(th_context)
        for key in custom_keys:
            th_context[key] = kwargs.pop(key)
    kwargs.setdefault("extra", dict())[TH_REC_CTXT] = th_context
    return kwargs
+
52
+
53
+ class KwLogger(logging.LoggerAdapter):
54
+ """Allows logging of extra keyword arguments straight through without
55
+ needing an "extras" dictionary.
56
+ """
57
+
58
+ def process(self, msg, kwargs):
59
+ return msg, _embed_th_context_in_extra_kw(kwargs)
60
+
61
+
62
+ def th_keyvals_from_record(record: logging.LogRecord) -> Optional[Dict[str, Any]]:
63
+ """Extracts the key-value pairs embedded via `logger_context` or keyword arguments from a LogRecord."""
64
+ return getattr(record, TH_REC_CTXT, None)
65
+
66
+
67
+ def getLogger(name: Optional[str] = None) -> logging.LoggerAdapter:
68
+ """Using this Logger Adapter will allow you to pass key/value context at the end of
69
+ your logging statements, e.g. `logger.info("my message", key1=value1, key2=value2)`.
70
+ Provided that you haven't configured your own logging format, this module will do so for you,
71
+ ensuring that these contextual key-value pairs render in your log messages. To ensure their presence
72
+ when configuring logging yourself, just put a "%(th_context)s" format specifier somewhere in your
73
+ log message format.
74
+ """
75
+ return KwLogger(logging.getLogger(name), dict())
76
+
77
+
78
def make_th_formatters_safe(logger: logging.Logger):
    """Patch formatters on `logger`'s handlers so context-less records still format.

    Non-adapted loggers may emit LogRecords without the TH_REC_CTXT attribute
    that our root format string expects; this wraps formatMessage on every
    formatter whose format string references TH_REC_CTXT, defaulting the
    attribute to the ambient _LOG_CONTEXT when it is missing.
    """

    def _wrap(fmt_msg):
        # Factory binds fmt_msg per formatter. The previous implementation
        # closed over the loop-local variable directly (noqa: B023), so with
        # more than one matching handler every wrapper would have delegated to
        # the LAST formatter's formatMessage instead of its own.
        def wrapper_formatMessage(record: logging.LogRecord):
            if None is getattr(record, TH_REC_CTXT, None):
                setattr(record, TH_REC_CTXT, _LOG_CONTEXT())
            return fmt_msg(record)

        return wrapper_formatMessage

    for handler in logger.handlers:
        formatter = handler.formatter
        # _style._fmt is the formatter's raw format string (CPython internal,
        # stable across 3.x); only patch formatters that reference our field.
        if formatter and hasattr(formatter, "_style") and TH_REC_CTXT in formatter._style._fmt:
            setattr(formatter, "formatMessage", _wrap(formatter.formatMessage))  # noqa: B010