thds.core 0.0.1__py3-none-any.whl → 1.31.20250123022540__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.core might be problematic. Click here for more details.

Files changed (70)
  1. thds/core/__init__.py +48 -0
  2. thds/core/ansi_esc.py +46 -0
  3. thds/core/cache.py +201 -0
  4. thds/core/calgitver.py +82 -0
  5. thds/core/concurrency.py +100 -0
  6. thds/core/config.py +250 -0
  7. thds/core/decos.py +55 -0
  8. thds/core/dict_utils.py +188 -0
  9. thds/core/env.py +40 -0
  10. thds/core/exit_after.py +121 -0
  11. thds/core/files.py +125 -0
  12. thds/core/fretry.py +115 -0
  13. thds/core/generators.py +56 -0
  14. thds/core/git.py +81 -0
  15. thds/core/hash_cache.py +86 -0
  16. thds/core/hashing.py +106 -0
  17. thds/core/home.py +15 -0
  18. thds/core/hostname.py +10 -0
  19. thds/core/imports.py +17 -0
  20. thds/core/inspect.py +58 -0
  21. thds/core/iterators.py +9 -0
  22. thds/core/lazy.py +83 -0
  23. thds/core/link.py +153 -0
  24. thds/core/log/__init__.py +29 -0
  25. thds/core/log/basic_config.py +171 -0
  26. thds/core/log/json_formatter.py +43 -0
  27. thds/core/log/kw_formatter.py +84 -0
  28. thds/core/log/kw_logger.py +93 -0
  29. thds/core/log/logfmt.py +302 -0
  30. thds/core/merge_args.py +168 -0
  31. thds/core/meta.json +8 -0
  32. thds/core/meta.py +518 -0
  33. thds/core/parallel.py +200 -0
  34. thds/core/pickle_visit.py +24 -0
  35. thds/core/prof.py +276 -0
  36. thds/core/progress.py +112 -0
  37. thds/core/protocols.py +17 -0
  38. thds/core/py.typed +0 -0
  39. thds/core/scaling.py +39 -0
  40. thds/core/scope.py +199 -0
  41. thds/core/source.py +238 -0
  42. thds/core/source_serde.py +104 -0
  43. thds/core/sqlite/__init__.py +21 -0
  44. thds/core/sqlite/connect.py +33 -0
  45. thds/core/sqlite/copy.py +35 -0
  46. thds/core/sqlite/ddl.py +4 -0
  47. thds/core/sqlite/functions.py +63 -0
  48. thds/core/sqlite/index.py +22 -0
  49. thds/core/sqlite/insert_utils.py +23 -0
  50. thds/core/sqlite/merge.py +84 -0
  51. thds/core/sqlite/meta.py +190 -0
  52. thds/core/sqlite/read.py +66 -0
  53. thds/core/sqlite/sqlmap.py +179 -0
  54. thds/core/sqlite/structured.py +138 -0
  55. thds/core/sqlite/types.py +64 -0
  56. thds/core/sqlite/upsert.py +139 -0
  57. thds/core/sqlite/write.py +99 -0
  58. thds/core/stack_context.py +41 -0
  59. thds/core/thunks.py +40 -0
  60. thds/core/timer.py +214 -0
  61. thds/core/tmp.py +85 -0
  62. thds/core/types.py +4 -0
  63. thds.core-1.31.20250123022540.dist-info/METADATA +68 -0
  64. thds.core-1.31.20250123022540.dist-info/RECORD +67 -0
  65. {thds.core-0.0.1.dist-info → thds.core-1.31.20250123022540.dist-info}/WHEEL +1 -1
  66. thds.core-1.31.20250123022540.dist-info/entry_points.txt +4 -0
  67. thds.core-1.31.20250123022540.dist-info/top_level.txt +1 -0
  68. thds.core-0.0.1.dist-info/METADATA +0 -8
  69. thds.core-0.0.1.dist-info/RECORD +0 -4
  70. thds.core-0.0.1.dist-info/top_level.txt +0 -1
thds/core/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """Trilliant Health data science team core utils"""
2
+
3
+ from . import ( # noqa: F401
4
+ ansi_esc,
5
+ cache,
6
+ calgitver,
7
+ concurrency,
8
+ config,
9
+ decos,
10
+ dict_utils,
11
+ env,
12
+ exit_after,
13
+ files,
14
+ fretry,
15
+ generators,
16
+ git,
17
+ hash_cache,
18
+ hashing,
19
+ home,
20
+ hostname,
21
+ imports,
22
+ inspect,
23
+ lazy,
24
+ link,
25
+ log,
26
+ merge_args,
27
+ meta,
28
+ parallel,
29
+ prof,
30
+ progress,
31
+ protocols,
32
+ scope,
33
+ source,
34
+ sqlite,
35
+ stack_context,
36
+ thunks,
37
+ timer,
38
+ tmp,
39
+ types,
40
+ )
41
+ from .source import Source # noqa: F401
42
+
43
+ # these imports are helpful for IDE to parse things `core` usage like, `from thds import core`...`core.log.getLogger`
44
+ # this list of imports has no effect on runtime behavior and keeping this up to date is just a nicety and not *required*
45
+
46
+ __version__ = meta.get_version(__name__)
47
+ metadata = meta.read_metadata(__name__)
48
+ __commit__ = metadata.git_commit
thds/core/ansi_esc.py ADDED
@@ -0,0 +1,46 @@
1
+ # thanks to https://gist.github.com/minism/1590432
2
+ # and https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797
3
+
4
+
5
class fg:
    """ANSI escape codes that set the terminal foreground (text) color."""

    # standard 8-color palette (codes 30-37)
    BLACK = "\033[30m"
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # 256-color escape (`38;5;196`) — a vivid red suited to error output
    ERROR_RED = "\033[38;5;196m"

    # restores the terminal's default foreground color
    RESET = "\033[39m"  # a.k.a. DEFAULT
18
+
19
+
20
class bg:
    """ANSI escape codes that set the terminal background color."""

    # standard 8-color palette (codes 40-47)
    BLACK = "\033[40m"
    RED = "\033[41m"
    GREEN = "\033[42m"
    YELLOW = "\033[43m"
    BLUE = "\033[44m"
    MAGENTA = "\033[45m"
    CYAN = "\033[46m"
    WHITE = "\033[47m"

    # 256-color escape (`48;5;196`) — a vivid red suited to error output
    ERROR_RED = "\033[48;5;196m"

    # restores the terminal's default background color
    RESET = "\033[49m"  # a.k.a. DEFAULT
33
+
34
+
35
class style:
    """ANSI escape codes for text styling (intensity, blink, italics)."""

    BRIGHT = "\033[1m"
    DIM = "\033[2m"
    NORMAL = "\033[22m"  # resets intensity: neither bright nor dim

    BLINK = "\033[5m"
    NO_BLINK = "\033[25m"

    ITALIC = "\033[3m"
    NO_ITALIC = "\033[23m"

    RESET_ALL = "\033[0m"  # clears every color and style attribute
thds/core/cache.py ADDED
@@ -0,0 +1,201 @@
1
+ import functools
2
+ import inspect
3
+ import sys
4
+ import threading
5
+ import typing as ty
6
+
7
+ from . import protocols as proto
8
+
9
+ if sys.version_info >= (3, 10): # pragma: no cover
10
+ from typing import ParamSpec
11
+ else: # pragma: no cover
12
+ from typing_extensions import ParamSpec
13
+
14
+
15
+ class _HashedTuple(tuple):
16
+ """A tuple that ensures that `hash` will be called no more than once
17
+ per element, since cache decorators will hash the key multiple
18
+ times on a cache miss. See also `_HashedSeq` in the standard
19
+ library `functools` implementation.
20
+ """
21
+
22
+ __hashvalue: ty.Optional[int] = None
23
+
24
+ def __hash__(self, hash=tuple.__hash__) -> int:
25
+ hashvalue = self.__hashvalue
26
+ if hashvalue is None:
27
+ self.__hashvalue = hashvalue = hash(self)
28
+ return hashvalue
29
+
30
+ def __add__(self, other, add=tuple.__add__) -> "_HashedTuple":
31
+ return _HashedTuple(add(self, other))
32
+
33
+ def __radd__(self, other, add=tuple.__add__) -> "_HashedTuple":
34
+ return _HashedTuple(add(other, self))
35
+
36
+ def __getstate__(self) -> ty.Dict:
37
+ return {}
38
+
39
+
40
# separator between positional and keyword arguments; a tuple holding the
# class object itself (not an instance) so identity is preserved when
# pickling/unpickling
_kwmark = (_HashedTuple,)


def hashkey(args: tuple, kwargs: ty.Mapping) -> _HashedTuple:
    """Return a cache key for the specified hashable arguments."""
    if not kwargs:
        return _HashedTuple(args)
    # kwargs are sorted by name so keyword ordering never changes the key
    return _HashedTuple(args + sum(sorted(kwargs.items()), _kwmark))


# keying code above borrowed from `cachetools`: https://github.com/tkem/cachetools/tree/master
# with type information added
52
+
53
+
54
+ # above keying code borrowed from `cachetools`: https://github.com/tkem/cachetools/tree/master
55
+ # I have added some type information
56
+
57
+
58
def make_bound_hashkey(func: ty.Callable) -> ty.Callable[..., _HashedTuple]:
    """Build a hashkey function bound to `func`'s signature.

    Keys produced this way are robust to *how* arguments are passed to the
    cache-wrapped `func` (positional vs. keyword, explicit vs. defaulted).
    Note that `*args`, by definition, remain order dependent.
    """
    sig = inspect.signature(func)

    def bound_hashkey(args: tuple, kwargs: ty.Mapping) -> _HashedTuple:
        # normalize the call: bind to the signature and fill in defaults so
        # equivalent invocations map to the same key
        bound = sig.bind(*args, **kwargs)
        bound.apply_defaults()
        return hashkey(bound.args, bound.kwargs)

    return bound_hashkey
72
+
73
+
74
+ class _CacheInfo(ty.NamedTuple):
75
+ # typed version of what is in `functools`
76
+ hits: int
77
+ misses: int
78
+ maxsize: ty.Optional[int]
79
+ currsize: int
80
+
81
+
82
+ _P = ParamSpec("_P")
83
+ _R = ty.TypeVar("_R")
84
+
85
+
86
def _locking_factory(
    cache_lock: proto.ContextManager,
    make_func_lock: ty.Callable[[_HashedTuple], proto.ContextManager],
) -> ty.Callable[[ty.Callable[_P, _R]], ty.Callable[_P, _R]]:
    """Build a locking-cache decorator from the given lock primitives.

    `cache_lock` guards the bookkeeping dictionaries; `make_func_lock` produces
    a per-key lock so that only one thread computes a given key at a time.
    """

    def decorator(func: ty.Callable[_P, _R]) -> ty.Callable[_P, _R]:
        cache: ty.Dict[_HashedTuple, _R] = {}
        keys_to_func_locks: ty.Dict[_HashedTuple, proto.ContextManager] = {}
        hits = misses = 0
        bound_hashkey = make_bound_hashkey(func)
        sentinel = ty.cast(_R, object())  # unique object used to signal cache misses

        @functools.wraps(func)
        def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R:
            nonlocal hits, misses

            key = bound_hashkey(args, kwargs)
            maybe_value = cache.get(key, sentinel)
            if maybe_value is not sentinel:
                hits += 1
                return maybe_value

            # Capture the per-key lock in a local variable. Looking it up again
            # via `with keys_to_func_locks[key]:` could raise KeyError when the
            # winning thread deletes the entry between our membership test and
            # the lookup; holding a direct reference closes that race.
            func_lock = keys_to_func_locks.get(key)
            if func_lock is None:
                with cache_lock:
                    func_lock = keys_to_func_locks.get(key)
                    if func_lock is None:  # pragma: no cover
                        # double-checked under cache_lock to guard against
                        # concurrent creation of two locks for one key
                        func_lock = make_func_lock(key)
                        keys_to_func_locks[key] = func_lock

            with func_lock:
                # re-check: another thread may have computed the value while we
                # were waiting on the lock
                maybe_value = cache.get(key, sentinel)
                if maybe_value is not sentinel:
                    hits += 1
                    return maybe_value

                misses += 1
                result = func(*args, **kwargs)
                cache[key] = result

                # pop (not del): a concurrent thread may have already removed
                # or replaced this entry, and a bare del would raise KeyError
                keys_to_func_locks.pop(key, None)
                return result

        def cache_info() -> _CacheInfo:
            # concurrent usage of the cached function may yield inaccurate hit
            # and miss counts; incrementing them is deliberately not threadsafe
            with cache_lock:
                return _CacheInfo(hits, misses, None, len(cache))

        def clear_cache() -> None:
            """Empty the cache and reset hit/miss statistics."""
            nonlocal hits, misses
            with cache_lock:
                cache.clear()
                keys_to_func_locks.clear()
                hits = misses = 0

        wrapper.cache_info = cache_info  # type: ignore[attr-defined]
        wrapper.clear_cache = clear_cache  # type: ignore[attr-defined]

        return wrapper

    return decorator
145
+
146
+
147
@ty.overload
def locking(func: ty.Callable[_P, _R]) -> ty.Callable[_P, _R]:
    ...  # pragma: no cover


@ty.overload
def locking(
    func: None = ...,
    *,
    cache_lock: ty.Optional[proto.ContextManager] = ...,
    make_func_lock: ty.Optional[ty.Callable[[_HashedTuple], proto.ContextManager]] = ...,
) -> ty.Callable[[ty.Callable[_P, _R]], ty.Callable[_P, _R]]:
    ...  # pragma: no cover


# The overloads above cover typical usage of the locking cache but are not
# comprehensive; extend them if you need typing coverage for another call shape.


def locking(
    func: ty.Optional[ty.Callable[_P, _R]] = None,
    *,
    cache_lock: ty.Optional[proto.ContextManager] = None,
    make_func_lock: ty.Optional[ty.Callable[[_HashedTuple], proto.ContextManager]] = None,
):
    """A threadsafe, simple, unbounded cache.

    Unlike common cache implementations such as `functools.cache` or
    `cachetools.cached({})`, `locking` ensures that only one invocation of the
    wrapped function occurs per key across concurrent threads.

    Take care that the wrapped function handles exceptions gracefully when it
    will be called with the same arguments concurrently. A worst case exists
    where a long-running wrapped function *F* deterministically errors towards
    the end of its run: if that *F* is called with the same arguments *N*
    times, it will run (and error) in serial, *N* times.

    You may optionally supply your own context-manager `cache_lock`, and a
    `make_func_lock` callable returning a context-manager lock derived from the
    cache key. By default, the `cache_lock` is a `threading.Lock` and each
    unique cache key gets its own `threading.Lock`.

    Also note that `hits` and `misses` in `cache_info` may be inaccurate: they
    are not incremented in a threadsafe manner, since doing so would impose a
    performance penalty on threaded usage that is not worth the cost.
    """

    def default_make_func_lock(_key: _HashedTuple) -> threading.Lock:
        return threading.Lock()

    decorator = _locking_factory(
        cache_lock or threading.Lock(), make_func_lock or default_make_func_lock
    )
    return decorator(func) if func else decorator
thds/core/calgitver.py ADDED
@@ -0,0 +1,82 @@
1
+ """Uses local git repo info to construct a more informative CalVer version string.
2
+
3
+ This time format was chosen to be CalVer-esque but to drop time
4
+ fractions smaller than minutes since they're exceeding rarely
5
+ semantically meaningful, and the git commit hash will in 99.999%
6
+ of cases be a great disambiguator for cases where multiple
7
+ versions happen to be generated within the same minute by
8
+ different users.
9
+
10
+ We use only dots as separators to be compatible with both Container Registry
11
+ formats and PEP440.
12
+ """
13
+
14
+ import os
15
+ import re
16
+
17
+ from . import git
18
+
19
+ SHORT_HASH = 7
20
+
21
+
22
def calgitver() -> str:
    """Return the 'proper', deterministic CalGitVer.

    Unlike the nondeterministic meta.make_calgitver when the repo is dirty,
    this is deterministic. It also allows override via the CALGITVER
    environment variable, which is intended to support nonlocal runtime
    environments.

    Suitable whenever you want this value in a context where you're not sure
    the git repo is present but expect the environment variable to be set if
    it isn't. In other words, prefer this over meta.make_calgitver for
    production use cases, especially inside a Docker image or Spark cluster.
    """
    override = os.getenv("CALGITVER")
    if override:
        return override

    commit_datetime, commit_hash = git.get_commit_datetime_and_hash()
    # empty components (e.g. the dirty marker on a clean repo) are dropped
    parts = [commit_datetime, commit_hash[:SHORT_HASH]]
    if not git.is_clean():
        parts.append("dirty")
    return "-".join(part for part in parts if part)
52
+
53
+
54
def clean_calgitver() -> str:
    """Return CalGitVer, refusing any version computed from a dirty repository.

    Particularly useful for strict production environments.
    """
    version = calgitver()
    if version.endswith("-dirty"):
        raise ValueError(f"CalGitVer {version} was computed from a dirty repository!")
    return version
63
+
64
+
65
# Matches version strings of the shape produced above:
#   YYYYMMDD.HHMM-<7-char lowercase hex short hash>[-dirty]
CALGITVER_EXTRACT_RE = re.compile(
    r"""
    (?P<year>\d{4})
    (?P<month>\d{2})
    (?P<day>\d{2})
    \.
    (?P<hour>\d{2})
    (?P<minute>\d{2})
    -
    (?P<git_commit>[a-f0-9]{7})
    (?P<dirty>(-dirty$)|$)
    """,
    re.X,
)


def parse_calgitver(maybe_calgitver: str):
    """Match `maybe_calgitver` against the CalGitVer format.

    Returns the `re.Match` (with named groups year/month/day/hour/minute/
    git_commit/dirty) on success, or None if the string does not conform.
    """
    return CALGITVER_EXTRACT_RE.match(maybe_calgitver)
thds/core/concurrency.py ADDED
@@ -0,0 +1,100 @@
1
+ """Utilities for working with concurrency in Python."""
2
+ import contextvars
3
+ import typing as ty
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from threading import Lock
6
+
7
+
8
+ def copy_context():
9
+ """The basic implementation you want if you want to copy the current ContextVar
10
+ context to a new thread. https://docs.python.org/3.10/library/contextvars.html
11
+
12
+ Makes a copy of the current context, and closes over that copy with a callable that
13
+ must then be called inside the new thread (or process, if your context is picklable).
14
+
15
+ It is disappointing that Python does not do this for you by default, since it is quite
16
+ common to want to do, extremely cheap, and is much easier to write the code to
17
+ manually override in the rare cases where it's the wrong idea, than it is to make sure
18
+ to put this in every single place you want it to happen. Which is probably why asyncio
19
+ _does_ do this by default for green/async coroutines...
20
+
21
+ """
22
+ context = contextvars.copy_context()
23
+
24
+ def copy_context_initializer():
25
+ for var, value in context.items():
26
+ var.set(value)
27
+
28
+ return copy_context_initializer
29
+
30
+
31
+ class ContextfulInit(ty.TypedDict):
32
+ """A dictionary corresponding to the initializer API expected by concurrent.futures.Executor"""
33
+
34
+ initializer: ty.Callable[[], None]
35
+
36
+
37
+ def initcontext() -> ContextfulInit:
38
+ """Returns a dictionary corresponding to the API expected by concurrent.futures.Executor,
39
+
40
+ so that you can do `ThreadPoolExecutor(**initcontext())` to get a ThreadPoolExecutor that
41
+ copies the current context to the new thread.
42
+ """
43
+ return dict(initializer=copy_context())
44
+
45
+
46
+ def contextful_threadpool_executor(
47
+ max_workers: ty.Optional[int] = None,
48
+ ) -> ty.ContextManager[ThreadPoolExecutor]:
49
+ """
50
+ Return a ThreadPoolExecutor that copies the current context to the new thread.
51
+
52
+ You don't need to use this directly.
53
+ """
54
+ return ThreadPoolExecutor(
55
+ max_workers=max_workers,
56
+ thread_name_prefix="contextful_threadpool_executor",
57
+ **initcontext(),
58
+ )
59
+
60
+
61
+ H = ty.TypeVar("H", bound=ty.Hashable)
62
+ L = ty.TypeVar("L", bound=Lock)
63
+
64
+
65
+ class LockSet(ty.Generic[H, L]):
66
+ """Get a process-global lock by hashable key, or create it (thread-safely) if it does not exist.
67
+
68
+ Handy if you have things you want to be able to do inside a process, but you don't want
69
+ to completely rule out the possibility of pickling the object that would otherwise hold the Lock object.
70
+
71
+ This does mean your locks are not shared across processes, but that's a Python limitation anyway.
72
+ """
73
+
74
+ def __init__(self, lockclass: ty.Type[L]):
75
+ self._lockclass = lockclass
76
+ self._master_lock = Lock()
77
+ self._hashed_locks: ty.Dict[H, L] = dict()
78
+
79
+ def get(self, hashable: H) -> Lock:
80
+ if hashable not in self._hashed_locks:
81
+ with self._master_lock:
82
+ if hashable not in self._hashed_locks:
83
+ self._hashed_locks[hashable] = self._lockclass()
84
+ assert hashable in self._hashed_locks, hashable
85
+ return self._hashed_locks[hashable]
86
+
87
+ def __getitem__(self, hashable: H) -> Lock:
88
+ return self.get(hashable)
89
+
90
+ def delete(self, hashable: H) -> None:
91
+ with self._master_lock:
92
+ self._hashed_locks.pop(hashable, None)
93
+
94
+
95
+ _GLOBAL_NAMED_LOCKS = LockSet[str, Lock](Lock)
96
+ # a general-purpose instance; you may want to create your own.
97
+
98
+
99
+ def named_lock(name: str) -> Lock:
100
+ return _GLOBAL_NAMED_LOCKS.get(name)