thds.core 0.0.1__py3-none-any.whl → 1.31.20250123022540__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.core might be problematic. Click here for more details.

Files changed (70)
  1. thds/core/__init__.py +48 -0
  2. thds/core/ansi_esc.py +46 -0
  3. thds/core/cache.py +201 -0
  4. thds/core/calgitver.py +82 -0
  5. thds/core/concurrency.py +100 -0
  6. thds/core/config.py +250 -0
  7. thds/core/decos.py +55 -0
  8. thds/core/dict_utils.py +188 -0
  9. thds/core/env.py +40 -0
  10. thds/core/exit_after.py +121 -0
  11. thds/core/files.py +125 -0
  12. thds/core/fretry.py +115 -0
  13. thds/core/generators.py +56 -0
  14. thds/core/git.py +81 -0
  15. thds/core/hash_cache.py +86 -0
  16. thds/core/hashing.py +106 -0
  17. thds/core/home.py +15 -0
  18. thds/core/hostname.py +10 -0
  19. thds/core/imports.py +17 -0
  20. thds/core/inspect.py +58 -0
  21. thds/core/iterators.py +9 -0
  22. thds/core/lazy.py +83 -0
  23. thds/core/link.py +153 -0
  24. thds/core/log/__init__.py +29 -0
  25. thds/core/log/basic_config.py +171 -0
  26. thds/core/log/json_formatter.py +43 -0
  27. thds/core/log/kw_formatter.py +84 -0
  28. thds/core/log/kw_logger.py +93 -0
  29. thds/core/log/logfmt.py +302 -0
  30. thds/core/merge_args.py +168 -0
  31. thds/core/meta.json +8 -0
  32. thds/core/meta.py +518 -0
  33. thds/core/parallel.py +200 -0
  34. thds/core/pickle_visit.py +24 -0
  35. thds/core/prof.py +276 -0
  36. thds/core/progress.py +112 -0
  37. thds/core/protocols.py +17 -0
  38. thds/core/py.typed +0 -0
  39. thds/core/scaling.py +39 -0
  40. thds/core/scope.py +199 -0
  41. thds/core/source.py +238 -0
  42. thds/core/source_serde.py +104 -0
  43. thds/core/sqlite/__init__.py +21 -0
  44. thds/core/sqlite/connect.py +33 -0
  45. thds/core/sqlite/copy.py +35 -0
  46. thds/core/sqlite/ddl.py +4 -0
  47. thds/core/sqlite/functions.py +63 -0
  48. thds/core/sqlite/index.py +22 -0
  49. thds/core/sqlite/insert_utils.py +23 -0
  50. thds/core/sqlite/merge.py +84 -0
  51. thds/core/sqlite/meta.py +190 -0
  52. thds/core/sqlite/read.py +66 -0
  53. thds/core/sqlite/sqlmap.py +179 -0
  54. thds/core/sqlite/structured.py +138 -0
  55. thds/core/sqlite/types.py +64 -0
  56. thds/core/sqlite/upsert.py +139 -0
  57. thds/core/sqlite/write.py +99 -0
  58. thds/core/stack_context.py +41 -0
  59. thds/core/thunks.py +40 -0
  60. thds/core/timer.py +214 -0
  61. thds/core/tmp.py +85 -0
  62. thds/core/types.py +4 -0
  63. thds.core-1.31.20250123022540.dist-info/METADATA +68 -0
  64. thds.core-1.31.20250123022540.dist-info/RECORD +67 -0
  65. {thds.core-0.0.1.dist-info → thds.core-1.31.20250123022540.dist-info}/WHEEL +1 -1
  66. thds.core-1.31.20250123022540.dist-info/entry_points.txt +4 -0
  67. thds.core-1.31.20250123022540.dist-info/top_level.txt +1 -0
  68. thds.core-0.0.1.dist-info/METADATA +0 -8
  69. thds.core-0.0.1.dist-info/RECORD +0 -4
  70. thds.core-0.0.1.dist-info/top_level.txt +0 -1
thds/core/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """Trilliant Health data science team core utils"""
2
+
3
+ from . import ( # noqa: F401
4
+ ansi_esc,
5
+ cache,
6
+ calgitver,
7
+ concurrency,
8
+ config,
9
+ decos,
10
+ dict_utils,
11
+ env,
12
+ exit_after,
13
+ files,
14
+ fretry,
15
+ generators,
16
+ git,
17
+ hash_cache,
18
+ hashing,
19
+ home,
20
+ hostname,
21
+ imports,
22
+ inspect,
23
+ lazy,
24
+ link,
25
+ log,
26
+ merge_args,
27
+ meta,
28
+ parallel,
29
+ prof,
30
+ progress,
31
+ protocols,
32
+ scope,
33
+ source,
34
+ sqlite,
35
+ stack_context,
36
+ thunks,
37
+ timer,
38
+ tmp,
39
+ types,
40
+ )
41
+ from .source import Source # noqa: F401
42
+
43
+ # these imports are helpful for IDE to parse things `core` usage like, `from thds import core`...`core.log.getLogger`
44
+ # this list of imports has no effect on runtime behavior and keeping this up to date is just a nicety and not *required*
45
+
46
+ __version__ = meta.get_version(__name__)
47
+ metadata = meta.read_metadata(__name__)
48
+ __commit__ = metadata.git_commit
thds/core/ansi_esc.py ADDED
@@ -0,0 +1,46 @@
1
+ # thanks to https://gist.github.com/minism/1590432
2
+ # and https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797
3
+
4
+
5
class fg:
    """ANSI escape codes that set the terminal foreground (text) color."""

    # standard 8-color palette (codes 30-37)
    BLACK = "\033[30m"
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # 256-color escape (`38;5;196`) — a vivid red suited to error output
    ERROR_RED = "\033[38;5;196m"

    # restores the terminal's default foreground color
    RESET = "\033[39m"  # a.k.a. DEFAULT
18
+
19
+
20
class bg:
    """ANSI escape codes that set the terminal background color."""

    # standard 8-color palette (codes 40-47)
    BLACK = "\033[40m"
    RED = "\033[41m"
    GREEN = "\033[42m"
    YELLOW = "\033[43m"
    BLUE = "\033[44m"
    MAGENTA = "\033[45m"
    CYAN = "\033[46m"
    WHITE = "\033[47m"

    # 256-color escape (`48;5;196`) — a vivid red suited to error output
    ERROR_RED = "\033[48;5;196m"

    # restores the terminal's default background color
    RESET = "\033[49m"  # a.k.a. DEFAULT
33
+
34
+
35
class style:
    """ANSI escape codes for text styling (intensity, blink, italics)."""

    BRIGHT = "\033[1m"
    DIM = "\033[2m"
    NORMAL = "\033[22m"  # resets intensity: neither bright nor dim

    BLINK = "\033[5m"
    NO_BLINK = "\033[25m"

    ITALIC = "\033[3m"
    NO_ITALIC = "\033[23m"

    RESET_ALL = "\033[0m"  # clears every color and style attribute
thds/core/cache.py ADDED
@@ -0,0 +1,201 @@
1
+ import functools
2
+ import inspect
3
+ import sys
4
+ import threading
5
+ import typing as ty
6
+
7
+ from . import protocols as proto
8
+
9
+ if sys.version_info >= (3, 10): # pragma: no cover
10
+ from typing import ParamSpec
11
+ else: # pragma: no cover
12
+ from typing_extensions import ParamSpec
13
+
14
+
15
+ class _HashedTuple(tuple):
16
+ """A tuple that ensures that `hash` will be called no more than once
17
+ per element, since cache decorators will hash the key multiple
18
+ times on a cache miss. See also `_HashedSeq` in the standard
19
+ library `functools` implementation.
20
+ """
21
+
22
+ __hashvalue: ty.Optional[int] = None
23
+
24
+ def __hash__(self, hash=tuple.__hash__) -> int:
25
+ hashvalue = self.__hashvalue
26
+ if hashvalue is None:
27
+ self.__hashvalue = hashvalue = hash(self)
28
+ return hashvalue
29
+
30
+ def __add__(self, other, add=tuple.__add__) -> "_HashedTuple":
31
+ return _HashedTuple(add(self, other))
32
+
33
+ def __radd__(self, other, add=tuple.__add__) -> "_HashedTuple":
34
+ return _HashedTuple(add(other, self))
35
+
36
+ def __getstate__(self) -> ty.Dict:
37
+ return {}
38
+
39
+
40
# separator between positional and keyword arguments; a tuple holding the
# class object itself (not an instance) so identity is preserved when
# pickling/unpickling
_kwmark = (_HashedTuple,)


def hashkey(args: tuple, kwargs: ty.Mapping) -> _HashedTuple:
    """Return a cache key for the specified hashable arguments."""
    if not kwargs:
        return _HashedTuple(args)
    # kwargs are sorted by name so keyword ordering never changes the key
    return _HashedTuple(args + sum(sorted(kwargs.items()), _kwmark))


# keying code above borrowed from `cachetools`: https://github.com/tkem/cachetools/tree/master
# with type information added
52
+
53
+
54
+ # above keying code borrowed from `cachetools`: https://github.com/tkem/cachetools/tree/master
55
+ # I have added some type information
56
+
57
+
58
def make_bound_hashkey(func: ty.Callable) -> ty.Callable[..., _HashedTuple]:
    """Build a hashkey function bound to `func`'s signature.

    Keys produced this way are robust to *how* arguments are passed to the
    cache-wrapped `func` (positional vs. keyword, explicit vs. defaulted).
    Note that `*args`, by definition, remain order dependent.
    """
    sig = inspect.signature(func)

    def bound_hashkey(args: tuple, kwargs: ty.Mapping) -> _HashedTuple:
        # normalize the call: bind to the signature and fill in defaults so
        # equivalent invocations map to the same key
        bound = sig.bind(*args, **kwargs)
        bound.apply_defaults()
        return hashkey(bound.args, bound.kwargs)

    return bound_hashkey
72
+
73
+
74
+ class _CacheInfo(ty.NamedTuple):
75
+ # typed version of what is in `functools`
76
+ hits: int
77
+ misses: int
78
+ maxsize: ty.Optional[int]
79
+ currsize: int
80
+
81
+
82
+ _P = ParamSpec("_P")
83
+ _R = ty.TypeVar("_R")
84
+
85
+
86
def _locking_factory(
    cache_lock: proto.ContextManager,
    make_func_lock: ty.Callable[[_HashedTuple], proto.ContextManager],
) -> ty.Callable[[ty.Callable[_P, _R]], ty.Callable[_P, _R]]:
    """Build a locking-cache decorator from the given lock primitives.

    `cache_lock` guards the bookkeeping dictionaries; `make_func_lock` produces
    a per-key lock so that only one thread computes a given key at a time.
    """

    def decorator(func: ty.Callable[_P, _R]) -> ty.Callable[_P, _R]:
        cache: ty.Dict[_HashedTuple, _R] = {}
        keys_to_func_locks: ty.Dict[_HashedTuple, proto.ContextManager] = {}
        hits = misses = 0
        bound_hashkey = make_bound_hashkey(func)
        sentinel = ty.cast(_R, object())  # unique object used to signal cache misses

        @functools.wraps(func)
        def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R:
            nonlocal hits, misses

            key = bound_hashkey(args, kwargs)
            maybe_value = cache.get(key, sentinel)
            if maybe_value is not sentinel:
                hits += 1
                return maybe_value

            # Capture the per-key lock in a local variable. Looking it up again
            # via `with keys_to_func_locks[key]:` could raise KeyError when the
            # winning thread deletes the entry between our membership test and
            # the lookup; holding a direct reference closes that race.
            func_lock = keys_to_func_locks.get(key)
            if func_lock is None:
                with cache_lock:
                    func_lock = keys_to_func_locks.get(key)
                    if func_lock is None:  # pragma: no cover
                        # double-checked under cache_lock to guard against
                        # concurrent creation of two locks for one key
                        func_lock = make_func_lock(key)
                        keys_to_func_locks[key] = func_lock

            with func_lock:
                # re-check: another thread may have computed the value while we
                # were waiting on the lock
                maybe_value = cache.get(key, sentinel)
                if maybe_value is not sentinel:
                    hits += 1
                    return maybe_value

                misses += 1
                result = func(*args, **kwargs)
                cache[key] = result

                # pop (not del): a concurrent thread may have already removed
                # or replaced this entry, and a bare del would raise KeyError
                keys_to_func_locks.pop(key, None)
                return result

        def cache_info() -> _CacheInfo:
            # concurrent usage of the cached function may yield inaccurate hit
            # and miss counts; incrementing them is deliberately not threadsafe
            with cache_lock:
                return _CacheInfo(hits, misses, None, len(cache))

        def clear_cache() -> None:
            """Empty the cache and reset hit/miss statistics."""
            nonlocal hits, misses
            with cache_lock:
                cache.clear()
                keys_to_func_locks.clear()
                hits = misses = 0

        wrapper.cache_info = cache_info  # type: ignore[attr-defined]
        wrapper.clear_cache = clear_cache  # type: ignore[attr-defined]

        return wrapper

    return decorator
145
+
146
+
147
@ty.overload
def locking(func: ty.Callable[_P, _R]) -> ty.Callable[_P, _R]:
    ...  # pragma: no cover


@ty.overload
def locking(
    func: None = ...,
    *,
    cache_lock: ty.Optional[proto.ContextManager] = ...,
    make_func_lock: ty.Optional[ty.Callable[[_HashedTuple], proto.ContextManager]] = ...,
) -> ty.Callable[[ty.Callable[_P, _R]], ty.Callable[_P, _R]]:
    ...  # pragma: no cover


# The overloads above cover typical usage of the locking cache but are not
# comprehensive; extend them if you need typing coverage for another call shape.


def locking(
    func: ty.Optional[ty.Callable[_P, _R]] = None,
    *,
    cache_lock: ty.Optional[proto.ContextManager] = None,
    make_func_lock: ty.Optional[ty.Callable[[_HashedTuple], proto.ContextManager]] = None,
):
    """A threadsafe, simple, unbounded cache.

    Unlike common cache implementations such as `functools.cache` or
    `cachetools.cached({})`, `locking` ensures that only one invocation of the
    wrapped function occurs per key across concurrent threads.

    Take care that the wrapped function handles exceptions gracefully when it
    will be called with the same arguments concurrently. A worst case exists
    where a long-running wrapped function *F* deterministically errors towards
    the end of its run: if that *F* is called with the same arguments *N*
    times, it will run (and error) in serial, *N* times.

    You may optionally supply your own context-manager `cache_lock`, and a
    `make_func_lock` callable returning a context-manager lock derived from the
    cache key. By default, the `cache_lock` is a `threading.Lock` and each
    unique cache key gets its own `threading.Lock`.

    Also note that `hits` and `misses` in `cache_info` may be inaccurate: they
    are not incremented in a threadsafe manner, since doing so would impose a
    performance penalty on threaded usage that is not worth the cost.
    """

    def default_make_func_lock(_key: _HashedTuple) -> threading.Lock:
        return threading.Lock()

    decorator = _locking_factory(
        cache_lock or threading.Lock(), make_func_lock or default_make_func_lock
    )
    return decorator(func) if func else decorator
thds/core/calgitver.py ADDED
@@ -0,0 +1,82 @@
1
+ """Uses local git repo info to construct a more informative CalVer version string.
2
+
3
+ This time format was chosen to be CalVer-esque but to drop time
4
+ fractions smaller than minutes since they're exceeding rarely
5
+ semantically meaningful, and the git commit hash will in 99.999%
6
+ of cases be a great disambiguator for cases where multiple
7
+ versions happen to be generated within the same minute by
8
+ different users.
9
+
10
+ We use only dots as separators to be compatible with both Container Registry
11
+ formats and PEP440.
12
+ """
13
+
14
+ import os
15
+ import re
16
+
17
+ from . import git
18
+
19
+ SHORT_HASH = 7
20
+
21
+
22
def calgitver() -> str:
    """Return the 'proper', deterministic CalGitVer.

    Unlike the nondeterministic meta.make_calgitver when the repo is dirty,
    this is deterministic. It also allows override via the CALGITVER
    environment variable, which is intended to support nonlocal runtime
    environments.

    Suitable whenever you want this value in a context where you're not sure
    the git repo is present but expect the environment variable to be set if
    it isn't. In other words, prefer this over meta.make_calgitver for
    production use cases, especially inside a Docker image or Spark cluster.
    """
    override = os.getenv("CALGITVER")
    if override:
        return override

    commit_datetime, commit_hash = git.get_commit_datetime_and_hash()
    # empty components (e.g. the dirty marker on a clean repo) are dropped
    parts = [commit_datetime, commit_hash[:SHORT_HASH]]
    if not git.is_clean():
        parts.append("dirty")
    return "-".join(part for part in parts if part)
52
+
53
+
54
def clean_calgitver() -> str:
    """Return CalGitVer, refusing any version computed from a dirty repository.

    Particularly useful for strict production environments.
    """
    version = calgitver()
    if version.endswith("-dirty"):
        raise ValueError(f"CalGitVer {version} was computed from a dirty repository!")
    return version
63
+
64
+
65
# Matches version strings of the shape produced above:
#   YYYYMMDD.HHMM-<7-char lowercase hex short hash>[-dirty]
CALGITVER_EXTRACT_RE = re.compile(
    r"""
    (?P<year>\d{4})
    (?P<month>\d{2})
    (?P<day>\d{2})
    \.
    (?P<hour>\d{2})
    (?P<minute>\d{2})
    -
    (?P<git_commit>[a-f0-9]{7})
    (?P<dirty>(-dirty$)|$)
    """,
    re.X,
)


def parse_calgitver(maybe_calgitver: str):
    """Match `maybe_calgitver` against the CalGitVer format.

    Returns the `re.Match` (with named groups year/month/day/hour/minute/
    git_commit/dirty) on success, or None if the string does not conform.
    """
    return CALGITVER_EXTRACT_RE.match(maybe_calgitver)
thds/core/concurrency.py ADDED
@@ -0,0 +1,100 @@
1
+ """Utilities for working with concurrency in Python."""
2
+ import contextvars
3
+ import typing as ty
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from threading import Lock
6
+
7
+
8
+ def copy_context():
9
+ """The basic implementation you want if you want to copy the current ContextVar
10
+ context to a new thread. https://docs.python.org/3.10/library/contextvars.html
11
+
12
+ Makes a copy of the current context, and closes over that copy with a callable that
13
+ must then be called inside the new thread (or process, if your context is picklable).
14
+
15
+ It is disappointing that Python does not do this for you by default, since it is quite
16
+ common to want to do, extremely cheap, and is much easier to write the code to
17
+ manually override in the rare cases where it's the wrong idea, than it is to make sure
18
+ to put this in every single place you want it to happen. Which is probably why asyncio
19
+ _does_ do this by default for green/async coroutines...
20
+
21
+ """
22
+ context = contextvars.copy_context()
23
+
24
+ def copy_context_initializer():
25
+ for var, value in context.items():
26
+ var.set(value)
27
+
28
+ return copy_context_initializer
29
+
30
+
31
+ class ContextfulInit(ty.TypedDict):
32
+ """A dictionary corresponding to the initializer API expected by concurrent.futures.Executor"""
33
+
34
+ initializer: ty.Callable[[], None]
35
+
36
+
37
+ def initcontext() -> ContextfulInit:
38
+ """Returns a dictionary corresponding to the API expected by concurrent.futures.Executor,
39
+
40
+ so that you can do `ThreadPoolExecutor(**initcontext())` to get a ThreadPoolExecutor that
41
+ copies the current context to the new thread.
42
+ """
43
+ return dict(initializer=copy_context())
44
+
45
+
46
+ def contextful_threadpool_executor(
47
+ max_workers: ty.Optional[int] = None,
48
+ ) -> ty.ContextManager[ThreadPoolExecutor]:
49
+ """
50
+ Return a ThreadPoolExecutor that copies the current context to the new thread.
51
+
52
+ You don't need to use this directly.
53
+ """
54
+ return ThreadPoolExecutor(
55
+ max_workers=max_workers,
56
+ thread_name_prefix="contextful_threadpool_executor",
57
+ **initcontext(),
58
+ )
59
+
60
+
61
+ H = ty.TypeVar("H", bound=ty.Hashable)
62
+ L = ty.TypeVar("L", bound=Lock)
63
+
64
+
65
+ class LockSet(ty.Generic[H, L]):
66
+ """Get a process-global lock by hashable key, or create it (thread-safely) if it does not exist.
67
+
68
+ Handy if you have things you want to be able to do inside a process, but you don't want
69
+ to completely rule out the possibility of pickling the object that would otherwise hold the Lock object.
70
+
71
+ This does mean your locks are not shared across processes, but that's a Python limitation anyway.
72
+ """
73
+
74
+ def __init__(self, lockclass: ty.Type[L]):
75
+ self._lockclass = lockclass
76
+ self._master_lock = Lock()
77
+ self._hashed_locks: ty.Dict[H, L] = dict()
78
+
79
+ def get(self, hashable: H) -> Lock:
80
+ if hashable not in self._hashed_locks:
81
+ with self._master_lock:
82
+ if hashable not in self._hashed_locks:
83
+ self._hashed_locks[hashable] = self._lockclass()
84
+ assert hashable in self._hashed_locks, hashable
85
+ return self._hashed_locks[hashable]
86
+
87
+ def __getitem__(self, hashable: H) -> Lock:
88
+ return self.get(hashable)
89
+
90
+ def delete(self, hashable: H) -> None:
91
+ with self._master_lock:
92
+ self._hashed_locks.pop(hashable, None)
93
+
94
+
95
+ _GLOBAL_NAMED_LOCKS = LockSet[str, Lock](Lock)
96
+ # a general-purpose instance; you may want to create your own.
97
+
98
+
99
+ def named_lock(name: str) -> Lock:
100
+ return _GLOBAL_NAMED_LOCKS.get(name)