thds.core 1.40.20250701001211__py3-none-any.whl → 1.41.20250702194312__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of thds.core might be problematic.
- thds/core/__init__.py +1 -0
- thds/core/cpus.py +14 -1
- thds/core/dump_stacks.py +29 -0
- thds/core/hash_cache.py +14 -8
- thds/core/hashing.py +50 -17
- thds/core/link.py +10 -4
- thds/core/meta.py +6 -4
- thds/core/source/__init__.py +1 -0
- thds/core/source/_construct.py +27 -4
- thds/core/source/_download.py +2 -8
- thds/core/source/serde.py +9 -22
- thds/core/source/src.py +2 -1
- thds/core/tmp.py +6 -6
- {thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/METADATA +1 -1
- {thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/RECORD +18 -17
- {thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/WHEEL +0 -0
- {thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/entry_points.txt +0 -0
- {thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/top_level.txt +0 -0
thds/core/__init__.py
CHANGED
thds/core/cpus.py
CHANGED

@@ -39,7 +39,20 @@ def _try_read_value(config_path: Path, parse: ty.Callable[[str], T]) -> ty.Optio

 def _parse_cpu_quota_and_period_v2(s: str) -> ty.Tuple[int, int]:
     """Parse both CPU quota and period from kernel cgroup v2 config file."""
-
+
+    parts = s.split()
+    quota_str, period_str = parts[0], parts[1]
+
+    if quota_str == "max":
+        # https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
+        #
+        # "In the above four control files, the special token “max” should be used
+        # to represent upward infinity for both reading and writing."
+        quota = -1  # Use -1 to indicate unlimited, matching v1 behavior
+    else:
+        quota = int(quota_str)
+
+    period = int(period_str)
     return quota, period

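A minimal usage sketch of the new parser (importing a private helper, done here only for illustration). Under cgroup v2, cpu.max holds a "quota period" pair, and the literal token "max" means unlimited:

# Sketch, not part of the package: exercises the helper added in the diff above.
from thds.core.cpus import _parse_cpu_quota_and_period_v2

assert _parse_cpu_quota_and_period_v2("100000 100000") == (100000, 100000)  # quota of one full CPU
assert _parse_cpu_quota_and_period_v2("max 100000") == (-1, 100000)  # "max" maps to -1, matching cgroup v1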
thds/core/dump_stacks.py
ADDED

@@ -0,0 +1,29 @@
+import signal
+import sys
+import threading
+import traceback
+
+
+def dump_all_stacks(signum, frame):
+    print(f"\n=== Stack dump triggered by signal {signum} ===", flush=True)
+    for thread in threading.enumerate():
+        print(f"\nThread: {thread.name} (ID: {thread.ident})", flush=True)
+        print("-" * 50, flush=True)
+        if thread.ident in sys._current_frames():
+            traceback.print_stack(sys._current_frames()[thread.ident], file=sys.stdout)
+        else:
+            print("No frame available for this thread", flush=True)
+    print("=== End stack dump ===\n", flush=True)
+
+
+def setup_signal_handler(signal_num: int = signal.SIGUSR1):
+    """
+    Set up a signal handler to dump all thread stacks when the specified signal is received.
+    Default is SIGUSR1, but can be changed to any other signal as needed.
+    """
+    signal.signal(signal_num, dump_all_stacks)
+
+    print(
+        f"Signal handler set up for signal {signal_num}."
+        " Send this signal to the process to dump all thread stacks."
+    )
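A hypothetical usage sketch for the new module (the import path comes from the file list above; the kill command is standard POSIX):

# Register the handler once at startup, then signal the process from a shell.
from thds.core import dump_stacks

dump_stacks.setup_signal_handler()  # SIGUSR1 by default
# From another terminal, dump every thread's stack to stdout:
#   kill -USR1 <pid>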
thds/core/hash_cache.py
CHANGED

@@ -11,18 +11,16 @@ functions themselves.
 """

 import hashlib
-import os
 import sys
 from pathlib import Path
-from typing import Any

 from . import config, files
-from .hashing import Hash, hash_using
+from .hashing import Hash, Hasher, get_hasher, hash_using
 from .home import HOMEDIR
 from .log import getLogger
 from .types import StrOrPath

-CACHE_HASH_DIR = config.item("directory", HOMEDIR() / ".hash-cache", parse=Path)
+CACHE_HASH_DIR = config.item("directory", HOMEDIR() / ".thds/core/hash-cache", parse=Path)
 _1GB = 1 * 2**30  # log if hashing a file larger than this, since it will be slow.

@@ -53,7 +51,7 @@ def _is_no_older_than(file: Path, other: Path) -> bool:
     return file.stat().st_mtime >= other.stat().st_mtime

-def hash_file(filepath: StrOrPath, hasher:
+def hash_file(filepath: StrOrPath, hasher: Hasher) -> bytes:
     """Hashes a file with the given hashlib hasher. If we've already previously computed
     the given hash for the file and the file hasn't changed (according to filesystem
     mtime) since we stored that hash, we'll just return the cached hash.
@@ -74,14 +72,22 @@ def hash_file(filepath: StrOrPath, hasher: Any) -> bytes:
     # I want to know how often we're finding 'outdated' hashes; those should be rare.
     log_at_lvl(f"Hashing {psize/_1GB:.2f} GB file at {resolved_path}{hash_cached}")

-
+    if hasattr(hasher, "update_mmap"):
+        # a special case for the blake3 module, which has deadlocked in the past
+        logger.warning("DEBUG starting update_mmap for blake3")
+        hasher.update_mmap(filepath)
+        hash_bytes = hasher.digest()
+        logger.info("DEBUG finished update_mmap")
+    else:
+        hash_bytes = hash_using(resolved_path, hasher).digest()
+
     cached_hash_path.parent.mkdir(parents=True, exist_ok=True)
     with files.atomic_binary_writer(cached_hash_path) as f:
         f.write(hash_bytes)
     return hash_bytes

-def filehash(algo: str, pathlike:
+def filehash(algo: str, pathlike: StrOrPath) -> Hash:
     """Wraps a cached hash of a file in a core.hashing.Hash object, which carries the name
     of the hash algorithm used."""
-    return Hash(sys.intern(algo), hash_file(pathlike,
+    return Hash(sys.intern(algo), hash_file(pathlike, get_hasher(algo)))
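A sketch of the caching contract, assuming some existing file at the hypothetical path data.bin:

from thds.core.hash_cache import filehash

h1 = filehash("sha256", "data.bin")  # computes the digest, then stores it under CACHE_HASH_DIR
h2 = filehash("sha256", "data.bin")  # re-served from ~/.thds/core/hash-cache while the mtime is unchanged
assert h1 == h2 and h1.algo == "sha256"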
thds/core/hashing.py
CHANGED

@@ -2,39 +2,53 @@
 https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
 I have written this code too many times to write it again. Why isn't this in the stdlib?
 """
+
 import base64
 import contextlib
+import hashlib
 import io
 import os
-import threading
 import typing as ty
 from pathlib import Path

-
-
-
-_SEMAPHORE = threading.BoundedSemaphore(int(os.getenv("THDS_CORE_HASHING_PARALLELISM", 4)))
-_CHUNK_SIZE = int(os.getenv("THDS_CORE_HASHING_CHUNK_SIZE", 65536))
+from .types import StrOrPath
+
+_CHUNK_SIZE = int(os.getenv("THDS_CORE_HASHING_CHUNK_SIZE", 2**18))
 # https://stackoverflow.com/questions/17731660/hashlib-optimal-size-of-chunks-to-be-used-in-md5-update
-#
-#
-
+# i've done some additional benchmarking, and slightly larger chunks (256 KB) are faster
+# when the files are larger, and those are the ones we care about most since they take the longest.
+
+
+class Hasher(ty.Protocol):
+    """This may be incomplete as far as hashlib is concerned, but it covers everything we use."""
+
+    @property
+    def name(self) -> str:
+        """The name of the hashing algorithm, e.g. 'sha256'."""
+        ...

-
+    def update(self, __byteslike: ty.Union[bytes, bytearray, memoryview]) -> None:
+        """Update the hash object with the bytes-like object."""
+        ...
+
+    def digest(self) -> bytes:
+        ...
+
+
+H = ty.TypeVar("H", bound=Hasher)
 SomehowReadable = ty.Union[ty.AnyStr, ty.IO[ty.AnyStr], Path]

-def hash_readable_chunks(bytes_readable: ty.IO[bytes], hasher:
+def hash_readable_chunks(bytes_readable: ty.IO[bytes], hasher: H) -> H:
     """Return thing you can call .digest or .hexdigest on.

     E.g.:

     hash_readable_chunks(open(Path('foo/bar'), 'rb'), hashlib.sha256()).hexdigest()
     """
-
-
-
-    return hasher
+    for chunk in iter(lambda: bytes_readable.read(_CHUNK_SIZE), b""):
+        hasher.update(chunk)  # type: ignore
+    return hasher

 @contextlib.contextmanager
@@ -53,7 +67,7 @@ def attempt_readable(thing: SomehowReadable) -> ty.Iterator[ty.IO[bytes]]:
         yield readable

-def hash_using(data: SomehowReadable, hasher:
+def hash_using(data: SomehowReadable, hasher: H) -> H:
     """This is quite dynamic - but if your data object is not readable
     bytes and is not openable as bytes, you'll get a
     FileNotFoundError, or possibly a TypeError or other gremlin.
@@ -66,7 +80,7 @@ def hash_using(data: SomehowReadable, hasher: T) -> T:
         return hash_readable_chunks(readable, hasher)

-def hash_anything(data: SomehowReadable, hasher:
+def hash_anything(data: SomehowReadable, hasher: H) -> ty.Optional[H]:
     try:
         return hash_using(data, hasher)
     except (FileNotFoundError, TypeError):
@@ -104,3 +118,22 @@ class Hash(ty.NamedTuple):

     def __repr__(self) -> str:
         return f"Hash(algo='{self.algo}', bytes={_repr_bytes(self.bytes)})"
+
+
+_NAMED_HASH_CONSTRUCTORS: ty.Dict[str, ty.Callable[[str], Hasher]] = {}
+
+
+def add_named_hash(algo: str, constructor: ty.Callable[[str], Hasher]) -> None:
+    _NAMED_HASH_CONSTRUCTORS[algo] = constructor
+
+
+def get_hasher(algo: str) -> Hasher:
+    if algo in _NAMED_HASH_CONSTRUCTORS:
+        return _NAMED_HASH_CONSTRUCTORS[algo](algo)
+
+    return hashlib.new(algo)
+
+
+def file(algo: str, pathlike: StrOrPath) -> bytes:
+    """I'm so lazy"""
+    return hash_using(pathlike, get_hasher(algo)).digest()
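A sketch of the new named-hash registry. Builtin algorithms need no registration; blake3 here is an assumed optional third-party package (this diff does not install it):

from thds.core import hashing

sha = hashing.get_hasher("sha256")  # unregistered names fall back to hashlib.new()

try:
    import blake3  # assumption: pip-installed blake3, whose hasher objects fit the Hasher protocol

    hashing.add_named_hash("blake3", lambda algo: blake3.blake3())
except ImportError:
    pass

digest = hashing.file("sha256", __file__)  # the new one-liner: digest a file by algorithm name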
thds/core/link.py
CHANGED

@@ -48,6 +48,7 @@ def link(
     src = Path(src).resolve()
     if src == Path(dest).resolve():
         return "same"
+
     assert os.path.exists(src), f"Source {src} does not exist"

     dest_parent = _dest_parent(dest)
@@ -63,24 +64,27 @@
         try:
             subprocess.check_output(["cp", "-c", str(src), str(tmp_link_dest)])
             os.rename(tmp_link_dest, dest)
-            logger.debug(
+            logger.debug("Created a copy-on-write reflink from %s to %s", src, dest)
             return "ref"
+
         except subprocess.CalledProcessError:
             pass
     if "hard" in attempt_types:
         try:
             os.link(src, tmp_link_dest)
             os.rename(tmp_link_dest, dest)
-            logger.debug(
+            logger.debug("Created a hardlink from %s to %s", src, dest)
             return "hard"
+
         except OSError as oserr:
             logger.warning(f"Unable to hard-link {src} to {dest} ({oserr})")
     if "soft" in attempt_types:
         try:
             os.symlink(src, tmp_link_dest)
             os.rename(tmp_link_dest, dest)
-            logger.debug(
+            logger.debug("Created a softlink from %s to %s", src, dest)
             return "soft"
+
         except OSError as oserr:
             logger.warning(f"Unable to soft-link {src} to {dest}" f" ({oserr})")

@@ -112,7 +116,9 @@ def link_or_copy(src: ct.StrOrPath, dest: ct.StrOrPath, *link_types: LinkType) -
     link_success_type = link(src, dest, *link_types)
     if link_success_type:
         return link_success_type
-
+
+    log_at_lvl = logger.debug if link_types == ("ref",) and not _IS_MAC else logger.info
+    log_at_lvl("Unable to link %s to %s using %s; falling back to copy.", src, dest, link_types)

     logger.debug("Copying %s to %s", src, dest)
     with tmp.temppath_same_fs(dest) as tmpfile:
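The attempt order is exactly the order of the link types passed in; a sketch with hypothetical paths:

from thds.core.link import link_or_copy

# Try a copy-on-write reflink, then a hardlink, and fall back to a plain copy.
result = link_or_copy("inputs/big.bin", "workdir/big.bin", "ref", "hard")
print(result)  # the strategy that succeeded, e.g. "ref" or "hard"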
thds/core/meta.py
CHANGED

@@ -198,7 +198,7 @@ class NoBasePackageFromMain(ValueError):

 @lru_cache(None)
-def get_base_package(pkg: Package) -> str:
+def get_base_package(pkg: Package, *, orig: Package = "") -> str:
     try:
         str_pkg = str(pkg)
         if str_pkg == "__main__":
@@ -210,10 +210,12 @@ def get_base_package(pkg: Package) -> str:
     except PackageNotFoundError:
         pkg_ = pkg.split(".")
         if len(pkg_) <= 1:
-            LOGGER.warning(
+            LOGGER.warning(
+                "Could not find the base package for `%s`. Package %s not found.", orig, pkg
+            )
             return ""
-
-
+
+        return get_base_package(".".join(pkg_[:-1]), orig=orig or pkg)

     return str(pkg)
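The effect of the new orig parameter, sketched (the resolved name is an assumption about an installed thds.core distribution):

from thds.core.meta import get_base_package

# "thds.core.sqlite" is not itself a distribution, so the lookup recurses with
# "thds.core"; orig= keeps any warning pointed at the name originally asked for.
print(get_base_package("thds.core.sqlite"))  # expected to resolve to "thds.core"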
thds/core/source/__init__.py
CHANGED

@@ -4,6 +4,7 @@ yet will not be downloaded (if non-local) until it is actually opened or unwrapp
 """

 from . import serde, tree  # noqa: F401
+from ._construct import set_file_autohash  # noqa: F401
 from ._construct import from_file, from_uri, path_from_uri, register_from_uri_handler  # noqa: F401
 from ._construct_tree import tree_from_directory  # noqa: F401
 from ._download import Downloader, register_download_handler  # noqa: F401
thds/core/source/_construct.py
CHANGED

@@ -1,15 +1,37 @@
 import os
+import sys
 import typing as ty
 from functools import partial
 from pathlib import Path

+from .. import log
 from ..files import is_file_uri, path_from_uri, to_uri
-from ..
-from
+from ..hash_cache import filehash
+from ..hashing import Hash, Hasher, add_named_hash
 from .src import Source

 # Creation from local Files or from remote URIs

+_AUTOHASH = "sha256"
+
+
+def set_file_autohash(
+    algo: str, hash_constructor: ty.Optional[ty.Callable[[str], Hasher]] = None
+) -> None:
+    """If you call this and provide a non-builtin hash algorithm, you must also provide a constructor for it."""
+    if hash_constructor:
+        hash_constructor(algo)  # this will raise if algo is not supported.
+        add_named_hash(algo, hash_constructor)
+    global _AUTOHASH
+    _AUTOHASH = sys.intern(algo)
+
+
+def hash_file(path: Path, algo: str = "") -> Hash:
+    hash_algo = sys.intern(algo or _AUTOHASH)
+    with log.logger_context(hash_for=f"source-{hash_algo}"):
+        computed_hash = filehash(hash_algo, path)
+    return computed_hash
+

 def from_file(
     filename: ty.Union[str, os.PathLike], hash: ty.Optional[Hash] = None, uri: str = ""
@@ -24,10 +46,11 @@ def from_file(
     if not path.exists():
         raise FileNotFoundError(path)

+    file_hash = hash or hash_file(path)  # use automatic hash algo if not specified!
     if uri:
-        src = from_uri(uri,
+        src = from_uri(uri, file_hash)
     else:
-        src = Source(to_uri(path),
+        src = Source(to_uri(path), file_hash)
     src._set_cached_path(path)  # internally, it's okay to hack around immutability.
     return src
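A sketch of switching the automatic hash used by source.from_file; blake3 is again an assumed optional dependency, and a constructor is required for any algorithm hashlib does not know:

import blake3  # assumption: third-party package whose hashers satisfy the Hasher protocol

from thds.core import source

source.set_file_autohash("blake3", lambda algo: blake3.blake3())
src = source.from_file("data.csv")  # hypothetical path; hashed with blake3 unless hash= is given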
thds/core/source/_download.py
CHANGED

@@ -3,16 +3,12 @@
 yet will not be downloaded (if non-local) until it is actually opened or unwrapped.
 """

-import sys
 import typing as ty
 from pathlib import Path

-from .. import log
 from ..files import is_file_uri, path_from_uri
-from ..hash_cache import filehash
 from ..hashing import Hash
-
-SHA256 = "sha256"  # this hopefully interns the string which makes sure that all our pickles reuse the reference
+from ._construct import hash_file

 class Downloader(ty.Protocol):
@@ -76,9 +72,7 @@ class SourceHashMismatchError(ValueError):

 def _check_hash(expected_hash: ty.Optional[Hash], path: Path) -> Hash:
-
-    with log.logger_context(hash_for=f"source-{hash_algo}"):
-        computed_hash = filehash(hash_algo, path)
+    computed_hash = hash_file(path)
     if expected_hash and expected_hash != computed_hash:
         raise SourceHashMismatchError(
             f"{expected_hash.algo} mismatch for {path};"
thds/core/source/serde.py
CHANGED

@@ -1,13 +1,11 @@
 # this should later get promoted somewhere, probably
 import json
 import typing as ty
-from functools import partial
 from pathlib import Path

 from thds.core import files, hashing, log, types

 from . import _construct
-from ._download import SHA256
 from .src import Source

 _SHA256_B64 = "sha256b64"
@@ -17,20 +15,16 @@ MD5 = "md5"
 logger = log.getLogger(__name__)

-def
-
-
-
-
-
-def _from_md5b64(d: dict) -> ty.Optional[hashing.Hash]:
-    if "md5b64" in d:
-        return hashing.Hash(algo=MD5, bytes=hashing.db64(d[_MD5_B64]))
+def _from_b64(d: dict) -> ty.Optional[hashing.Hash]:
+    for key in d.keys():
+        if key.endswith("b64"):
+            algo = key[:-3]
+            return hashing.Hash(algo=algo, bytes=hashing.db64(d[key]))
     return None

 HashParser = ty.Callable[[dict], ty.Optional[hashing.Hash]]
-_BASE_PARSERS = (
+_BASE_PARSERS = (_from_b64,)

 def base_parsers() -> ty.Tuple[HashParser, ...]:
@@ -45,19 +39,12 @@ def from_json(json_source: str, hash_parsers: ty.Collection[HashParser] = base_p
     )

-def
-
-) -> ty.Optional[dict]:
-    if hash.algo == algo:
-        return {keyname: stringify_hash(hash.bytes)}
-    return None
-
+def _very_generic_b64_hash_serializer(hash: hashing.Hash) -> dict:
+    return {hash.algo + "b64": hashing.b64(hash.bytes)}

-_to_sha256b64 = partial(_generic_hash_serializer, SHA256, hashing.b64, _SHA256_B64)
-_to_md5b64 = partial(_generic_hash_serializer, MD5, hashing.b64, _MD5_B64)

 HashSerializer = ty.Callable[[hashing.Hash], ty.Optional[dict]]
-_BASE_HASH_SERIALIZERS: ty.Tuple[HashSerializer, ...] = (
+_BASE_HASH_SERIALIZERS: ty.Tuple[HashSerializer, ...] = (_very_generic_b64_hash_serializer,)

 def base_hash_serializers() -> ty.Tuple[HashSerializer, ...]:
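A sketch of the generic round trip (private helpers imported only for illustration):

from thds.core import hashing
from thds.core.source.serde import _from_b64, _very_generic_b64_hash_serializer

h = hashing.Hash(algo="sha256", bytes=b"\x12" * 32)
d = _very_generic_b64_hash_serializer(h)  # {"sha256b64": "<base64 of the digest>"}
assert _from_b64(d) == h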
thds/core/source/src.py
CHANGED

@@ -4,7 +4,6 @@ from dataclasses import dataclass
 from pathlib import Path

 from .. import hashing, types
-from . import _download

 @dataclass(frozen=True)
@@ -73,6 +72,8 @@ class Source(os.PathLike):
         failure will raise SourceHashMismatchError.
         """
         if self.cached_path is None or not self.cached_path.exists():
+            from . import _download  # ugly circular import
+
             lpath = _download._get_download_handler(self.uri)(self.hash)
             # path() used to be responsible for checking the hash, but since we pass it to the downloader,
             # it really makes more sense to allow the downloader to decide how to verify its own download,
thds/core/tmp.py
CHANGED

@@ -4,12 +4,13 @@ Module is given this name partly because we tend not to name other things intern
 the word 'tmp', preferring instead 'temp'. Therefore `tmp` will be a little less ambiguous
 overall.
 """
+
 import contextlib
 import shutil
 import tempfile
 import typing as ty
 from pathlib import Path
-from
+from uuid import uuid4

 from .home import HOMEDIR
 from .types import StrOrPath
@@ -45,10 +46,7 @@ def temppath_same_fs(lcl_path: StrOrPath = "") -> ty.Iterator[Path]:
     # we would prefer _not_ to reinvent the wheel here, but if the tempfiles are
     # getting created on a different volume, then moves are no longer atomic, and
     # that's a huge pain for lots of reasons.
-    fname_parts = [
-        ".thds-core-tmp-home-fs",
-        str(SystemRandom().random())[2:],  # makes a float look like a numeric string
-    ]
+    fname_parts = [".core-tmp-home-fs", str(uuid4())]
     if basename:
         fname_parts.append(basename)
     dpath = parent_dir / "-".join(fname_parts)
@@ -64,8 +62,10 @@ def temppath_same_fs(lcl_path: StrOrPath = "") -> ty.Iterator[Path]:
     with tempfile.TemporaryDirectory() as tdir:
         # actually check whether we're on the same filesystem.
         if _are_same_fs(parent_dir, Path(tdir)):
-
+            # the standard path has us just using a normal temporary directory that we don't create ourselves.
+            yield Path(tdir) / "-".join(filter(None, ["core-tmp", basename]))
         else:
+            # but if we need to do something special... here we are.
             yield from _tempdir_same_filesystem()
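A sketch mirroring how link_or_copy itself uses this: get a scratch path on the destination's filesystem so the final rename is atomic (paths are hypothetical):

import os

from thds.core import tmp

with tmp.temppath_same_fs("outputs/result.bin") as scratch:
    scratch.write_bytes(b"partial work")
    os.replace(scratch, "outputs/result.bin")  # same-filesystem rename: atomic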
{thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/RECORD
RENAMED

@@ -1,31 +1,32 @@
-thds/core/__init__.py,sha256=
+thds/core/__init__.py,sha256=BUX2dBk_xTCX-E_jXnDiIK4jxY97iOxEmwC1oRRz7Z4,955
 thds/core/ansi_esc.py,sha256=QZ3CptZbX4N_hyP2IgqfTbNt9tBPaqy7ReTMQIzGbrc,870
 thds/core/cache.py,sha256=nL0oAyZrhPqyBBLevnOWSWVoEBrftaG3aE6Qq6tvmAA,7153
 thds/core/calgitver.py,sha256=6ioH5MGE65l_Dp924oD5CWrLyxKgmhtn46YwGxFpHfM,2497
 thds/core/cm.py,sha256=WZB8eQU0DaBYj9s97nc3PuCtai9guovfyiQH68zhLzY,1086
 thds/core/concurrency.py,sha256=NQunF_tJ_z8cfVyhzkTPlb-nZrgu-vIk9_3XffgscKQ,3520
 thds/core/config.py,sha256=VWymw6pqPRvX7wwsJ0Y-D2gLoCclAHhARmTnuUw7kb0,10014
-thds/core/cpus.py,sha256=
+thds/core/cpus.py,sha256=wcxNvcJaXfm7P-peLusbF5EhD0fGGt9tG_EeL5rbKU8,3836
 thds/core/decos.py,sha256=VpFTKTArXepICxN4U8C8J6Z5KDq-yVjFZQzqs2jeVAk,1341
 thds/core/dict_utils.py,sha256=MatsjZC9lchfdaDqNAzL2mkTZytDnCAqg56sMm71wbE,6364
+thds/core/dump_stacks.py,sha256=srE4VlgU1kwcMFf12skenSdinx7Lu174MjV-gLNh6n4,1033
 thds/core/env.py,sha256=HkuyFmGpCgdQUB1r2GbpCqB3cs1lCsvp47Ghk1DHBo8,1083
 thds/core/exit_after.py,sha256=0lz63nz2NTiIdyBDYyRa9bQShxQKe7eISy8VhXeW4HU,3485
 thds/core/files.py,sha256=NJlPXj7BejKd_Pa06MOywVv_YapT4bVedfsJHrWX8nI,4579
 thds/core/fretry.py,sha256=PKgOxCMjcF4zsFfXFvPXpomv5J6KU6llB1EaKukugig,6942
 thds/core/generators.py,sha256=rcdFpPj0NMJWSaSZTnBfTeZxTTORNB633Lng-BW1284,1939
 thds/core/git.py,sha256=cfdN1oXyfz7k7T2XaseTqL6Ng53B9lfKtzDLmFjojRs,2947
-thds/core/hash_cache.py,sha256=
-thds/core/hashing.py,sha256=
+thds/core/hash_cache.py,sha256=jSFijG33UUQjVSkbuACdg4KzIBaf28i7hSQXCO49Qh0,4066
+thds/core/hashing.py,sha256=8UmbivijnFTzLu42zYsJSksogjrSEYrhzlKUjQBt8CM,4254
 thds/core/home.py,sha256=tTClL_AarIKeri1aNCpuIC6evD7qr83ESGD173B81hU,470
 thds/core/hostname.py,sha256=canFGr-JaaG7nUfsQlyL0JT-2tnZoT1BvXzyaOMK1vA,208
 thds/core/imports.py,sha256=0LVegY8I8_XKZPcqiIp2OVVzEDtyqYA3JETf9OAKNKs,568
 thds/core/inspect.py,sha256=3IY9CSa7zAcAVyBDOYfMtJ2QU5cRc98JaN91XAbaSok,2368
 thds/core/iterators.py,sha256=d3iTQDR0gCW1nMRmknQeodR_4THzR9Ajmp8F8KCCFgg,208
 thds/core/lazy.py,sha256=e1WvG4LsbEydV0igEr_Vl1cq05zlQNIE8MFYT90yglE,3289
-thds/core/link.py,sha256=
+thds/core/link.py,sha256=4-9d22l_oSkKoSzlYEO-rwxO1hvvj6VETY7LwvGcX6M,5534
 thds/core/logical_root.py,sha256=gWkIYRv9kNQfzbpxJaYiwNXVz1neZ2NvnvProtOn9d8,1399
 thds/core/merge_args.py,sha256=7oj7dtO1-XVkfTM3aBlq3QlZbo8tb6X7E3EVIR-60t8,5781
-thds/core/meta.py,sha256=
+thds/core/meta.py,sha256=3oX7wTO_SmrVKABFPLHHIVyNBXEil1MdGfc5s88_Isk,12134
 thds/core/parallel.py,sha256=HXAn9aIYqNE5rnRN5ypxR6CUucdfzE5T5rJ_MUv-pFk,7590
 thds/core/pickle_visit.py,sha256=QNMWIi5buvk2zsvx1-D-FKL7tkrFUFDs387vxgGebgU,833
 thds/core/prof.py,sha256=5ViolfPsAPwUTHuhAe-bon7IArPGXydpGoB5uZmObDk,8264
@@ -38,7 +39,7 @@ thds/core/source_serde.py,sha256=X4c7LiT3VidejqtTel9YB6dWGB3x-ct39KF9E50Nbx4,139
 thds/core/stack_context.py,sha256=17lPOuYWclUpZ-VXRkPgI4WbiMzq7_ZY6Kj1QK_1oNo,1332
 thds/core/thunks.py,sha256=p1OvMBJ4VGMsD8BVA7zwPeAp0L3y_nxVozBF2E78t3M,1053
 thds/core/timer.py,sha256=aOpNP-wHKaKs6ONK5fOtIOgx00FChVZquG4PeaEYH_k,5376
-thds/core/tmp.py,sha256=
+thds/core/tmp.py,sha256=jA8FwDbXo3hx8o4kRjAlkwpcI77X86GY4Sktkps29ho,3166
 thds/core/types.py,sha256=sFqI_8BsB1u85PSizjBZw8PBtplC7U54E19wZZWCEvI,152
 thds/core/log/__init__.py,sha256=bDbZvlxyymY6VrQzD8lCn0egniLEiA9hpNMAXZ7e7wY,1348
 thds/core/log/basic_config.py,sha256=2Y9U_c4PTrIsCmaN7Ps6Xr90AhJPzdYjeUzUMqO7oFU,6704
@@ -46,12 +47,12 @@ thds/core/log/json_formatter.py,sha256=C5bRsSbAqaQqfTm88jc3mYe3vwKZZLAxET8s7_u7a
 thds/core/log/kw_formatter.py,sha256=9-MVOd2r5NEkYNne9qWyFMeR5lac3w7mjHXsDa681i0,3379
 thds/core/log/kw_logger.py,sha256=CyZVPnkUMtrUL2Lyk261AIEPmoP-buf_suFAhQlU1io,4063
 thds/core/log/logfmt.py,sha256=i66zoG2oERnE1P_0TVXdlfJ1YgUmvtMjqRtdV5u2SvU,10366
-thds/core/source/__init__.py,sha256=
-thds/core/source/_construct.py,sha256=
+thds/core/source/__init__.py,sha256=e-cRoLl1HKY3YrDjpV5p_i7zvr1L4q51-t1ISTxdig4,543
+thds/core/source/_construct.py,sha256=lt6OUOz_s9VBZMZJHXVpfIjmuTH7w1PBhpre0dlW1Zw,3914
 thds/core/source/_construct_tree.py,sha256=5Zk3a5a0uVxklWw6q4JOvI_bErqwlBngUz4TyEAWn1g,616
-thds/core/source/_download.py,sha256=
-thds/core/source/serde.py,sha256=
-thds/core/source/src.py,sha256=
+thds/core/source/_download.py,sha256=faKWxgzw1fTqOoyjtgi4IyhiZpBYTm_GZxwqC6LTiXU,2764
+thds/core/source/serde.py,sha256=zEAR24AewgDqqkIxcPpS7NZ-ZbEnQPK_A7siWlE3Q0E,3091
+thds/core/source/src.py,sha256=nTUBgsAES0J73enIEbc5BitgnxA5kBnf88oYZoMQGnM,4596
 thds/core/source/tree.py,sha256=iNCoCE655MwXQwc2Y0IIm1HMVk5Inj0NGVU9U8Wl_90,4317
 thds/core/sqlite/__init__.py,sha256=tDMzuO76qTtckJHldPQ6nPZ6kcvhhoJrVuuW42JtaSQ,606
 thds/core/sqlite/connect.py,sha256=l4QaSAI8RjP7Qh2FjmJ3EwRgfGf65Z3-LjtC9ocHM_U,977
@@ -68,8 +69,8 @@ thds/core/sqlite/structured.py,sha256=SvZ67KcVcVdmpR52JSd52vMTW2ALUXmlHEeD-VrzWV
 thds/core/sqlite/types.py,sha256=oUkfoKRYNGDPZRk29s09rc9ha3SCk2SKr_K6WKebBFs,1308
 thds/core/sqlite/upsert.py,sha256=BmKK6fsGVedt43iY-Lp7dnAu8aJ1e9CYlPVEQR2pMj4,5827
 thds/core/sqlite/write.py,sha256=z0219vDkQDCnsV0WLvsj94keItr7H4j7Y_evbcoBrWU,3458
-thds_core-1.
-thds_core-1.
-thds_core-1.
-thds_core-1.
-thds_core-1.
+thds_core-1.41.20250702194312.dist-info/METADATA,sha256=8IWkjAfNxxRdJ0UlQirpotjr7BxPU_MY7ArfrpJ3Jt8,2216
+thds_core-1.41.20250702194312.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thds_core-1.41.20250702194312.dist-info/entry_points.txt,sha256=bOCOVhKZv7azF3FvaWX6uxE6yrjK6FcjqhtxXvLiFY8,161
+thds_core-1.41.20250702194312.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
+thds_core-1.41.20250702194312.dist-info/RECORD,,

{thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/WHEEL
RENAMED
File without changes

{thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/entry_points.txt
RENAMED

File without changes

{thds_core-1.40.20250701001211.dist-info → thds_core-1.41.20250702194312.dist-info}/top_level.txt
RENAMED

File without changes