thds.core 1.31.20250213162956__py3-none-any.whl → 1.32.20250218201534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.core might be problematic. Click here for more details.

thds/core/cm.py ADDED
@@ -0,0 +1,29 @@
1
+ """A keep-alive wrapper for context managers. Let's say you've got a thread pool executor
2
+ that you've created, and you want to be able to pass it to multiple users that expect to
3
+ 'enter' the thread pool themselves, using a `with` statement. But you don't want the
4
+ threads to be destroyed after the first use; you want to open the context yourself, but
5
+ still pass the expected context manager to the users. This is a way to do that.
6
+ """
7
+
8
+ import contextlib
9
+ import typing as ty
10
+
11
T = ty.TypeVar("T")  # the value type produced by the wrapped context manager's __enter__
12
+
13
+
14
+ class _AlreadyEnteredContext(ty.ContextManager[T]):
15
+ def __init__(self, entered_context: T):
16
+ self.entered_context = entered_context
17
+
18
+ def __enter__(self) -> T:
19
+ # No-op enter; just return the underlying thing
20
+ return self.entered_context
21
+
22
+ def __exit__(self, exc_type, exc_value, traceback): # type: ignore
23
+ pass # No-op exit
24
+
25
+
26
@contextlib.contextmanager
def keep_context(context_manager: ty.ContextManager[T]) -> ty.Iterator[ty.ContextManager[T]]:
    """Enter `context_manager` once and yield a re-enterable no-op wrapper for its value.

    Consumers may `with` the yielded object as many times as they like; the real
    exit happens only when this generator's own context is closed.
    """
    with context_manager as entered:
        yield _AlreadyEnteredContext(entered)
thds/core/files.py CHANGED
@@ -19,7 +19,7 @@ FILE_SCHEME = "file://"
19
19
  logger = getLogger(__name__)
20
20
 
21
21
 
22
- def set_read_only(fpath: StrOrPath):
22
+ def set_read_only(fpath: StrOrPath) -> None:
23
23
  # thank you https://stackoverflow.com/a/51262451
24
24
  logger.debug("Setting '%s' to read-only", fpath)
25
25
  perms = stat.S_IMODE(os.lstat(fpath).st_mode)
@@ -87,7 +87,7 @@ def set_file_limit(n: int):
87
87
  assert resource.getrlimit(resource.RLIMIT_NOFILE) == (n, n)
88
88
 
89
89
 
90
- def bump_limits():
90
+ def bump_limits() -> None:
91
91
  """It was common to have to do this manually on our macs. Now that is no longer required."""
92
92
  set_file_limit(OPEN_FILES_LIMIT())
93
93
 
@@ -0,0 +1,36 @@
1
+ import typing as ty
2
+
3
+
4
def find_common_prefix(uris: ty.Iterable[str]) -> ty.List[str]:
    """Return the longest run of leading '/'-separated segments shared by all URIs.

    An empty input yields an empty list. If even the first segment differs across
    the inputs, there is no common prefix at all and a ValueError is raised.
    """
    split_uris = [u.split("/") for u in uris]
    if not split_uris:
        return []

    first, *rest = split_uris
    for idx, segment in enumerate(first):
        # a mismatch occurs when some other URI is shorter or disagrees at this index
        if any(idx >= len(other) or other[idx] != segment for other in rest):
            if idx == 0:
                raise ValueError(f"Paths have no common prefix: {uris}")
            return first[:idx]
    return first  # every segment of the first URI is shared by all the others
18
+
19
+
20
def find(paths: ty.Iterable[str], higher_logical_root: str = "") -> str:
    """Return the logical root path shared by `paths`, joined with '/'.

    Without `higher_logical_root` this is simply the lowest common prefix. With it,
    we scan the common prefix for the (possibly multi-segment) root string and
    return everything up to and including the *first* segment of the match - i.e.
    the top-level directory corresponding to that logical root.

    Raises ValueError if `higher_logical_root` never appears in the common prefix.
    """
    common = find_common_prefix(paths)
    if not higher_logical_root:
        return "/".join(common)  # lowest common root

    wanted = higher_logical_root.split("/")
    window = len(wanted)
    for start in range(len(common) - window + 1):
        if common[start : start + window] == wanted:
            # take the common parts plus one: the top-level directory of the
            # (possibly multi-component) logical root that just matched.
            return "/".join(common[: start + 1])

    raise ValueError(f"Higher root '{higher_logical_root}' not found")
thds/core/meta.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
- "git_commit": "30d8cd75a3d8e8b981101544c57544a36051476c",
2
+ "git_commit": "6aa6700b782d76b65b65256eaa49bd70e8217015",
3
3
  "git_branch": "main",
4
4
  "git_is_clean": true,
5
- "pyproject_version": "1.31.20250213162956",
5
+ "pyproject_version": "1.32.20250218201534",
6
6
  "thds_user": "runner",
7
7
  "misc": {}
8
8
  }
@@ -0,0 +1,9 @@
1
+ """Wrap openable, read-only data that is either locally-present or downloadable,
2
+
3
+ yet will not be downloaded (if non-local) until it is actually opened or unwrapped.
4
+ """
5
+
6
+ from . import serde, tree # noqa: F401
7
+ from ._construct import from_file, from_uri, path_from_uri, register_from_uri_handler # noqa: F401
8
+ from ._download import Downloader, register_download_handler # noqa: F401
9
+ from .src import Source # noqa: F401
@@ -0,0 +1,79 @@
1
+ import typing as ty
2
+ from functools import partial
3
+ from pathlib import Path
4
+
5
+ from ..files import is_file_uri, path_from_uri, to_uri
6
+ from ..hashing import Hash
7
+ from ..types import StrOrPath
8
+ from . import _download
9
+ from .src import Source
10
+
11
+ # Creation from local Files or from remote URIs
12
+
13
+
14
def from_file(filename: StrOrPath, hash: ty.Optional[Hash] = None, uri: str = "") -> Source:
    """Create a read-only Source from a local file that already exists.

    The local file is always read and hashed. If `uri` is given, it becomes the
    Source's URI verbatim - NO UPLOAD IS PERFORMED, so it is the caller's
    responsibility to ensure the data has actually been uploaded there.

    Raises FileNotFoundError if the file does not exist.
    """
    local_path = path_from_uri(filename) if isinstance(filename, str) else filename
    assert isinstance(local_path, Path)
    if not local_path.exists():
        raise FileNotFoundError(local_path)

    verified_hash = _download._check_hash(hash, local_path)
    src = from_uri(uri, verified_hash) if uri else Source(to_uri(local_path), verified_hash)
    src._set_cached_path(local_path)  # internally, it's okay to hack around immutability.
    return src
32
+
33
+
34
class FromUri(ty.Protocol):
    """Protocol for a closure over a URI that constructs a Source on demand."""

    def __call__(self, hash: ty.Optional[Hash]) -> Source:
        """Closure over a URI that creates a Source from a URI.

        The Hash may be used to short-circuit creation that would result in creating
        a Source that cannot match the expected Hash, but this is not required,
        and the hash will be included in the Source object regardless, and will
        be validated (if non-nil) at the time of source data access.
        """
43
+
44
+
45
class FromUriHandler(ty.Protocol):
    """Protocol for scheme-specific factories registered via `register_from_uri_handler`."""

    def __call__(self, uri: str) -> ty.Optional[FromUri]:
        """Returns a FromUri object containing the URI if this URI can be handled. Returns
        None if this URI cannot be handled.
        """
50
+
51
+
52
def register_from_uri_handler(key: str, handler: FromUriHandler):
    """Register a custom Source-from-URI factory for URIs a library knows how to handle.

    The key's only purpose is to let re-registration replace a prior handler
    instead of accumulating duplicates; it carries no other meaning.
    """
    _FROM_URI_HANDLERS[key] = handler
59
+
60
+
61
# Registry of FromUriHandlers; keys exist only to deduplicate registrations.
_FROM_URI_HANDLERS: ty.Dict[str, FromUriHandler] = dict()
# file:// URIs are handled out of the box by deferring to `from_file`.
register_from_uri_handler(
    "local_file", lambda uri: partial(from_file, path_from_uri(uri)) if is_file_uri(uri) else None
)
65
+
66
+
67
def from_uri(uri: str, hash: ty.Optional[Hash] = None) -> Source:
    """Create a read-only Source for data that should already exist at `uri`.

    Source itself makes no guarantee that the URI points at real data - only that
    any data it does represent is treated as read-only.

    Registered handlers (see `register_from_uri_handler`) are consulted first, so a
    URI-handling library capable of, say, determining the blob's Hash without
    downloading it can supply a more specific construction.
    """
    factory = next(filter(None, (h(uri) for h in _FROM_URI_HANDLERS.values())), None)
    if factory is not None:
        return factory(hash)
    return Source(uri=uri, hash=hash)
@@ -0,0 +1,84 @@
1
+ """Wrap openable, read-only data that is either locally-present or downloadable,
2
+
3
+ yet will not be downloaded (if non-local) until it is actually opened or unwrapped.
4
+ """
5
+
6
+ import typing as ty
7
+ from pathlib import Path
8
+
9
+ from .. import log
10
+ from ..files import is_file_uri, path_from_uri
11
+ from ..hash_cache import filehash
12
+ from ..hashing import Hash
13
+
14
+
15
class Downloader(ty.Protocol):
    """Protocol for a closure over a URI that materializes the data as a local file."""

    def __call__(self, hash: ty.Optional[Hash]) -> Path:
        """Closure over a URI that downloads a file to a local path and returns the path.
        The file may be placed anywhere as long as the file will be readable until the
        program exits.

        If the URI points to a missing file, this MUST raise any Exception that the
        underlying implementation desires. It MUST NOT return a Path pointing to a
        non-existent file.

        The Hash may be used to short-circuit a download that would result in downloading
        a file that does not match the expected hash, but the Downloader need not verify
        the Hash of the file downloaded after the fact, as that will be performed by
        default by the Source object.
        """
+ """
30
+
31
+
32
class DownloadHandler(ty.Protocol):
    """Protocol for scheme-specific factories registered via `register_download_handler`."""

    def __call__(self, uri: str) -> ty.Optional[Downloader]:
        """Returns a Downloader containing the URI if this URI can be handled. Returns
        None if this URI cannot be handled.
        """
37
+
38
+
39
def _LocalFileHandler(uri: str) -> ty.Optional[Downloader]:
    """DownloadHandler for file:// URIs: no transfer needed, just existence and hash checks."""
    if not is_file_uri(uri):
        return None

    def _resolve_local(hash: ty.Optional[Hash]) -> Path:
        # 'Downloading' a local file is just resolving and validating its path.
        local = path_from_uri(uri)
        if not local.exists():
            raise FileNotFoundError(local)
        if hash:
            # Verify eagerly, since there is no actual download step to defer to.
            _check_hash(hash, local)
        return local

    return _resolve_local
52
+
53
+
54
def register_download_handler(key: str, handler: DownloadHandler):
    """Register a scheme-specific DownloadHandler.

    The key's only job is to let re-registration replace a previous handler
    rather than accumulate duplicates.
    """
    _DOWNLOAD_HANDLERS[key] = handler
58
+
59
+
60
# Registry of DownloadHandlers; keys exist only to deduplicate registrations.
_DOWNLOAD_HANDLERS: ty.Dict[str, DownloadHandler] = dict()
register_download_handler("local_file", _LocalFileHandler)
62
+
63
+
64
def _get_download_handler(uri: str) -> Downloader:
    """Return the first registered Downloader that claims this URI.

    Raises ValueError when no registered handler recognizes the URI.
    """
    for candidate in (handler(uri) for handler in _DOWNLOAD_HANDLERS.values()):
        if candidate:
            return candidate
    raise ValueError(f"No SourcePath download handler for uri: {uri}")
69
+
70
+
71
class SourceHashMismatchError(ValueError):
    """Raised when a file's computed hash disagrees with the Source's expected Hash."""
73
+
74
+
75
def _check_hash(expected_hash: ty.Optional[Hash], path: Path) -> Hash:
    """Hash the file at `path`, comparing against `expected_hash` when one is given.

    Always returns the freshly computed Hash (sha256 by default when no expectation
    exists); raises SourceHashMismatchError on disagreement.
    """
    algo = expected_hash.algo if expected_hash else "sha256"
    with log.logger_context(hash_for=f"source-{algo}"):
        actual = filehash(algo, path)
    if expected_hash and expected_hash != actual:
        raise SourceHashMismatchError(
            f"{expected_hash.algo} mismatch for {path};"
            f" got {actual.bytes!r}, expected {expected_hash.bytes!r}"
        )
    return actual
@@ -0,0 +1,105 @@
1
+ # this should later get promoted somewhere, probably
2
+ import json
3
+ import typing as ty
4
+ from functools import partial
5
+ from pathlib import Path
6
+
7
+ from thds.core import files, hashing, log, types
8
+
9
+ from . import _construct
10
+ from .src import Source
11
+
12
# JSON keys under which base64-encoded digests live in a serialized Source dict.
_SHA256_B64 = "sha256b64"
_MD5_B64 = "md5b64"

logger = log.getLogger(__name__)
16
+
17
+
18
def _from_sha256b64(d: dict) -> ty.Optional[hashing.Hash]:
    """Parse a base64-encoded sha256 digest out of `d`, or return None if absent."""
    # Use the module constant for the membership test so the key check and the
    # lookup below can never drift apart (previously the literal "sha256b64"
    # was duplicated here).
    if _SHA256_B64 in d:
        return hashing.Hash(algo="sha256", bytes=hashing.db64(d[_SHA256_B64]))
    return None
22
+
23
+
24
def _from_md5b64(d: dict) -> ty.Optional[hashing.Hash]:
    """Parse a base64-encoded md5 digest out of `d`, or return None if absent."""
    # Use the module constant for the membership test so the key check and the
    # lookup below can never drift apart (previously the literal "md5b64"
    # was duplicated here).
    if _MD5_B64 in d:
        return hashing.Hash(algo="md5", bytes=hashing.db64(d[_MD5_B64]))
    return None
28
+
29
+
30
# A HashParser extracts a Hash from a deserialized JSON dict, or returns None.
HashParser = ty.Callable[[dict], ty.Optional[hashing.Hash]]
_BASE_PARSERS = (_from_sha256b64, _from_md5b64)
32
+
33
+
34
def base_parsers() -> ty.Tuple[HashParser, ...]:
    """Expose the default hash parsers (sha256 first, then md5) as an immutable tuple."""
    return _BASE_PARSERS
36
+
37
+
38
def from_json(json_source: str, hash_parsers: ty.Collection[HashParser] = base_parsers()) -> Source:
    """Deserialize a Source from its canonical JSON form.

    Each parser is tried in order; the first one that extracts a Hash wins, and
    the Source is built via the registered from_uri machinery.
    """
    record = json.loads(json_source)
    parsed_hash = next(filter(None, (parse(record) for parse in hash_parsers)), None)
    return _construct.from_uri(uri=record["uri"], hash=parsed_hash)
44
+
45
+
46
def _generic_hash_serializer(
    algo: str, stringify_hash: ty.Callable[[bytes], str], keyname: str, hash: hashing.Hash
) -> ty.Optional[dict]:
    """Serialize `hash` as {keyname: stringified-digest} when its algorithm is `algo`.

    Returns None for any other algorithm so callers can try the next serializer.
    """
    if hash.algo != algo:
        return None
    return {keyname: stringify_hash(hash.bytes)}
52
+
53
+
54
# Concrete serializers: emit {key: base64-digest} when the Hash's algo matches.
_to_sha256b64 = partial(_generic_hash_serializer, "sha256", hashing.b64, _SHA256_B64)
_to_md5b64 = partial(_generic_hash_serializer, "md5", hashing.b64, _MD5_B64)

# A HashSerializer turns a Hash into a single-key dict, or None if it can't.
HashSerializer = ty.Callable[[hashing.Hash], ty.Optional[dict]]
_BASE_HASH_SERIALIZERS: ty.Tuple[HashSerializer, ...] = (_to_md5b64, _to_sha256b64)  # type: ignore
59
+
60
+
61
def base_hash_serializers() -> ty.Tuple[HashSerializer, ...]:
    """Expose the default hash serializers (md5 first, then sha256) as an immutable tuple."""
    return _BASE_HASH_SERIALIZERS
63
+
64
+
65
def to_json(
    source: Source, hash_serializers: ty.Collection[HashSerializer] = base_hash_serializers()
) -> str:
    """Serialize a Source to its canonical JSON form.

    The first serializer that understands the Source's hash algorithm contributes
    its key/value pair; a Source without a hash serializes as just its uri.
    """
    hash_dict: dict = dict()
    if source.hash:
        for serialize in hash_serializers:
            candidate = serialize(source.hash)
            if candidate:
                hash_dict = candidate
                break
    return json.dumps(dict(uri=source.uri, **hash_dict))
72
+
73
+
74
def from_unknown_user_path(path: types.StrOrPath, desired_uri: str) -> Source:
    """Build a Source from a user-supplied path that may or may not be serialized JSON.

    A user-provided file _might_ be loadable as a from_json Source, or it might be
    a raw file that needs from_file. This is a reasonable (but not guaranteed!)
    heuristic for telling them apart, with `desired_uri` specifying where the data
    should live 'remotely' if that becomes necessary.

    Applications whose raw data is itself likely to be a JSON blob containing the
    key `uri` should implement something more robust.
    """
    with open(path) as readable:
        try:
            # Reading only the head is plenty for a canonical Source JSON blob.
            return from_json(readable.read(4096))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Not parseable as Source JSON (binary or non-JSON text): treat as raw data.
            return _construct.from_file(path, uri=desired_uri)
93
+
94
+
95
def write_to_json_file(source: Source, local_file: Path) -> bool:
    """Write the canonical JSON serialization of the Source to `local_file`.

    Returns True when the file was (re)written, False when the existing contents
    already match and no write was needed. The write itself is atomic.
    """
    local_file.parent.mkdir(parents=True, exist_ok=True)
    serialized = to_json(source) + "\n"
    existing = local_file.read_text() if local_file.exists() else None
    if serialized == existing:
        return False  # already up to date; skip the write entirely

    with files.atomic_text_writer(local_file) as f:
        logger.info(f"Writing {source} to {local_file}")
        f.write(serialized)
    return True
@@ -0,0 +1,86 @@
1
+ import os
2
+ import typing as ty
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ from .. import hashing
7
+ from . import _download
8
+
9
+
10
@dataclass(frozen=True)
class Source(os.PathLike):
    """Source is meant to be a consistent in-memory representation for an abstract,
    **read-only** source of data that may not be present locally when an application
    starts.

    A Source uses `os.PathLike` (`__fspath__`) to support transparent `open(src)` calls,
    so in many cases it will be a drop-in replacement for Path or str filenames. If you
    need an actual Path object, you can call `path()` to get one, but you should prefer to
    defer this until the actual location of use.

    By 'wrapping' read-only data in these objects, we can unify the code around how we
    unwrap and use them, which should allow us to more easily support different execution
    environments and sources of data.

    For instance, a Source could be a file on disk, but it could also be a file in
    ADLS.

    Furthermore, libraries which build on top of this one may use this representation to
    identify opportunities for optimization, by representing the Source in a stable
    and consistent format that allows different underlying data sources to fulfill the
    request for the data based on environmental context. A library could choose to
    transparently transform a local-path-based Source into a Source representing a
    remote file, without changing the semantics of the object as observed by the code.

    One reason a Hash is part of the interface is so that libraries interacting with the
    object can use the hash as a canonical 'name' for the data, if one is available.

    Another reason is that we can add a layer of consistency checking to data we're
    working with, at the cost of a few compute cycles. Since Sources are meant to represent
    read-only data, the Hash is a meaningful and persistent marker of data identity.

    Do not call its constructor in application code. Use `from_file` or `from_uri` instead.
    """

    uri: str
    hash: ty.Optional[hashing.Hash] = None
    # Dataclass equality and __hash__ derive from the two fields above (uri + hash)
    # only; the cached local path below is set outside the dataclass machinery and
    # is deliberately excluded - it exists purely as an optimization.

    @property
    def cached_path(self) -> ty.Optional[Path]:
        """This is part of the public interface as far as checking to see whether a file
        is already present locally, but its existence and value is not part of equality or
        the hash for this class - it exists purely as an optimization.
        """
        # Stored under the literal attribute name "__cached_path" - string keys are
        # not subject to class-private name mangling; see _set_cached_path.
        return getattr(self, "__cached_path", None)

    def _set_cached_path(self, lpath: ty.Optional[Path]) -> None:
        """protected interface for setting a cached Path since the attribute is not
        available via the constructor.
        """
        super().__setattr__("__cached_path", lpath)  # this works around dataclass.frozen.
        # https://noklam.github.io/blog/posts/2022-04-22-python-dataclass-partiala-immutable.html

    def path(self) -> Path:
        """Any Source can be turned into a local file path.

        Remember that the resulting data is meant to be read-only. If you want to mutate
        the data, you should first make a copy.

        If not already present locally, this will incur a one-time download. Then, if the
        Source has a Hash, the Hash will be validated against the downloaded file, and a
        failure will raise SourceHashMismatchError.
        """
        if self.cached_path is None or not self.cached_path.exists():
            lpath = _download._get_download_handler(self.uri)(self.hash)
            # path() used to be responsible for checking the hash, but since we pass it to the downloader,
            # it really makes more sense to allow the downloader to decide how to verify its own download,
            # and we don't want to duplicate any effort that it may have already put in.
            self._set_cached_path(lpath)

        assert self.cached_path and self.cached_path.exists()
        return self.cached_path

    def __fspath__(self) -> str:
        """Resolve to a local file (downloading if necessary) and return its path as a str."""
        return os.fspath(self.path())
@@ -0,0 +1,106 @@
1
+ import concurrent.futures
2
+ import os
3
+ import shutil
4
+ import typing as ty
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from .. import cm, link, logical_root, parallel, thunks, types
9
+ from .src import Source
10
+
11
_MAX_PARALLELISM = 90  # max_workers for the thread pool used by SourceTree.path()
12
+
13
+
14
+ def _logical_tree_replication_operations(
15
+ local_paths: ty.Iterable[Path], logical_local_root: Path, dest_dir: Path
16
+ ) -> ty.Tuple[Path, ty.List[ty.Tuple[Path, Path]]]:
17
+ """
18
+ Pure function that determines required copy operations.
19
+ Returns (logical_dest, list of (src, dest) pairs)
20
+ """
21
+ logical_dest = dest_dir / logical_local_root.name
22
+ operations = [(src, logical_dest / src.relative_to(logical_local_root)) for src in local_paths]
23
+ return logical_dest, operations
24
+
25
+
26
def replicate_logical_tree(
    local_paths: ty.Iterable[Path],
    logical_local_root: Path,
    dest_dir: Path,
    copy: ty.Callable[[Path, Path], ty.Any] = link.cheap_copy,
    executor_cm: ty.Optional[ty.ContextManager[concurrent.futures.Executor]] = None,
) -> Path:
    """Replicate only the specified files from `logical_local_root` into `dest_dir`.

    The destination logical root (dest_dir / <root name>) is removed first, so the
    result contains exactly the listed files. Copies run fail-fast in parallel via
    `copy` (default: cheapest available link/copy), optionally on `executor_cm`.

    Returns the path to the logical root in its new location.
    """
    logical_dest, operations = _logical_tree_replication_operations(
        local_paths, logical_local_root, dest_dir
    )
    # The helper already returns dest_dir / logical_local_root.name; previously
    # that path was redundantly recomputed here by hand. Use the one value for
    # both the cleanup and the return.
    shutil.rmtree(logical_dest, ignore_errors=True)

    def copy_to(src: Path, dest: Path) -> None:
        # Parent directories may not exist yet, since only listed files are replicated.
        dest.parent.mkdir(parents=True, exist_ok=True)
        copy(src, dest)

    for _ in parallel.failfast(
        parallel.yield_all(
            ((src, thunks.thunking(copy_to)(src, dest)) for src, dest in operations),
            executor_cm=executor_cm,
        )
    ):
        pass  # draining the iterator drives the copies and surfaces any failure
    return logical_dest
56
+
57
+
58
@dataclass
class SourceTree(os.PathLike):
    """Represent a fixed set of sources (with hashes where available) as a list of
    sources, plus the (optional) logical root of the tree, so that they can be 'unwrapped'
    as a local directory structure.
    """

    sources: ty.List[Source]
    higher_logical_root: str = ""
    # there may be cases where, rather than identifying the 'lowest common prefix' of a
    # set of sources/URIs, we may wish to represent a 'higher' root for the sake of some
    # consuming system. in those cases, this can be specified and we'll find the lowest
    # common prefix _above_ that.

    def path(self, dest_dir: ty.Optional[types.StrOrPath] = None) -> Path:
        """Return a local path to a directory that corresponds to the logical root.

        This incurs a download of _all_ sources explicitly represented by the list.

        If you want to _ensure_ that _only_ the listed sources are present in the
        directory, despite any other files which may be present in an
        implementation-specific cache, you must pass a Path to a directory that you are
        willing to have emptied, and this method will copy the files into it.
        """
        # cm.keep_context yields a re-enterable wrapper around one shared thread
        # pool, so the same executor can serve both parallel phases below (the
        # downloads here and the copies inside replicate_logical_tree) without
        # being shut down after the first use.
        with cm.keep_context(
            concurrent.futures.ThreadPoolExecutor(max_workers=_MAX_PARALLELISM)
        ) as thread_pool:
            local_paths = [
                local_path
                for _, local_path in parallel.failfast(
                    parallel.yield_all(
                        # src.path() is a thunk that downloads the data if not already present locally.
                        # Source allows registration of download handlers by URI scheme.
                        ((src, src.path) for src in self.sources),
                        executor_cm=thread_pool,
                    )
                )
            ]
            local_logical_root = Path(logical_root.find(map(str, local_paths), self.higher_logical_root))
            # NOTE(review): sanity check only - stripped when running under `python -O`.
            assert local_logical_root.is_dir()
            if not dest_dir:
                return local_logical_root

            return replicate_logical_tree(
                local_paths, local_logical_root, Path(dest_dir).resolve(), executor_cm=thread_pool
            )

    def __fspath__(self) -> str:  # implement the os.PathLike protocol
        """Download/replicate as needed and return the logical root as a string path."""
        return str(self.path())
thds/core/source_serde.py CHANGED
@@ -1,104 +1,2 @@
1
- # this should later get promoted somewhere, probably
2
- import json
3
- import typing as ty
4
- from functools import partial
5
- from pathlib import Path
6
-
7
- from thds.core import files, hashing, log, source, types
8
-
9
- _SHA256_B64 = "sha256b64"
10
- _MD5_B64 = "md5b64"
11
-
12
- logger = log.getLogger(__name__)
13
-
14
-
15
- def _from_sha256b64(d: dict) -> ty.Optional[hashing.Hash]:
16
- if "sha256b64" in d:
17
- return hashing.Hash(algo="sha256", bytes=hashing.db64(d[_SHA256_B64]))
18
- return None
19
-
20
-
21
- def _from_md5b64(d: dict) -> ty.Optional[hashing.Hash]:
22
- if "md5b64" in d:
23
- return hashing.Hash(algo="md5", bytes=hashing.db64(d[_MD5_B64]))
24
- return None
25
-
26
-
27
- HashParser = ty.Callable[[dict], ty.Optional[hashing.Hash]]
28
- _BASE_PARSERS = (_from_sha256b64, _from_md5b64)
29
-
30
-
31
- def base_parsers() -> ty.Tuple[HashParser, ...]:
32
- return _BASE_PARSERS
33
-
34
-
35
- def from_json(
36
- json_source: str, hash_parsers: ty.Collection[HashParser] = base_parsers()
37
- ) -> source.Source:
38
- d = json.loads(json_source)
39
- return source.from_uri(
40
- uri=d["uri"],
41
- hash=next(filter(None, (p(d) for p in hash_parsers)), None),
42
- )
43
-
44
-
45
- def _generic_hash_serializer(
46
- algo: str, stringify_hash: ty.Callable[[bytes], str], keyname: str, hash: hashing.Hash
47
- ) -> ty.Optional[dict]:
48
- if hash.algo == algo:
49
- return {keyname: stringify_hash(hash.bytes)}
50
- return None
51
-
52
-
53
- _to_sha256b64 = partial(_generic_hash_serializer, "sha256", hashing.b64, _SHA256_B64)
54
- _to_md5b64 = partial(_generic_hash_serializer, "md5", hashing.b64, _MD5_B64)
55
-
56
- HashSerializer = ty.Callable[[hashing.Hash], ty.Optional[dict]]
57
- _BASE_HASH_SERIALIZERS: ty.Tuple[HashSerializer, ...] = (_to_md5b64, _to_sha256b64) # type: ignore
58
-
59
-
60
- def base_hash_serializers() -> ty.Tuple[HashSerializer, ...]:
61
- return _BASE_HASH_SERIALIZERS
62
-
63
-
64
- def to_json(
65
- source: source.Source, hash_serializers: ty.Collection[HashSerializer] = base_hash_serializers()
66
- ) -> str:
67
- hash_dict = (
68
- next(filter(None, (ser(source.hash) for ser in hash_serializers if source.hash)), None)
69
- ) or dict()
70
- return json.dumps(dict(uri=source.uri, **hash_dict))
71
-
72
-
73
- def from_unknown_user_path(path: types.StrOrPath, desired_uri: str) -> source.Source:
74
- """Sometimes you may want to load a Source directly from a Path provided by a user.
75
-
76
- It _might_ represent something loadable as a from_json Source, but it might just be a
77
- raw file that needs to be loaded with from_file!
78
-
79
- This is a _reasonable_ (but not guaranteed!) way of trying to ascertain which one it
80
- is, and specifying where it should live 'remotely' if such a thing becomes
81
- necessary.
82
-
83
- Your application might need to implement something more robust if the
84
- actual underlying data is likely to be a JSON blob containing the key `uri`, for
85
- instance.
86
- """
87
- with open(path) as readable:
88
- try:
89
- return from_json(readable.read(4096))
90
- except (json.JSONDecodeError, UnicodeDecodeError):
91
- return source.from_file(path, uri=desired_uri)
92
-
93
-
94
- def write_to_json_file(source: source.Source, local_file: Path) -> bool:
95
- """Write the canonical JSON serialization of the Source to a file."""
96
- local_file.parent.mkdir(parents=True, exist_ok=True)
97
- previous_source = local_file.read_text() if local_file.exists() else None
98
- new_source = to_json(source) + "\n"
99
- if new_source != previous_source:
100
- with files.atomic_text_writer(local_file) as f:
101
- logger.info(f"Writing {source} to {local_file}")
102
- f.write(new_source)
103
- return True
104
- return False
1
+ # deprecated alias for source.serde
2
+ from .source.serde import from_json, from_unknown_user_path, to_json, write_to_json_file # noqa: F401
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: thds.core
3
- Version: 1.31.20250213162956
3
+ Version: 1.32.20250218201534
4
4
  Summary: Core utilities.
5
5
  Author: Trilliant Health
6
6
  Description-Content-Type: text/markdown
@@ -2,13 +2,14 @@ thds/core/__init__.py,sha256=imKpmnrBV0_7-1d1Pc2yR5jxbvOrmIplLm7Ig_eK1OU,934
2
2
  thds/core/ansi_esc.py,sha256=QZ3CptZbX4N_hyP2IgqfTbNt9tBPaqy7ReTMQIzGbrc,870
3
3
  thds/core/cache.py,sha256=nL0oAyZrhPqyBBLevnOWSWVoEBrftaG3aE6Qq6tvmAA,7153
4
4
  thds/core/calgitver.py,sha256=HklIz-SczK92Vm2rXtTSDiVxAcxUW_GPVCRRGt4BmBA,2324
5
+ thds/core/cm.py,sha256=WZB8eQU0DaBYj9s97nc3PuCtai9guovfyiQH68zhLzY,1086
5
6
  thds/core/concurrency.py,sha256=NQunF_tJ_z8cfVyhzkTPlb-nZrgu-vIk9_3XffgscKQ,3520
6
7
  thds/core/config.py,sha256=N-WVpPDrfTSFKz0m7WrqZPBdd17dycpDx9nhbATkf3c,9092
7
8
  thds/core/decos.py,sha256=VpFTKTArXepICxN4U8C8J6Z5KDq-yVjFZQzqs2jeVAk,1341
8
9
  thds/core/dict_utils.py,sha256=MAVkGJg4KQN1UGBLEKuPdQucZaXg_jJakujQ-GUrYzw,6471
9
10
  thds/core/env.py,sha256=M36CYkPZ5AUf_-n8EqjsMGwWOzaKEn0KgRwnqUK7jS4,1094
10
11
  thds/core/exit_after.py,sha256=0lz63nz2NTiIdyBDYyRa9bQShxQKe7eISy8VhXeW4HU,3485
11
- thds/core/files.py,sha256=8amNymTRuW17o6Qay73juxwkcW1DysWXeAByeofadkM,4540
12
+ thds/core/files.py,sha256=35vhbaDv4OkL_n1PCM-ki54708aibFMrnURpth_5UsA,4556
12
13
  thds/core/fretry.py,sha256=Tui2q6vXV6c7mjTa1czLrXiugHUEwQp-sZdiwXfxvmM,3829
13
14
  thds/core/generators.py,sha256=rcdFpPj0NMJWSaSZTnBfTeZxTTORNB633Lng-BW1284,1939
14
15
  thds/core/git.py,sha256=I6kaEvwcvVxCLYHhTTfnHle-GkmgOR9_fHs03QxgBfI,2792
@@ -21,8 +22,9 @@ thds/core/inspect.py,sha256=vCxKqw8XG2W1cuj0MwjdXhe9TLQrGdjRraS6UEYsbf8,1955
21
22
  thds/core/iterators.py,sha256=d3iTQDR0gCW1nMRmknQeodR_4THzR9Ajmp8F8KCCFgg,208
22
23
  thds/core/lazy.py,sha256=e1WvG4LsbEydV0igEr_Vl1cq05zlQNIE8MFYT90yglE,3289
23
24
  thds/core/link.py,sha256=kmFJIFvEZc16-7S7IGvtTpzwl3VuvFl3yPlE6WJJ03w,5404
25
+ thds/core/logical_root.py,sha256=gWkIYRv9kNQfzbpxJaYiwNXVz1neZ2NvnvProtOn9d8,1399
24
26
  thds/core/merge_args.py,sha256=7oj7dtO1-XVkfTM3aBlq3QlZbo8tb6X7E3EVIR-60t8,5781
25
- thds/core/meta.json,sha256=HhmqF-oKGnmptGrl7rwZkxD9SzwePtjo82gyFSAfp48,196
27
+ thds/core/meta.json,sha256=yYe9d8_WiKFL5-fGrUD1hBo8tCJtbY-wet_cGdgcy_A,196
26
28
  thds/core/meta.py,sha256=IPLAKrH06HooPMNf5FeqJvUcM-JljTGXddrAQ5oAX8E,16896
27
29
  thds/core/parallel.py,sha256=HXAn9aIYqNE5rnRN5ypxR6CUucdfzE5T5rJ_MUv-pFk,7590
28
30
  thds/core/pickle_visit.py,sha256=QNMWIi5buvk2zsvx1-D-FKL7tkrFUFDs387vxgGebgU,833
@@ -32,8 +34,7 @@ thds/core/protocols.py,sha256=4na2EeWUDWfLn5-SxfMmKegDSndJ5z-vwMhDavhCpEM,409
32
34
  thds/core/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
35
  thds/core/scaling.py,sha256=f7CtdgK0sN6nroTq5hLAkG8xwbWhbCZUULstSKjoxO0,1615
34
36
  thds/core/scope.py,sha256=iPRhS-lIe-axDctqxBtEPeF0PM_w-0tRS-9kPweUGBY,7205
35
- thds/core/source.py,sha256=ACMe_e-0tuwUknOobomC9_q6vHM3hMTouWsSsJ-qHQk,9884
36
- thds/core/source_serde.py,sha256=TqW4MTrXQ49JJaXrkmFcymSBIWvBlkxO2JOPNQap_F4,3523
37
+ thds/core/source_serde.py,sha256=X4c7LiT3VidejqtTel9YB6dWGB3x-ct39KF9E50Nbx4,139
37
38
  thds/core/stack_context.py,sha256=17lPOuYWclUpZ-VXRkPgI4WbiMzq7_ZY6Kj1QK_1oNo,1332
38
39
  thds/core/thunks.py,sha256=p1OvMBJ4VGMsD8BVA7zwPeAp0L3y_nxVozBF2E78t3M,1053
39
40
  thds/core/timer.py,sha256=1FfcQ4-Gp6WQFXR0GKeT_8jwtamEfnTukdSbDKTAJVM,5432
@@ -45,6 +46,12 @@ thds/core/log/json_formatter.py,sha256=C5bRsSbAqaQqfTm88jc3mYe3vwKZZLAxET8s7_u7a
45
46
  thds/core/log/kw_formatter.py,sha256=9-MVOd2r5NEkYNne9qWyFMeR5lac3w7mjHXsDa681i0,3379
46
47
  thds/core/log/kw_logger.py,sha256=CyZVPnkUMtrUL2Lyk261AIEPmoP-buf_suFAhQlU1io,4063
47
48
  thds/core/log/logfmt.py,sha256=qS6BbdlOZPRnxmHenVL3uK43OQ30NJUnz92S6d_Yh2A,10360
49
+ thds/core/source/__init__.py,sha256=RiaUHNunoaw4XJUrwR5vJzSS6HGxOUKUONR_ipX5654,424
50
+ thds/core/source/_construct.py,sha256=klN6-fSJrsbbUhp92wzhJcF73h_PKKJItNLC__vwlIs,3122
51
+ thds/core/source/_download.py,sha256=pUhkphHdB7y4ZpxZZ6ITIS5giXMHuRf420yYAJwx6aE,2924
52
+ thds/core/source/serde.py,sha256=wXCfuv_Dv3QvJJr-uebGmTrfhCU_1a8VX3VJnXhVHfU,3539
53
+ thds/core/source/src.py,sha256=A1PSR5vANLwnUWLsFNVLkkeUdaidzRAzq8vri_a5w9E,4141
54
+ thds/core/source/tree.py,sha256=vjAqnQXGE0XiI0WvlLyXGqEAZbyjq6XmdUeWAR0HI4M,4144
48
55
  thds/core/sqlite/__init__.py,sha256=tDMzuO76qTtckJHldPQ6nPZ6kcvhhoJrVuuW42JtaSQ,606
49
56
  thds/core/sqlite/connect.py,sha256=l4QaSAI8RjP7Qh2FjmJ3EwRgfGf65Z3-LjtC9ocHM_U,977
50
57
  thds/core/sqlite/copy.py,sha256=y3IRQTBrWDfKuVIfW7fYuEgwRCRKHjN0rxVFkIb9VrQ,1155
@@ -60,8 +67,8 @@ thds/core/sqlite/structured.py,sha256=swCbDoyVT6cE7Kl79Wh_rg5Z1-yrUDJbiVJF4bjset
60
67
  thds/core/sqlite/types.py,sha256=oUkfoKRYNGDPZRk29s09rc9ha3SCk2SKr_K6WKebBFs,1308
61
68
  thds/core/sqlite/upsert.py,sha256=BmKK6fsGVedt43iY-Lp7dnAu8aJ1e9CYlPVEQR2pMj4,5827
62
69
  thds/core/sqlite/write.py,sha256=z0219vDkQDCnsV0WLvsj94keItr7H4j7Y_evbcoBrWU,3458
63
- thds.core-1.31.20250213162956.dist-info/METADATA,sha256=NVfvS6EbBrUw_-SsR3Ckb87WNCQ9LPcXiX_ukJ5Gep4,2123
64
- thds.core-1.31.20250213162956.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
65
- thds.core-1.31.20250213162956.dist-info/entry_points.txt,sha256=bOCOVhKZv7azF3FvaWX6uxE6yrjK6FcjqhtxXvLiFY8,161
66
- thds.core-1.31.20250213162956.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
67
- thds.core-1.31.20250213162956.dist-info/RECORD,,
70
+ thds.core-1.32.20250218201534.dist-info/METADATA,sha256=fBdWYnrnIKHNioHwRHhFjWY_tUm-atCCLoHJgdoSqds,2123
71
+ thds.core-1.32.20250218201534.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
+ thds.core-1.32.20250218201534.dist-info/entry_points.txt,sha256=bOCOVhKZv7azF3FvaWX6uxE6yrjK6FcjqhtxXvLiFY8,161
73
+ thds.core-1.32.20250218201534.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
74
+ thds.core-1.32.20250218201534.dist-info/RECORD,,
thds/core/source.py DELETED
@@ -1,238 +0,0 @@
1
- """Wrap openable, read-only data that is either locally-present or downloadable,
2
-
3
- yet will not be downloaded (if non-local) until it is actually opened or unwrapped.
4
- """
5
-
6
- import os
7
- import typing as ty
8
- from dataclasses import dataclass
9
- from functools import partial
10
- from pathlib import Path
11
-
12
- from . import log
13
- from .files import is_file_uri, path_from_uri, to_uri
14
- from .hash_cache import filehash
15
- from .hashing import Hash
16
- from .types import StrOrPath
17
-
18
-
19
- class Downloader(ty.Protocol):
20
- def __call__(self, hash: ty.Optional[Hash]) -> Path:
21
- """Closure over a URI that downloads a file to a local path and returns the path.
22
- The file may be placed anywhere as long as the file will be readable until the
23
- program exits.
24
-
25
- If the URI points to a missing file, this MUST raise any Exception that the
26
- underlying implementation desires. It MUST NOT return a Path pointing to a
27
- non-existent file.
28
-
29
- The Hash may be used to short-circuit a download that would result in downloading
30
- a file that does not match the expected hash, but the Downloader need not verify
31
- the Hash of the file downloaded after the fact, as that will be performed by
32
- default by the Source object.
33
- """
34
-
35
-
36
- class DownloadHandler(ty.Protocol):
37
- def __call__(self, uri: str) -> ty.Optional[Downloader]:
38
- """Returns a Downloader containing the URI if this URI can be handled. Returns
39
- None if this URI cannot be handled.
40
- """
41
-
42
-
43
- def _LocalFileHandler(uri: str) -> ty.Optional[Downloader]:
44
- if not is_file_uri(uri):
45
- return None
46
-
47
- def download_file(hash: ty.Optional[Hash]) -> Path:
48
- lpath = path_from_uri(uri)
49
- if not lpath.exists():
50
- raise FileNotFoundError(lpath)
51
- if hash:
52
- _check_hash(hash, lpath)
53
- return lpath
54
-
55
- return download_file
56
-
57
-
58
- def register_download_handler(key: str, handler: DownloadHandler):
59
- # key is not currently used for anything other than avoiding
60
- # having duplicates registered for whatever reason.
61
- _DOWNLOAD_HANDLERS[key] = handler
62
-
63
-
64
- _DOWNLOAD_HANDLERS: ty.Dict[str, DownloadHandler] = dict()
65
- register_download_handler("local_file", _LocalFileHandler)
66
-
67
-
68
- def _get_download_handler(uri: str) -> Downloader:
69
- for handler in _DOWNLOAD_HANDLERS.values():
70
- if downloader := handler(uri):
71
- return downloader
72
- raise ValueError(f"No SourcePath download handler for uri: {uri}")
73
-
74
-
75
- class SourceHashMismatchError(ValueError):
76
- pass
77
-
78
-
79
- def _check_hash(expected_hash: ty.Optional[Hash], path: Path) -> Hash:
80
- hash_algo = expected_hash.algo if expected_hash else "sha256"
81
- with log.logger_context(hash_for=f"source-{hash_algo}"):
82
- computed_hash = filehash(hash_algo, path)
83
- if expected_hash and expected_hash != computed_hash:
84
- raise SourceHashMismatchError(
85
- f"{expected_hash.algo} mismatch for {path};"
86
- f" got {computed_hash.bytes!r}, expected {expected_hash.bytes!r}"
87
- )
88
- return computed_hash
89
-
90
-
91
- @dataclass(frozen=True)
92
- class Source(os.PathLike):
93
- """Source is meant to be a consistent in-memory representation for an abstract,
94
- **read-only** source of data that may not be present locally when an application
95
- starts.
96
-
97
- A Source uses `os.PathLike` (`__fspath__`) to support transparent `open(src)` calls,
98
- so in many cases it will be a drop-in replacement for Path or str filenames. If you
99
- need an actual Path object, you can call `path()` to get one, but you should prefer to
100
- defer this until the actual location of use.
101
-
102
- By 'wrapping' read-only data in these objects, we can unify the code around how we
103
- unwrap and use them, which should allow us to more easily support different execution
104
- environments and sources of data.
105
-
106
- For instance, a Source could be a file on disk, but it could also be a file in
107
- ADLS.
108
-
109
- Furthermore, libraries which build on top of this one may use this representation to
110
- identify opportunities for optimization, by representing the Source in a stable
111
- and consistent format that allows different underlying data sources to fulfill the
112
- request for the data based on environmental context. A library could choose to
113
- transparently transform a local-path-based Source into a Source representing a
114
- remote file, without changing the semantics of the object as observed by the code.
115
-
116
- One reason a Hash is part of the interface is so that libraries interacting with the
117
- object can use the hash as a canonical 'name' for the data, if one is available.
118
-
119
- Another reason is that we can add a layer of consistency checking to data we're
120
- working with, at the cost of a few compute cycles. Since Sources are meant to represent
121
- read-only data, the Hash is a meaningful and persistent marker of data identity.
122
-
123
- Do not call its constructor in application code. Use `from_file` or `from_uri` instead.
124
- """
125
-
126
- uri: str
127
- hash: ty.Optional[Hash] = None
128
- # hash and equality are based only on the _identity_ of the object,
129
- # not on the other properties that provide some caching functionality.
130
-
131
- @property
132
- def cached_path(self) -> ty.Optional[Path]:
133
- """This is part of the public interface as far as checking to see whether a file
134
- is already present locally, but its existence and value is not part of equality or
135
- the hash for this class - it exists purely as an optimization.
136
- """
137
- return getattr(self, "__cached_path", None)
138
-
139
- def _set_cached_path(self, lpath: ty.Optional[Path]):
140
- """protected interface for setting a cached Path since the attribute is not
141
- available via the constructor.
142
- """
143
- super().__setattr__("__cached_path", lpath) # this works around dataclass.frozen.
144
- # https://noklam.github.io/blog/posts/2022-04-22-python-dataclass-partiala-immutable.html
145
-
146
- def path(self) -> Path:
147
- """Any Source can be turned into a local file path.
148
-
149
- Remember that the resulting data is meant to be read-only. If you want to mutate
150
- the data, you should first make a copy.
151
-
152
- If not already present locally, this will incur a one-time download. Then, if the
153
- Source has a Hash, the Hash will be validated against the downloaded file, and a
154
- failure will raise SourceHashMismatchError.
155
- """
156
- if self.cached_path is None or not self.cached_path.exists():
157
- lpath = _get_download_handler(self.uri)(self.hash)
158
- # path() used to be responsible for checking the hash, but since we pass it to the downloader,
159
- # it really makes more sense to allow the downloader to decide how to verify its own download,
160
- # and we don't want to duplicate any effort that it may have already put in.
161
- self._set_cached_path(lpath)
162
-
163
- assert self.cached_path and self.cached_path.exists()
164
- return self.cached_path
165
-
166
- def __fspath__(self) -> str:
167
- return os.fspath(self.path())
168
-
169
-
170
- # Creation from local Files or from remote URIs
171
-
172
-
173
- def from_file(filename: StrOrPath, hash: ty.Optional[Hash] = None, uri: str = "") -> Source:
174
- """Create a read-only Source from a local file that already exists.
175
-
176
- If URI is passed, the local file will be read and hashed, but the final URI in the
177
- Source will be the one provided explicitly. NO UPLOAD IS PERFORMED. It is your
178
- responsibility to ensure that your file has been uploaded to the URI you provide.
179
- """
180
- path = path_from_uri(filename) if isinstance(filename, str) else filename
181
- assert isinstance(path, Path)
182
- if not path.exists():
183
- raise FileNotFoundError(path)
184
-
185
- if uri:
186
- src = from_uri(uri, _check_hash(hash, path))
187
- else:
188
- src = Source(to_uri(path), _check_hash(hash, path))
189
- src._set_cached_path(path) # internally, it's okay to hack around immutability.
190
- return src
191
-
192
-
193
- class FromUri(ty.Protocol):
194
- def __call__(self, hash: ty.Optional[Hash]) -> Source:
195
- """Closure over a URI that creates a Source from a URI.
196
-
197
- The Hash may be used to short-circuit creation that would result in creating
198
- a Source that cannot match the expected Hash, but this is not required,
199
- and the hash will be included in the Source object regardless, and will
200
- be validated (if non-nil) at the time of source data access.
201
- """
202
-
203
-
204
- class FromUriHandler(ty.Protocol):
205
- def __call__(self, uri: str) -> ty.Optional[FromUri]:
206
- """Returns a FromUri object containing the URI if this URI can be handled. Returns
207
- None if this URI cannot be handled.
208
- """
209
-
210
-
211
- def register_from_uri_handler(key: str, handler: FromUriHandler):
212
- """If a library wants to customize how Sources are created from URIs that it handles,
213
- it can register a handler here.
214
- """
215
- # key is not currently used for anything other than avoiding
216
- # having duplicates registered for whatever reason.
217
- _FROM_URI_HANDLERS[key] = handler
218
-
219
-
220
- _FROM_URI_HANDLERS: ty.Dict[str, FromUriHandler] = dict()
221
- register_from_uri_handler(
222
- "local_file", lambda uri: partial(from_file, path_from_uri(uri)) if is_file_uri(uri) else None
223
- )
224
-
225
-
226
- def from_uri(uri: str, hash: ty.Optional[Hash] = None) -> Source:
227
- """Create a read-only Source from a URI. The data should already exist at this remote
228
- URI, although Source itself can make no guarantee that it always represents real data
229
- - only that any data it does represent is read-only.
230
-
231
- It may be advantageous for a URI-handling library to register a more specific
232
- implementation of this function, if it is capable of determining a Hash for the blob
233
- represented by the URI without downloading the blob.
234
- """
235
- for handler in _FROM_URI_HANDLERS.values():
236
- if from_uri_ := handler(uri):
237
- return from_uri_(hash)
238
- return Source(uri=uri, hash=hash)