thds.mops 3.8.20250701001208__py3-none-any.whl → 3.8.20250701190352__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thds/mops/pure/adls/blob_store.py +22 -23
- thds/mops/pure/core/content_addressed.py +7 -1
- {thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/METADATA +1 -1
- {thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/RECORD +7 -7
- {thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/WHEEL +0 -0
- {thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/entry_points.txt +0 -0
- {thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/top_level.txt +0 -0
|
@@ -7,8 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from azure.core.exceptions import HttpResponseError
|
|
8
8
|
from azure.storage.filedatalake import DataLakeFileClient
|
|
9
9
|
|
|
10
|
-
from thds
|
|
11
|
-
from thds.adls.cached_up_down import download_to_cache, upload_through_cache
|
|
10
|
+
from thds import adls
|
|
12
11
|
from thds.adls.errors import blob_not_found_translation, is_blob_not_found
|
|
13
12
|
from thds.adls.global_client import get_global_fs_client
|
|
14
13
|
from thds.core import config, fretry, home, link, log, scope
|
|
@@ -28,11 +27,11 @@ log.getLogger("azure.core").setLevel(logging.WARNING)
|
|
|
28
27
|
logger = log.getLogger(__name__)
|
|
29
28
|
|
|
30
29
|
|
|
31
|
-
def _selective_upload_path(path: Path,
|
|
30
|
+
def _selective_upload_path(path: Path, adls_uri: str) -> None:
|
|
32
31
|
if path.stat().st_size > _5_MB:
|
|
33
|
-
upload_through_cache(
|
|
32
|
+
adls.upload_through_cache(adls_uri, path)
|
|
34
33
|
else:
|
|
35
|
-
|
|
34
|
+
adls.upload(adls_uri, path)
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
def is_creds_failure(exc: Exception) -> bool:
|
|
@@ -46,15 +45,15 @@ _azure_creds_retry = fretry.retry_sleep(is_creds_failure, fretry.expo(retries=9,
|
|
|
46
45
|
|
|
47
46
|
class AdlsBlobStore(BlobStore):
|
|
48
47
|
def control_root(self, uri: str) -> str:
|
|
49
|
-
return str(
|
|
48
|
+
return str(adls.fqn.parse(uri).root())
|
|
50
49
|
|
|
51
|
-
def _client(self, fqn: AdlsFqn) -> DataLakeFileClient:
|
|
52
|
-
return get_global_fs_client(fqn.sa, fqn.container).get_file_client(fqn.path)
|
|
50
|
+
def _client(self, fqn: adls.AdlsFqn) -> DataLakeFileClient:
|
|
51
|
+
return adls.get_global_fs_client(fqn.sa, fqn.container).get_file_client(fqn.path)
|
|
53
52
|
|
|
54
53
|
@_azure_creds_retry
|
|
55
54
|
@scope.bound
|
|
56
55
|
def readbytesinto(self, remote_uri: str, stream: ty.IO[bytes], type_hint: str = "bytes") -> None:
|
|
57
|
-
fqn =
|
|
56
|
+
fqn = adls.fqn.parse(remote_uri)
|
|
58
57
|
scope.enter(log.logger_context(download=fqn))
|
|
59
58
|
logger.debug(f"<----- downloading {type_hint}")
|
|
60
59
|
with blob_not_found_translation(fqn):
|
|
@@ -66,7 +65,7 @@ class AdlsBlobStore(BlobStore):
|
|
|
66
65
|
@scope.bound
|
|
67
66
|
def getfile(self, remote_uri: str) -> Path:
|
|
68
67
|
scope.enter(log.logger_context(download="mops-getfile"))
|
|
69
|
-
return download_to_cache(
|
|
68
|
+
return adls.download_to_cache(remote_uri)
|
|
70
69
|
|
|
71
70
|
@_azure_creds_retry
|
|
72
71
|
@scope.bound
|
|
@@ -74,18 +73,18 @@ class AdlsBlobStore(BlobStore):
|
|
|
74
73
|
self, remote_uri: str, data: AnyStrSrc, type_hint: str = "application/octet-stream"
|
|
75
74
|
) -> None:
|
|
76
75
|
"""Upload data to a remote path."""
|
|
77
|
-
|
|
76
|
+
adls.upload(remote_uri, data, content_type=type_hint)
|
|
78
77
|
|
|
79
78
|
@_azure_creds_retry
|
|
80
79
|
@scope.bound
|
|
81
80
|
def putfile(self, path: Path, remote_uri: str) -> None:
|
|
82
81
|
scope.enter(log.logger_context(upload="mops-putfile"))
|
|
83
|
-
_selective_upload_path(path,
|
|
82
|
+
_selective_upload_path(path, remote_uri)
|
|
84
83
|
|
|
85
84
|
@_azure_creds_retry
|
|
86
85
|
@scope.bound
|
|
87
86
|
def exists(self, remote_uri: str) -> bool:
|
|
88
|
-
fqn =
|
|
87
|
+
fqn = adls.fqn.parse(remote_uri)
|
|
89
88
|
scope.enter(log.logger_context(exists=fqn))
|
|
90
89
|
return on_slow(
|
|
91
90
|
lambda secs: LogSlow(f"Took {int(secs)}s to check if file exists."),
|
|
@@ -93,19 +92,19 @@ class AdlsBlobStore(BlobStore):
|
|
|
93
92
|
)(lambda: self._client(fqn).exists())()
|
|
94
93
|
|
|
95
94
|
def join(self, *parts: str) -> str:
|
|
96
|
-
return join(*parts).rstrip("/")
|
|
95
|
+
return adls.fqn.join(*parts).rstrip("/")
|
|
97
96
|
|
|
98
97
|
def split(self, uri: str) -> ty.List[str]:
|
|
99
|
-
fqn =
|
|
98
|
+
fqn = adls.fqn.parse(uri)
|
|
100
99
|
return [str(fqn.root()), *fqn.path.split("/")]
|
|
101
100
|
|
|
102
101
|
def is_blob_not_found(self, exc: Exception) -> bool:
|
|
103
102
|
return is_blob_not_found(exc)
|
|
104
103
|
|
|
105
104
|
def list(self, uri: str) -> ty.List[str]:
|
|
106
|
-
fqn =
|
|
105
|
+
fqn = adls.fqn.parse(uri)
|
|
107
106
|
return [
|
|
108
|
-
str(AdlsFqn(fqn.sa, fqn.container, path.name))
|
|
107
|
+
str(adls.fqn.AdlsFqn(fqn.sa, fqn.container, path.name))
|
|
109
108
|
for path in get_global_fs_client(fqn.sa, fqn.container).get_paths(fqn.path, recursive=False)
|
|
110
109
|
]
|
|
111
110
|
|
|
@@ -128,10 +127,10 @@ class DangerouslyCachingStore(AdlsBlobStore):
|
|
|
128
127
|
"""
|
|
129
128
|
|
|
130
129
|
def __init__(self, root: Path):
|
|
131
|
-
self._cache =
|
|
130
|
+
self._cache = adls.Cache(root.resolve(), ("ref", "hard"))
|
|
132
131
|
|
|
133
132
|
def exists(self, remote_uri: str) -> bool:
|
|
134
|
-
cache_path = self._cache.path(
|
|
133
|
+
cache_path = self._cache.path(adls.fqn.parse(remote_uri))
|
|
135
134
|
if cache_path.exists():
|
|
136
135
|
return True
|
|
137
136
|
return super().exists(remote_uri)
|
|
@@ -141,10 +140,10 @@ class DangerouslyCachingStore(AdlsBlobStore):
|
|
|
141
140
|
# of some sort. We use a completely separate cache for all of these things, because
|
|
142
141
|
# in previous implementations, none of these things would have been cached at all.
|
|
143
142
|
# (see comment on getfile below...)
|
|
144
|
-
fqn =
|
|
143
|
+
fqn = adls.fqn.parse(remote_uri)
|
|
145
144
|
cache_path = self._cache.path(fqn)
|
|
146
145
|
if not cache_path.exists():
|
|
147
|
-
download.download_or_use_verified(
|
|
146
|
+
adls.download.download_or_use_verified(
|
|
148
147
|
get_global_fs_client(fqn.sa, fqn.container), fqn.path, cache_path, cache=self._cache
|
|
149
148
|
)
|
|
150
149
|
with cache_path.open("rb") as f:
|
|
@@ -162,7 +161,7 @@ class DangerouslyCachingStore(AdlsBlobStore):
|
|
|
162
161
|
# (a link, usually) to our separate cache directory so that it's possible to
|
|
163
162
|
# completely empty this particular mops cache (and all its 'dangerous' behavior)
|
|
164
163
|
# simply by deleting that one cache directory.
|
|
165
|
-
cache_path = self._cache.path(
|
|
164
|
+
cache_path = self._cache.path(adls.fqn.parse(remote_uri))
|
|
166
165
|
if cache_path.exists():
|
|
167
166
|
return cache_path
|
|
168
167
|
outpath = super().getfile(remote_uri)
|
|
@@ -176,7 +175,7 @@ _DEFAULT_CONTROL_CACHE = config.item(
|
|
|
176
175
|
|
|
177
176
|
|
|
178
177
|
def get_adls_blob_store(uri: str) -> ty.Optional[AdlsBlobStore]:
|
|
179
|
-
if not uri.startswith(ADLS_SCHEME):
|
|
178
|
+
if not uri.startswith(adls.ADLS_SCHEME):
|
|
180
179
|
return None
|
|
181
180
|
|
|
182
181
|
if DISABLE_CONTROL_CACHE() or not _DEFAULT_CONTROL_CACHE():
|
|
@@ -12,7 +12,13 @@ B64_ADDRESSED = "{algo}-b64-addressed"
|
|
|
12
12
|
|
|
13
13
|
def storage_content_addressed(hash_str: str, algo: str, storage_root: str = "") -> str:
|
|
14
14
|
hash_namespace = B64_ADDRESSED.format(algo=algo)
|
|
15
|
-
return
|
|
15
|
+
return "/".join(
|
|
16
|
+
[
|
|
17
|
+
(storage_root or active_storage_root()).strip("/"),
|
|
18
|
+
hash_namespace.strip("/"),
|
|
19
|
+
hash_str.strip("/"),
|
|
20
|
+
]
|
|
21
|
+
)
|
|
16
22
|
|
|
17
23
|
|
|
18
24
|
class ContentAddressed(ty.NamedTuple):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thds.mops
|
|
3
|
-
Version: 3.8.
|
|
3
|
+
Version: 3.8.20250701190352
|
|
4
4
|
Summary: ML Ops tools for Trilliant Health
|
|
5
5
|
Author-email: Trilliant Health <info@trillianthealth.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
|
|
@@ -40,10 +40,10 @@ thds/mops/pure/_magic/sauce.py,sha256=xmO6Kch-ofVnrVFkxWm84C0-ao9vVCYq0nGGMuYeao
|
|
|
40
40
|
thds/mops/pure/_magic/shims.py,sha256=JI49ddv6lEUmNVsEl-XkGlsx2RpOMQoIOSSSfootYE8,1188
|
|
41
41
|
thds/mops/pure/adls/__init__.py,sha256=fw67xxwnizBurScMa-_zWb94lo5gamEVRt27V4bR0jc,54
|
|
42
42
|
thds/mops/pure/adls/_files.py,sha256=9m35Y4elWF0DjgAXVp4oi5CaY6fXWt8n67PilWxWJns,821
|
|
43
|
-
thds/mops/pure/adls/blob_store.py,sha256=
|
|
43
|
+
thds/mops/pure/adls/blob_store.py,sha256=ZWr7CKKcI-jz1sWZq4Jwq6LYkhFNxp-EFnNh83EJd84,7374
|
|
44
44
|
thds/mops/pure/adls/output_fqn.py,sha256=qnwdubjVwKShzZ5RruD0_85x86DtPwZNSgwADrdhrTs,748
|
|
45
45
|
thds/mops/pure/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
thds/mops/pure/core/content_addressed.py,sha256=
|
|
46
|
+
thds/mops/pure/core/content_addressed.py,sha256=RaCPvtM7bf0NnY5lNR5jPcNn2Moh-bmLtC4zOvdWjCU,1202
|
|
47
47
|
thds/mops/pure/core/deferred_work.py,sha256=3vjfqWFlqLLMcmX4nHVaaiidrG5N5KyAYhw6R0hoMzI,3716
|
|
48
48
|
thds/mops/pure/core/file_blob_store.py,sha256=N4m4LLrBZaqTJFR4D_eYl03a-n6yQBRsv0ID1bOS9TA,4298
|
|
49
49
|
thds/mops/pure/core/metadata.py,sha256=xAL2iz0pXrcKapmYnNrqSZ8nH2GVakA167NSpAfwiCI,8276
|
|
@@ -104,8 +104,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
|
|
|
104
104
|
thds/mops/pure/tools/summarize/run_summary.py,sha256=LUtvbankAYbss2NCF_XbNl05jkNgxYz_SLyERJlp4sk,5773
|
|
105
105
|
thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
|
|
107
|
-
thds_mops-3.8.
|
|
108
|
-
thds_mops-3.8.
|
|
109
|
-
thds_mops-3.8.
|
|
110
|
-
thds_mops-3.8.
|
|
111
|
-
thds_mops-3.8.
|
|
107
|
+
thds_mops-3.8.20250701190352.dist-info/METADATA,sha256=0Ls10UT3FUKluH4NI8oAHVK4XXKYsAMZaS603Rsy5ZI,2225
|
|
108
|
+
thds_mops-3.8.20250701190352.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
109
|
+
thds_mops-3.8.20250701190352.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
|
|
110
|
+
thds_mops-3.8.20250701190352.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
111
|
+
thds_mops-3.8.20250701190352.dist-info/RECORD,,
|
|
File without changes
|
{thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_mops-3.8.20250701001208.dist-info → thds_mops-3.8.20250701190352.dist-info}/top_level.txt
RENAMED
|
File without changes
|