thds.adls 4.1.20250711201240__py3-none-any.whl → 4.1.20250715194412__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/_progress.py +4 -0
- thds/adls/azcopy/download.py +6 -2
- thds/adls/azcopy/progress.py +8 -2
- thds/adls/azcopy/upload.py +1 -1
- thds/adls/file_properties.py +8 -1
- thds/adls/hashes.py +1 -1
- thds/adls/source_tree.py +7 -10
- {thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/RECORD +12 -12
- {thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/top_level.txt +0 -0
thds/adls/_progress.py
CHANGED
|
@@ -161,6 +161,10 @@ def get_global_download_tracker() -> Tracker:
|
|
|
161
161
|
return _GLOBAL_DN_TRACKER
|
|
162
162
|
|
|
163
163
|
|
|
164
|
+
def get_global_upload_tracker() -> Tracker:
|
|
165
|
+
return _GLOBAL_UP_TRACKER
|
|
166
|
+
|
|
167
|
+
|
|
164
168
|
def _proxy_io(io_type: str, stream: T, key: str, total_len: int) -> T:
|
|
165
169
|
assert io_type in ("read", "write"), io_type
|
|
166
170
|
|
thds/adls/azcopy/download.py
CHANGED
|
@@ -74,7 +74,9 @@ def sync_fastpath(
|
|
|
74
74
|
)
|
|
75
75
|
assert process.stdout
|
|
76
76
|
output_lines = list()
|
|
77
|
-
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
77
|
+
with progress.azcopy_tracker(
|
|
78
|
+
"down", dl_file_client.url, download_request.size_bytes or 0
|
|
79
|
+
) as track:
|
|
78
80
|
for line in process.stdout:
|
|
79
81
|
track(line)
|
|
80
82
|
output_lines.append(line.strip())
|
|
@@ -127,7 +129,9 @@ async def async_fastpath(
|
|
|
127
129
|
|
|
128
130
|
# Feed lines to the tracker asynchronously
|
|
129
131
|
output_lines = list()
|
|
130
|
-
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
132
|
+
with progress.azcopy_tracker(
|
|
133
|
+
"down", dl_file_client.url, download_request.size_bytes or 0
|
|
134
|
+
) as track:
|
|
131
135
|
while True:
|
|
132
136
|
line = await copy_proc.stdout.readline()
|
|
133
137
|
if not line: # EOF
|
thds/adls/azcopy/progress.py
CHANGED
|
@@ -25,9 +25,15 @@ def _parse_azcopy_json_output(line: str) -> AzCopyJsonLine:
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
@contextmanager
|
|
28
|
-
def azcopy_tracker(http_url: str, size_bytes: int) -> ty.Iterator[ty.Callable[[str], None]]:
|
|
28
|
+
def azcopy_tracker(
|
|
29
|
+
direction: ty.Literal["up", "down"], http_url: str, size_bytes: int
|
|
30
|
+
) -> ty.Iterator[ty.Callable[[str], None]]:
|
|
29
31
|
"""Context manager that tracks progress from AzCopy JSON lines. This works for both async and sync impls."""
|
|
30
|
-
tracker = _progress.get_global_download_tracker()
|
|
32
|
+
tracker = (
|
|
33
|
+
_progress.get_global_download_tracker
|
|
34
|
+
if direction == "down"
|
|
35
|
+
else _progress.get_global_upload_tracker
|
|
36
|
+
)()
|
|
31
37
|
adls_uri = urllib.parse.unquote(str(uri.parse_uri(http_url)))
|
|
32
38
|
if size_bytes:
|
|
33
39
|
tracker.add(adls_uri, total=size_bytes)
|
thds/adls/azcopy/upload.py
CHANGED
|
@@ -87,7 +87,7 @@ def run(
|
|
|
87
87
|
)
|
|
88
88
|
assert process.stdout
|
|
89
89
|
output_lines = list()
|
|
90
|
-
with progress.azcopy_tracker(uri.to_blob_windows_url(dest), size_bytes) as track:
|
|
90
|
+
with progress.azcopy_tracker("up", uri.to_blob_windows_url(dest), size_bytes) as track:
|
|
91
91
|
for line in process.stdout:
|
|
92
92
|
track(line)
|
|
93
93
|
output_lines.append(line.strip())
|
thds/adls/file_properties.py
CHANGED
|
@@ -29,10 +29,17 @@ def get_blob_properties(fqn: AdlsFqn) -> BlobProperties:
|
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
+
class ContentSettingsP(ty.Protocol):
|
|
33
|
+
content_md5: ty.Optional[bytearray]
|
|
34
|
+
|
|
35
|
+
|
|
32
36
|
class PropertiesP(ty.Protocol):
|
|
33
37
|
name: ty.Any
|
|
34
38
|
metadata: ty.Any
|
|
35
|
-
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def content_settings(self) -> ContentSettingsP:
|
|
42
|
+
pass
|
|
36
43
|
|
|
37
44
|
|
|
38
45
|
# At some point it may make sense to separate file and blob property modules,
|
thds/adls/hashes.py
CHANGED
|
@@ -74,7 +74,7 @@ def extract_hashes_from_props(
|
|
|
74
74
|
|
|
75
75
|
hashes = list(extract_hashes_from_metadata(props.metadata or dict()))
|
|
76
76
|
if props.content_settings and props.content_settings.content_md5:
|
|
77
|
-
hashes.append(hashing.Hash("md5", props.content_settings.content_md5))
|
|
77
|
+
hashes.append(hashing.Hash("md5", bytes(props.content_settings.content_md5)))
|
|
78
78
|
return {h.algo: h for h in hashes}
|
|
79
79
|
|
|
80
80
|
|
thds/adls/source_tree.py
CHANGED
|
@@ -6,25 +6,21 @@ from azure.storage.blob import BlobProperties, ContainerClient
|
|
|
6
6
|
from thds.core import hashing
|
|
7
7
|
from thds.core.source.tree import SourceTree
|
|
8
8
|
|
|
9
|
-
from . import fqn, global_client, source, uri
|
|
9
|
+
from . import fqn, global_client, hashes, source, uri
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
13
13
|
class BlobMeta:
|
|
14
14
|
path: str
|
|
15
15
|
size: int
|
|
16
|
-
|
|
16
|
+
hash: ty.Optional[hashing.Hash]
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def to_blob_meta(blob_props: BlobProperties) -> BlobMeta:
|
|
20
20
|
return BlobMeta(
|
|
21
21
|
blob_props.name,
|
|
22
22
|
blob_props.size,
|
|
23
|
-
(
|
|
24
|
-
hashing.Hash("md5", bytes(blob_props.content_settings.content_md5))
|
|
25
|
-
if blob_props.content_settings.content_md5
|
|
26
|
-
else None
|
|
27
|
-
),
|
|
23
|
+
next(iter(hashes.extract_hashes_from_props(blob_props).values()), None),
|
|
28
24
|
)
|
|
29
25
|
|
|
30
26
|
|
|
@@ -34,10 +30,11 @@ def to_blob_meta(blob_props: BlobProperties) -> BlobMeta:
|
|
|
34
30
|
def list_blob_meta(
|
|
35
31
|
container_client: ContainerClient, root_dir: str, match_suffix: str = ""
|
|
36
32
|
) -> ty.List[BlobMeta]:
|
|
37
|
-
"""Gets the path (relative to the SA/container root), size, and
|
|
33
|
+
"""Gets the path (relative to the SA/container root), size, and _a_ hash (if available) of all blobs in a directory."""
|
|
38
34
|
return [
|
|
39
35
|
to_blob_meta(blob_props)
|
|
40
|
-
for blob_props in container_client.list_blobs(name_starts_with=root_dir)
|
|
36
|
+
for blob_props in container_client.list_blobs(name_starts_with=root_dir, include=["metadata"])
|
|
37
|
+
# `list_blobs` does not include metadata by default, so we need to explicitly specify including it
|
|
41
38
|
if blob_props.size > 0
|
|
42
39
|
# container client lists directories as blobs with size 0
|
|
43
40
|
and blob_props.name.endswith(match_suffix)
|
|
@@ -54,7 +51,7 @@ def from_path(adls_path: uri.UriIsh, match_suffix: str = "") -> SourceTree:
|
|
|
54
51
|
container_root = root_fqn.root()
|
|
55
52
|
return SourceTree(
|
|
56
53
|
sources=[
|
|
57
|
-
source.from_adls(container_root / blob_meta.path, hash=blob_meta.
|
|
54
|
+
source.from_adls(container_root / blob_meta.path, hash=blob_meta.hash)
|
|
58
55
|
for blob_meta in list_blob_meta(container_client, root_fqn.path, match_suffix=match_suffix)
|
|
59
56
|
],
|
|
60
57
|
higher_logical_root=fqn.split(root_fqn)[-1],
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
thds/adls/__init__.py,sha256=g2Zb0EAAH-JzPMYHAub9liU4qa5pfqQDnILfEhmObGo,1036
|
|
2
|
-
thds/adls/_progress.py,sha256=
|
|
2
|
+
thds/adls/_progress.py,sha256=D6XIipzG_xwmxs_08LuiYFfThGqHTU2KiIyjNduiOFY,6656
|
|
3
3
|
thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
|
|
4
4
|
thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
|
|
5
5
|
thds/adls/cached.py,sha256=up1F5JOVXdmwdZ8RAB2UDgiy6ooLg8IMULohBh75VpQ,3034
|
|
@@ -11,10 +11,10 @@ thds/adls/download.py,sha256=WOpMXGUbWImBdkM4tSW7qnCbu7G_cRXKF5pFQVLPPxs,18772
|
|
|
11
11
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
12
12
|
thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
|
-
thds/adls/file_properties.py,sha256=
|
|
14
|
+
thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
|
|
15
15
|
thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
|
|
16
16
|
thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
|
|
17
|
-
thds/adls/hashes.py,sha256
|
|
17
|
+
thds/adls/hashes.py,sha256=-wRRATGmww7k2RD5Zmhq_Fq7Z2JihLy1njeHFekU15c,5316
|
|
18
18
|
thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
|
|
19
19
|
thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
|
|
20
20
|
thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
|
|
@@ -23,20 +23,20 @@ thds/adls/ro_cache.py,sha256=P-UVFZhqnE5wojqYmRVWZcqjl-pM1xVMm9VAm3nXlnA,4769
|
|
|
23
23
|
thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
|
|
24
24
|
thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
|
|
25
25
|
thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
|
|
26
|
-
thds/adls/source_tree.py,sha256=
|
|
26
|
+
thds/adls/source_tree.py,sha256=exBi7jNrOyj9nH5rzxZmZOqhRW9WXnmZLdLt4weoXYc,2292
|
|
27
27
|
thds/adls/upload.py,sha256=MRHK9Am-x5FKBPh1SXLTbPC1r0Xk0bGWNU8CcNuUMLo,6602
|
|
28
28
|
thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
|
|
29
29
|
thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
|
|
30
|
-
thds/adls/azcopy/download.py,sha256=
|
|
30
|
+
thds/adls/azcopy/download.py,sha256=FOtYyYh7ZXNWNdkj04yTV26lxcKOVj-YhS2p_EclYxA,6526
|
|
31
31
|
thds/adls/azcopy/login.py,sha256=923UaewVMPFzkDSgCQsbl-_g7qdFhpXpF0MGNIy3T_A,1538
|
|
32
|
-
thds/adls/azcopy/progress.py,sha256=
|
|
32
|
+
thds/adls/azcopy/progress.py,sha256=U9JzkHKyCglyzq3qMemcRKhmtDJhF1zXxHxlO_P3F9w,1526
|
|
33
33
|
thds/adls/azcopy/system_resources.py,sha256=okgDEKAp0oWGQF7OKikbgJ9buBeiOgNaDYy-36j6dHo,761
|
|
34
|
-
thds/adls/azcopy/upload.py,sha256=
|
|
34
|
+
thds/adls/azcopy/upload.py,sha256=RQLDJzS6qsMM12t5bykWJWBXs0UrmImrEFnPMxX2UlM,2767
|
|
35
35
|
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
36
36
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
37
37
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
38
|
-
thds_adls-4.1.
|
|
39
|
-
thds_adls-4.1.
|
|
40
|
-
thds_adls-4.1.
|
|
41
|
-
thds_adls-4.1.
|
|
42
|
-
thds_adls-4.1.
|
|
38
|
+
thds_adls-4.1.20250715194412.dist-info/METADATA,sha256=BWc8D2IzGxH03rsA-f1M2KpCi0S_Fzjm7eWRMhDjsUs,587
|
|
39
|
+
thds_adls-4.1.20250715194412.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
thds_adls-4.1.20250715194412.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
+
thds_adls-4.1.20250715194412.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
+
thds_adls-4.1.20250715194412.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250711201240.dist-info → thds_adls-4.1.20250715194412.dist-info}/top_level.txt
RENAMED
|
File without changes
|