thds.adls 4.1.20250701190349__py3-none-any.whl → 4.1.20250703020842__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/azcopy/download.py +18 -8
- thds/adls/azcopy/upload.py +6 -1
- thds/adls/download.py +19 -13
- thds/adls/tools/download.py +3 -2
- thds/adls/upload.py +6 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/RECORD +10 -10
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/top_level.txt +0 -0
thds/adls/azcopy/download.py
CHANGED
|
@@ -29,7 +29,7 @@ logger = log.getLogger(__name__)
|
|
|
29
29
|
@dataclass
|
|
30
30
|
class DownloadRequest:
|
|
31
31
|
temp_path: Path
|
|
32
|
-
size_bytes: int
|
|
32
|
+
size_bytes: ty.Optional[int]
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
@dataclass
|
|
@@ -73,19 +73,24 @@ def sync_fastpath(
|
|
|
73
73
|
env=system_resources.restrict_usage(),
|
|
74
74
|
)
|
|
75
75
|
assert process.stdout
|
|
76
|
-
|
|
76
|
+
output_lines = list()
|
|
77
|
+
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
77
78
|
for line in process.stdout:
|
|
78
79
|
track(line)
|
|
80
|
+
output_lines.append(line.strip())
|
|
79
81
|
|
|
80
82
|
process.wait()
|
|
81
83
|
if process.returncode != 0:
|
|
82
|
-
raise subprocess.
|
|
84
|
+
raise subprocess.CalledProcessError(
|
|
85
|
+
process.returncode,
|
|
86
|
+
f"AzCopy failed with return code {process.returncode}\n\n" + "\n".join(output_lines),
|
|
87
|
+
)
|
|
83
88
|
assert (
|
|
84
89
|
download_request.temp_path.exists()
|
|
85
90
|
), f"AzCopy did not create the file at {download_request.temp_path}"
|
|
86
|
-
return
|
|
91
|
+
return
|
|
87
92
|
|
|
88
|
-
except (subprocess.
|
|
93
|
+
except (subprocess.SubprocessError, FileNotFoundError):
|
|
89
94
|
logger.warning("Falling back to Python SDK for download")
|
|
90
95
|
|
|
91
96
|
logger.debug("Downloading %s using Python SDK", dl_file_client.url)
|
|
@@ -121,19 +126,24 @@ async def async_fastpath(
|
|
|
121
126
|
assert copy_proc.stdout
|
|
122
127
|
|
|
123
128
|
# Feed lines to the tracker asynchronously
|
|
124
|
-
|
|
129
|
+
output_lines = list()
|
|
130
|
+
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
125
131
|
while True:
|
|
126
132
|
line = await copy_proc.stdout.readline()
|
|
127
133
|
if not line: # EOF
|
|
128
134
|
break
|
|
129
135
|
track(line.decode().strip())
|
|
136
|
+
output_lines.append(line.decode().strip())
|
|
130
137
|
|
|
131
138
|
# Wait for process completion
|
|
132
139
|
exit_code = await copy_proc.wait()
|
|
133
140
|
if exit_code != 0:
|
|
134
|
-
raise subprocess.
|
|
141
|
+
raise subprocess.CalledProcessError(
|
|
142
|
+
exit_code,
|
|
143
|
+
f"AzCopy failed with return code {exit_code}\n\n" + "\n".join(output_lines),
|
|
144
|
+
)
|
|
135
145
|
|
|
136
|
-
return
|
|
146
|
+
return
|
|
137
147
|
|
|
138
148
|
except (subprocess.SubprocessError, FileNotFoundError):
|
|
139
149
|
logger.warning("Falling back to Python SDK for download")
|
thds/adls/azcopy/upload.py
CHANGED
|
@@ -86,10 +86,15 @@ def run(
|
|
|
86
86
|
env=system_resources.restrict_usage(),
|
|
87
87
|
)
|
|
88
88
|
assert process.stdout
|
|
89
|
+
output_lines = list()
|
|
89
90
|
with progress.azcopy_tracker(uri.to_blob_windows_url(dest), size_bytes) as track:
|
|
90
91
|
for line in process.stdout:
|
|
91
92
|
track(line)
|
|
93
|
+
output_lines.append(line.strip())
|
|
92
94
|
|
|
93
95
|
process.wait()
|
|
94
96
|
if process.returncode != 0:
|
|
95
|
-
raise subprocess.
|
|
97
|
+
raise subprocess.CalledProcessError(
|
|
98
|
+
process.returncode,
|
|
99
|
+
f"AzCopy failed with return code {process.returncode}\n\n" + "\n".join(output_lines),
|
|
100
|
+
)
|
thds/adls/download.py
CHANGED
|
@@ -7,6 +7,7 @@ import typing as ty
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
9
|
import aiohttp.http_exceptions
|
|
10
|
+
import requests.exceptions
|
|
10
11
|
from azure.core.exceptions import AzureError, HttpResponseError, ResourceModifiedError
|
|
11
12
|
from azure.storage.filedatalake import DataLakeFileClient, FileProperties, FileSystemClient, aio
|
|
12
13
|
|
|
@@ -22,27 +23,31 @@ from .ro_cache import Cache, from_cache_path_to_local, from_local_path_to_cache
|
|
|
22
23
|
logger = log.getLogger(__name__)
|
|
23
24
|
|
|
24
25
|
|
|
26
|
+
def _check_size(dpath: Path, expected_size: ty.Optional[int]) -> None:
|
|
27
|
+
actual_size = os.path.getsize(dpath)
|
|
28
|
+
if expected_size is not None and actual_size != expected_size:
|
|
29
|
+
raise errors.ContentLengthMismatchError(
|
|
30
|
+
f"Downloaded file {dpath} has size {actual_size} but expected {expected_size}"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
25
34
|
@contextlib.contextmanager
|
|
26
35
|
def _atomic_download_and_move(
|
|
27
36
|
fqn: AdlsFqn,
|
|
28
37
|
dest: StrOrPath,
|
|
29
38
|
properties: ty.Optional[FileProperties] = None,
|
|
30
39
|
) -> ty.Iterator[azcopy.download.DownloadRequest]:
|
|
31
|
-
known_size =
|
|
40
|
+
known_size = properties.size if properties else None
|
|
32
41
|
with tmp.temppath_same_fs(dest) as dpath:
|
|
33
42
|
logger.debug("Downloading %s", fqn)
|
|
34
|
-
if azcopy.download.should_use_azcopy(known_size):
|
|
43
|
+
if azcopy.download.should_use_azcopy(known_size or -1):
|
|
35
44
|
yield azcopy.download.DownloadRequest(dpath, known_size)
|
|
36
45
|
else:
|
|
37
46
|
with open(dpath, "wb") as down_f:
|
|
38
47
|
yield azcopy.download.SdkDownloadRequest(
|
|
39
|
-
dpath, known_size, report_download_progress(down_f, str(fqn), known_size)
|
|
48
|
+
dpath, known_size, report_download_progress(down_f, str(fqn), known_size or 0)
|
|
40
49
|
)
|
|
41
|
-
|
|
42
|
-
raise errors.ContentLengthMismatchError(
|
|
43
|
-
f"Downloaded file {dpath} has size {os.path.getsize(dpath)}"
|
|
44
|
-
f" but expected {known_size}."
|
|
45
|
-
)
|
|
50
|
+
_check_size(dpath, known_size)
|
|
46
51
|
try:
|
|
47
52
|
os.rename(dpath, dest) # will succeed even if dest is read-only
|
|
48
53
|
except OSError as oserr:
|
|
@@ -303,7 +308,7 @@ def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
|
303
308
|
filter(
|
|
304
309
|
None,
|
|
305
310
|
(
|
|
306
|
-
|
|
311
|
+
requests.exceptions.ConnectionError,
|
|
307
312
|
aiohttp.http_exceptions.ContentLengthError,
|
|
308
313
|
aiohttp.client_exceptions.ClientPayloadError,
|
|
309
314
|
getattr(
|
|
@@ -368,6 +373,9 @@ _async_dl_scope = scope.AsyncScope("adls.download.async")
|
|
|
368
373
|
|
|
369
374
|
@_dl_scope.bound
|
|
370
375
|
@_async_dl_scope.async_bound
|
|
376
|
+
@fretry.retry_regular_async(
|
|
377
|
+
fretry.is_exc(errors.ContentLengthMismatchError), fretry.iter_to_async(fretry.n_times(2))
|
|
378
|
+
)
|
|
371
379
|
async def async_download_or_use_verified(
|
|
372
380
|
fs_client: aio.FileSystemClient,
|
|
373
381
|
remote_key: str,
|
|
@@ -381,9 +389,8 @@ async def async_download_or_use_verified(
|
|
|
381
389
|
co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
|
|
382
390
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
383
391
|
)
|
|
384
|
-
await _async_dl_scope.async_enter(
|
|
385
|
-
|
|
386
|
-
) # on __aexit__, will release the connection to the pool
|
|
392
|
+
await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
|
|
393
|
+
# on __aexit__, will release the connection to the pool
|
|
387
394
|
while True:
|
|
388
395
|
if co_request == _IoRequest.FILE_PROPERTIES:
|
|
389
396
|
if not file_properties:
|
|
@@ -393,7 +400,6 @@ async def async_download_or_use_verified(
|
|
|
393
400
|
co_request = co.send(file_properties)
|
|
394
401
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
395
402
|
# coroutine is requesting download
|
|
396
|
-
|
|
397
403
|
await fretry.retry_regular_async(
|
|
398
404
|
_excs_to_retry(), fretry.iter_to_async(fretry.n_times(2))
|
|
399
405
|
)(
|
thds/adls/tools/download.py
CHANGED
|
@@ -16,9 +16,10 @@ def main():
|
|
|
16
16
|
help="A fully qualified path to an ADLS location. Accepts adls://, https:// and abfss:// URIs.",
|
|
17
17
|
)
|
|
18
18
|
parser.add_argument(
|
|
19
|
-
"
|
|
20
|
-
"
|
|
19
|
+
"copy_to",
|
|
20
|
+
nargs="?",
|
|
21
21
|
type=Path,
|
|
22
|
+
default=None,
|
|
22
23
|
help="This will create a link to the cached download at the specified location",
|
|
23
24
|
)
|
|
24
25
|
parser.add_argument(
|
thds/adls/upload.py
CHANGED
|
@@ -32,8 +32,11 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
32
32
|
@scope.bound
|
|
33
33
|
def _try_write_through() -> bool:
|
|
34
34
|
if isinstance(data, Path) and data.exists():
|
|
35
|
+
# we don't do hard or soft links because they share file permissions,
|
|
36
|
+
# and it's not up to us to change permissions on the src file.
|
|
35
37
|
link.link_or_copy(data, local_cache_path, "ref")
|
|
36
38
|
return True
|
|
39
|
+
|
|
37
40
|
out = scope.enter(tmp.temppath_same_fs(local_cache_path))
|
|
38
41
|
if hasattr(data, "read") and hasattr(data, "seek"):
|
|
39
42
|
with open(out, "wb") as f:
|
|
@@ -41,11 +44,13 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
41
44
|
data.seek(0) # type: ignore
|
|
42
45
|
link.link_or_copy(out, local_cache_path)
|
|
43
46
|
return True
|
|
47
|
+
|
|
44
48
|
if isinstance(data, bytes):
|
|
45
49
|
with open(out, "wb") as f:
|
|
46
50
|
f.write(data)
|
|
47
51
|
link.link_or_copy(out, local_cache_path)
|
|
48
52
|
return True
|
|
53
|
+
|
|
49
54
|
return False
|
|
50
55
|
|
|
51
56
|
if _try_write_through():
|
|
@@ -54,6 +59,7 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
54
59
|
# and we don't want to allow anyone to write to its copy.
|
|
55
60
|
files.set_read_only(local_cache_path)
|
|
56
61
|
return local_cache_path
|
|
62
|
+
|
|
57
63
|
except FileNotFoundError:
|
|
58
64
|
# may have hit a race condition.
|
|
59
65
|
# don't fail upload just because we couldn't write through the cache.
|
|
@@ -7,7 +7,7 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
7
7
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
8
8
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
9
9
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
10
|
-
thds/adls/download.py,sha256=
|
|
10
|
+
thds/adls/download.py,sha256=u-ckaExnTMAVUlhMbnxM_urBggbZkgXMQSSfp1czPeQ,18350
|
|
11
11
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
12
12
|
thds/adls/errors.py,sha256=6cLg2E4SB8ic46PBzA3ynRH4b1oR8qRb07RBgKGJRxY,1783
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
@@ -24,19 +24,19 @@ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
|
|
|
24
24
|
thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
|
|
25
25
|
thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
|
|
26
26
|
thds/adls/source_tree.py,sha256=yP_v2XrKxXqUOdZ-x8kqHhBFAuur3AlAq3zi4hHj4AE,2235
|
|
27
|
-
thds/adls/upload.py,sha256=
|
|
27
|
+
thds/adls/upload.py,sha256=MRHK9Am-x5FKBPh1SXLTbPC1r0Xk0bGWNU8CcNuUMLo,6602
|
|
28
28
|
thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
|
|
29
29
|
thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
|
|
30
|
-
thds/adls/azcopy/download.py,sha256=
|
|
30
|
+
thds/adls/azcopy/download.py,sha256=MwkUaQTrrXRX9yip_hiLZXzSgGqyTDUxjr0MeXitWuo,6450
|
|
31
31
|
thds/adls/azcopy/login.py,sha256=923UaewVMPFzkDSgCQsbl-_g7qdFhpXpF0MGNIy3T_A,1538
|
|
32
32
|
thds/adls/azcopy/progress.py,sha256=K7TVmSiWfu561orL3GuOnlQX9VtVxWVECAq9NiweYNo,1387
|
|
33
33
|
thds/adls/azcopy/system_resources.py,sha256=okgDEKAp0oWGQF7OKikbgJ9buBeiOgNaDYy-36j6dHo,761
|
|
34
|
-
thds/adls/azcopy/upload.py,sha256=
|
|
35
|
-
thds/adls/tools/download.py,sha256=
|
|
34
|
+
thds/adls/azcopy/upload.py,sha256=0l5FzV9IgZ2iQhm4eKZjTdw4SO17bHd8VnwcTev1lUs,2761
|
|
35
|
+
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
36
36
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
37
37
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
38
|
-
thds_adls-4.1.
|
|
39
|
-
thds_adls-4.1.
|
|
40
|
-
thds_adls-4.1.
|
|
41
|
-
thds_adls-4.1.
|
|
42
|
-
thds_adls-4.1.
|
|
38
|
+
thds_adls-4.1.20250703020842.dist-info/METADATA,sha256=zWALY04Mrs1zZNpje2AyaiU-B6tN7La5ldtAbVEXxac,587
|
|
39
|
+
thds_adls-4.1.20250703020842.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
thds_adls-4.1.20250703020842.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
+
thds_adls-4.1.20250703020842.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
+
thds_adls-4.1.20250703020842.dist-info/RECORD,,
|
|
{thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/WHEEL
RENAMED
|
File without changes
|
{thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250703020842.dist-info}/top_level.txt
RENAMED
|
File without changes
|