thds.adls 4.1.20250701190349__py3-none-any.whl → 4.1.20250702194306__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/azcopy/download.py +5 -5
- thds/adls/download.py +17 -13
- thds/adls/tools/download.py +3 -2
- thds/adls/upload.py +6 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/RECORD +9 -9
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/top_level.txt +0 -0
thds/adls/azcopy/download.py
CHANGED
|
@@ -29,7 +29,7 @@ logger = log.getLogger(__name__)
|
|
|
29
29
|
@dataclass
|
|
30
30
|
class DownloadRequest:
|
|
31
31
|
temp_path: Path
|
|
32
|
-
size_bytes: int
|
|
32
|
+
size_bytes: ty.Optional[int]
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
@dataclass
|
|
@@ -73,7 +73,7 @@ def sync_fastpath(
|
|
|
73
73
|
env=system_resources.restrict_usage(),
|
|
74
74
|
)
|
|
75
75
|
assert process.stdout
|
|
76
|
-
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes) as track:
|
|
76
|
+
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
77
77
|
for line in process.stdout:
|
|
78
78
|
track(line)
|
|
79
79
|
|
|
@@ -83,7 +83,7 @@ def sync_fastpath(
|
|
|
83
83
|
assert (
|
|
84
84
|
download_request.temp_path.exists()
|
|
85
85
|
), f"AzCopy did not create the file at {download_request.temp_path}"
|
|
86
|
-
return
|
|
86
|
+
return
|
|
87
87
|
|
|
88
88
|
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
89
89
|
logger.warning("Falling back to Python SDK for download")
|
|
@@ -121,7 +121,7 @@ async def async_fastpath(
|
|
|
121
121
|
assert copy_proc.stdout
|
|
122
122
|
|
|
123
123
|
# Feed lines to the tracker asynchronously
|
|
124
|
-
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes) as track:
|
|
124
|
+
with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
|
|
125
125
|
while True:
|
|
126
126
|
line = await copy_proc.stdout.readline()
|
|
127
127
|
if not line: # EOF
|
|
@@ -133,7 +133,7 @@ async def async_fastpath(
|
|
|
133
133
|
if exit_code != 0:
|
|
134
134
|
raise subprocess.SubprocessError()
|
|
135
135
|
|
|
136
|
-
return
|
|
136
|
+
return
|
|
137
137
|
|
|
138
138
|
except (subprocess.SubprocessError, FileNotFoundError):
|
|
139
139
|
logger.warning("Falling back to Python SDK for download")
|
thds/adls/download.py
CHANGED
|
@@ -22,27 +22,31 @@ from .ro_cache import Cache, from_cache_path_to_local, from_local_path_to_cache
|
|
|
22
22
|
logger = log.getLogger(__name__)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
def _check_size(dpath: Path, expected_size: ty.Optional[int]) -> None:
|
|
26
|
+
actual_size = os.path.getsize(dpath)
|
|
27
|
+
if expected_size is not None and actual_size != expected_size:
|
|
28
|
+
raise errors.ContentLengthMismatchError(
|
|
29
|
+
f"Downloaded file {dpath} has size {actual_size} but expected {expected_size}"
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
25
33
|
@contextlib.contextmanager
|
|
26
34
|
def _atomic_download_and_move(
|
|
27
35
|
fqn: AdlsFqn,
|
|
28
36
|
dest: StrOrPath,
|
|
29
37
|
properties: ty.Optional[FileProperties] = None,
|
|
30
38
|
) -> ty.Iterator[azcopy.download.DownloadRequest]:
|
|
31
|
-
known_size =
|
|
39
|
+
known_size = properties.size if properties else None
|
|
32
40
|
with tmp.temppath_same_fs(dest) as dpath:
|
|
33
41
|
logger.debug("Downloading %s", fqn)
|
|
34
|
-
if azcopy.download.should_use_azcopy(known_size):
|
|
42
|
+
if azcopy.download.should_use_azcopy(known_size or -1):
|
|
35
43
|
yield azcopy.download.DownloadRequest(dpath, known_size)
|
|
36
44
|
else:
|
|
37
45
|
with open(dpath, "wb") as down_f:
|
|
38
46
|
yield azcopy.download.SdkDownloadRequest(
|
|
39
|
-
dpath, known_size, report_download_progress(down_f, str(fqn), known_size)
|
|
47
|
+
dpath, known_size, report_download_progress(down_f, str(fqn), known_size or 0)
|
|
40
48
|
)
|
|
41
|
-
|
|
42
|
-
raise errors.ContentLengthMismatchError(
|
|
43
|
-
f"Downloaded file {dpath} has size {os.path.getsize(dpath)}"
|
|
44
|
-
f" but expected {known_size}."
|
|
45
|
-
)
|
|
49
|
+
_check_size(dpath, known_size)
|
|
46
50
|
try:
|
|
47
51
|
os.rename(dpath, dest) # will succeed even if dest is read-only
|
|
48
52
|
except OSError as oserr:
|
|
@@ -303,7 +307,6 @@ def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
|
303
307
|
filter(
|
|
304
308
|
None,
|
|
305
309
|
(
|
|
306
|
-
errors.ContentLengthMismatchError,
|
|
307
310
|
aiohttp.http_exceptions.ContentLengthError,
|
|
308
311
|
aiohttp.client_exceptions.ClientPayloadError,
|
|
309
312
|
getattr(
|
|
@@ -368,6 +371,9 @@ _async_dl_scope = scope.AsyncScope("adls.download.async")
|
|
|
368
371
|
|
|
369
372
|
@_dl_scope.bound
|
|
370
373
|
@_async_dl_scope.async_bound
|
|
374
|
+
@fretry.retry_regular_async(
|
|
375
|
+
fretry.is_exc(errors.ContentLengthMismatchError), fretry.iter_to_async(fretry.n_times(2))
|
|
376
|
+
)
|
|
371
377
|
async def async_download_or_use_verified(
|
|
372
378
|
fs_client: aio.FileSystemClient,
|
|
373
379
|
remote_key: str,
|
|
@@ -381,9 +387,8 @@ async def async_download_or_use_verified(
|
|
|
381
387
|
co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
|
|
382
388
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
383
389
|
)
|
|
384
|
-
await _async_dl_scope.async_enter(
|
|
385
|
-
|
|
386
|
-
) # on __aexit__, will release the connection to the pool
|
|
390
|
+
await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
|
|
391
|
+
# on __aexit__, will release the connection to the pool
|
|
387
392
|
while True:
|
|
388
393
|
if co_request == _IoRequest.FILE_PROPERTIES:
|
|
389
394
|
if not file_properties:
|
|
@@ -393,7 +398,6 @@ async def async_download_or_use_verified(
|
|
|
393
398
|
co_request = co.send(file_properties)
|
|
394
399
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
395
400
|
# coroutine is requesting download
|
|
396
|
-
|
|
397
401
|
await fretry.retry_regular_async(
|
|
398
402
|
_excs_to_retry(), fretry.iter_to_async(fretry.n_times(2))
|
|
399
403
|
)(
|
thds/adls/tools/download.py
CHANGED
|
@@ -16,9 +16,10 @@ def main():
|
|
|
16
16
|
help="A fully qualified path to an ADLS location. Accepts adls://, https:// and abfss:// URIs.",
|
|
17
17
|
)
|
|
18
18
|
parser.add_argument(
|
|
19
|
-
"
|
|
20
|
-
"
|
|
19
|
+
"copy_to",
|
|
20
|
+
nargs="?",
|
|
21
21
|
type=Path,
|
|
22
|
+
default=None,
|
|
22
23
|
help="This will create a link to the cached download at the specified location",
|
|
23
24
|
)
|
|
24
25
|
parser.add_argument(
|
thds/adls/upload.py
CHANGED
|
@@ -32,8 +32,11 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
32
32
|
@scope.bound
|
|
33
33
|
def _try_write_through() -> bool:
|
|
34
34
|
if isinstance(data, Path) and data.exists():
|
|
35
|
+
# we don't do hard or soft links because they share file permissions,
|
|
36
|
+
# and it's not up to us to change permissions on the src file.
|
|
35
37
|
link.link_or_copy(data, local_cache_path, "ref")
|
|
36
38
|
return True
|
|
39
|
+
|
|
37
40
|
out = scope.enter(tmp.temppath_same_fs(local_cache_path))
|
|
38
41
|
if hasattr(data, "read") and hasattr(data, "seek"):
|
|
39
42
|
with open(out, "wb") as f:
|
|
@@ -41,11 +44,13 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
41
44
|
data.seek(0) # type: ignore
|
|
42
45
|
link.link_or_copy(out, local_cache_path)
|
|
43
46
|
return True
|
|
47
|
+
|
|
44
48
|
if isinstance(data, bytes):
|
|
45
49
|
with open(out, "wb") as f:
|
|
46
50
|
f.write(data)
|
|
47
51
|
link.link_or_copy(out, local_cache_path)
|
|
48
52
|
return True
|
|
53
|
+
|
|
49
54
|
return False
|
|
50
55
|
|
|
51
56
|
if _try_write_through():
|
|
@@ -54,6 +59,7 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
|
|
|
54
59
|
# and we don't want to allow anyone to write to its copy.
|
|
55
60
|
files.set_read_only(local_cache_path)
|
|
56
61
|
return local_cache_path
|
|
62
|
+
|
|
57
63
|
except FileNotFoundError:
|
|
58
64
|
# may have hit a race condition.
|
|
59
65
|
# don't fail upload just because we couldn't write through the cache.
|
|
@@ -7,7 +7,7 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
7
7
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
8
8
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
9
9
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
10
|
-
thds/adls/download.py,sha256=
|
|
10
|
+
thds/adls/download.py,sha256=HzmhHM0FAmxtCRkK9M7NajsIzIuHD74GuxP3dyLoP1Q,18266
|
|
11
11
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
12
12
|
thds/adls/errors.py,sha256=6cLg2E4SB8ic46PBzA3ynRH4b1oR8qRb07RBgKGJRxY,1783
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
@@ -24,19 +24,19 @@ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
|
|
|
24
24
|
thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
|
|
25
25
|
thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
|
|
26
26
|
thds/adls/source_tree.py,sha256=yP_v2XrKxXqUOdZ-x8kqHhBFAuur3AlAq3zi4hHj4AE,2235
|
|
27
|
-
thds/adls/upload.py,sha256=
|
|
27
|
+
thds/adls/upload.py,sha256=MRHK9Am-x5FKBPh1SXLTbPC1r0Xk0bGWNU8CcNuUMLo,6602
|
|
28
28
|
thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
|
|
29
29
|
thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
|
|
30
|
-
thds/adls/azcopy/download.py,sha256=
|
|
30
|
+
thds/adls/azcopy/download.py,sha256=8shLbizgKr5WLmOitQ8TY28EVj2IdT7iSRmRgqFNLAg,6008
|
|
31
31
|
thds/adls/azcopy/login.py,sha256=923UaewVMPFzkDSgCQsbl-_g7qdFhpXpF0MGNIy3T_A,1538
|
|
32
32
|
thds/adls/azcopy/progress.py,sha256=K7TVmSiWfu561orL3GuOnlQX9VtVxWVECAq9NiweYNo,1387
|
|
33
33
|
thds/adls/azcopy/system_resources.py,sha256=okgDEKAp0oWGQF7OKikbgJ9buBeiOgNaDYy-36j6dHo,761
|
|
34
34
|
thds/adls/azcopy/upload.py,sha256=bvtYdbaFsZkOHFLDpeBlTKqw63P3_kbImInI04ZlekM,2601
|
|
35
|
-
thds/adls/tools/download.py,sha256=
|
|
35
|
+
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
36
36
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
37
37
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
38
|
-
thds_adls-4.1.
|
|
39
|
-
thds_adls-4.1.
|
|
40
|
-
thds_adls-4.1.
|
|
41
|
-
thds_adls-4.1.
|
|
42
|
-
thds_adls-4.1.
|
|
38
|
+
thds_adls-4.1.20250702194306.dist-info/METADATA,sha256=zgZubxCu37Sqrjn7b5NTJNlPxZbRgbcx-omuFBiVfMg,587
|
|
39
|
+
thds_adls-4.1.20250702194306.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
thds_adls-4.1.20250702194306.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
+
thds_adls-4.1.20250702194306.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
+
thds_adls-4.1.20250702194306.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250701190349.dist-info → thds_adls-4.1.20250702194306.dist-info}/top_level.txt
RENAMED
|
File without changes
|