thds.adls 3.1.20250402145019__py3-none-any.whl → 3.1.20250407153132__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic; see the package's registry page for more details.
- thds/adls/download.py +22 -3
- thds/adls/download_lock.py +10 -4
- {thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/METADATA +1 -1
- {thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/RECORD +7 -7
- {thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/WHEEL +0 -0
- {thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/entry_points.txt +0 -0
- {thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/top_level.txt +0 -0
thds/adls/download.py
CHANGED
|
@@ -5,6 +5,7 @@ import shutil
|
|
|
5
5
|
import typing as ty
|
|
6
6
|
from base64 import b64decode
|
|
7
7
|
|
|
8
|
+
import aiohttp.http_exceptions
|
|
8
9
|
from azure.core.exceptions import AzureError, HttpResponseError
|
|
9
10
|
from azure.storage.filedatalake import (
|
|
10
11
|
ContentSettings,
|
|
@@ -13,7 +14,7 @@ from azure.storage.filedatalake import (
|
|
|
13
14
|
FileSystemClient,
|
|
14
15
|
)
|
|
15
16
|
|
|
16
|
-
from thds.core import log, scope, tmp
|
|
17
|
+
from thds.core import fretry, log, scope, tmp
|
|
17
18
|
from thds.core.hashing import b64
|
|
18
19
|
from thds.core.types import StrOrPath
|
|
19
20
|
|
|
@@ -313,6 +314,13 @@ def _set_md5_if_missing(
|
|
|
313
314
|
return file_properties.content_settings
|
|
314
315
|
|
|
315
316
|
|
|
317
|
+
def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
318
|
+
"""These are exceptions that we observe to be spurious failures worth retrying."""
|
|
319
|
+
return fretry.is_exc(
|
|
320
|
+
aiohttp.http_exceptions.ContentLengthError, aiohttp.client_exceptions.ClientPayloadError
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
|
|
316
324
|
@_dl_scope.bound
|
|
317
325
|
def download_or_use_verified(
|
|
318
326
|
fs_client: FileSystemClient,
|
|
@@ -339,7 +347,10 @@ def download_or_use_verified(
|
|
|
339
347
|
co_request = co.send(file_properties)
|
|
340
348
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
341
349
|
# coroutine is requesting download
|
|
342
|
-
|
|
350
|
+
fretry.retry_regular(_excs_to_retry(), fretry.n_times(2))(
|
|
351
|
+
# retry n_times(2) means _retry_ twice.
|
|
352
|
+
azcopy.download.sync_fastpath
|
|
353
|
+
)(dl_file_client, co_request)
|
|
343
354
|
co_request = co.send(None)
|
|
344
355
|
else:
|
|
345
356
|
raise ValueError(f"Unexpected coroutine request: {co_request}")
|
|
@@ -378,7 +389,15 @@ async def async_download_or_use_verified(
|
|
|
378
389
|
co_request = co.send(file_properties)
|
|
379
390
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
380
391
|
# coroutine is requesting download
|
|
381
|
-
|
|
392
|
+
|
|
393
|
+
await fretry.retry_regular_async(
|
|
394
|
+
_excs_to_retry(), fretry.iter_to_async(fretry.n_times(2))
|
|
395
|
+
)(
|
|
396
|
+
# retry n_times(2) means _retry_ twice.
|
|
397
|
+
azcopy.download.async_fastpath
|
|
398
|
+
)(
|
|
399
|
+
dl_file_client, co_request
|
|
400
|
+
)
|
|
382
401
|
co_request = co.send(None)
|
|
383
402
|
else:
|
|
384
403
|
raise ValueError(f"Unexpected coroutine request: {co_request}")
|
thds/adls/download_lock.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import stat
|
|
2
2
|
import time
|
|
3
3
|
from datetime import timedelta
|
|
4
4
|
from pathlib import Path
|
|
@@ -11,6 +11,8 @@ from .md5 import hex_md5_str
|
|
|
11
11
|
|
|
12
12
|
DOWNLOAD_LOCKS_DIR = config.item("dir", home.HOMEDIR() / ".adls-md5-download-locks", parse=Path)
|
|
13
13
|
_CLEAN_UP_LOCKFILES_AFTER_TIME = timedelta(hours=24)
|
|
14
|
+
_CLEAN_UP_LOCKFILES_EVERY = timedelta(hours=1).total_seconds()
|
|
15
|
+
_LAST_CLEANED_BY_THIS_PROCESS = time.monotonic() - _CLEAN_UP_LOCKFILES_EVERY
|
|
14
16
|
logger = log.getLogger(__name__)
|
|
15
17
|
|
|
16
18
|
|
|
@@ -19,7 +21,8 @@ def _clean_download_locks() -> int:
|
|
|
19
21
|
deletion_threshold = time.time() - _CLEAN_UP_LOCKFILES_AFTER_TIME.total_seconds()
|
|
20
22
|
try:
|
|
21
23
|
for f in DOWNLOAD_LOCKS_DIR().iterdir():
|
|
22
|
-
|
|
24
|
+
fstat = f.stat()
|
|
25
|
+
if stat.S_ISREG(fstat.st_mode) and fstat.st_mtime < deletion_threshold:
|
|
23
26
|
f.unlink()
|
|
24
27
|
deleted += 1
|
|
25
28
|
except Exception:
|
|
@@ -31,8 +34,11 @@ def _clean_download_locks() -> int:
|
|
|
31
34
|
|
|
32
35
|
|
|
33
36
|
def _occasionally_clean_download_locks():
|
|
34
|
-
|
|
35
|
-
|
|
37
|
+
global _LAST_CLEANED_BY_THIS_PROCESS
|
|
38
|
+
# do this about once an hour
|
|
39
|
+
if time.monotonic() > _LAST_CLEANED_BY_THIS_PROCESS + _CLEAN_UP_LOCKFILES_EVERY:
|
|
40
|
+
_LAST_CLEANED_BY_THIS_PROCESS = time.monotonic()
|
|
41
|
+
# minor race condition with other threads but it doesn't really matter.
|
|
36
42
|
_clean_download_locks()
|
|
37
43
|
|
|
38
44
|
|
|
{thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/RECORD
@@ -7,8 +7,8 @@ thds/adls/conf.py,sha256=q1SPrgb46NpobVzwt_Oyv71-BvsIbZLq9nRWS3LZjz0,1990
|
|
|
7
7
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
8
8
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
9
9
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
10
|
-
thds/adls/download.py,sha256=
|
|
11
|
-
thds/adls/download_lock.py,sha256=
|
|
10
|
+
thds/adls/download.py,sha256=ds8makvLOiC8LibHzb_X85XLaSPGMCNxG7ZL7bEVzCI,17584
|
|
11
|
+
thds/adls/download_lock.py,sha256=ttD2GhPNRnITNoV1XH2PvKbMsHppZirjy3RZ4P4kgKM,2826
|
|
12
12
|
thds/adls/errors.py,sha256=B_rMsQvQnNmP_sf-x8kmGsv2vIeOh4G9kVbdNVyk350,1469
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
14
|
thds/adls/file_properties.py,sha256=V3VEjEG3PNyeQaJ159Kco5l7c2EiyjE0yygtmVVjd6E,1597
|
|
@@ -33,8 +33,8 @@ thds/adls/resource/up_down.py,sha256=3uNlTvm2gVhSyYdQTBwsGecOgwtINQfINckR-awwV0Y
|
|
|
33
33
|
thds/adls/tools/download.py,sha256=vvBO8lSDl9oPugv75qpCkoemT9pOM9BV6yeExlkyG08,1594
|
|
34
34
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
35
35
|
thds/adls/tools/upload.py,sha256=eMk4pdug1aCMPDDWpIE3Zoq77i5APp9Uuh-sVCCDNJE,493
|
|
36
|
-
thds_adls-3.1.
|
|
37
|
-
thds_adls-3.1.
|
|
38
|
-
thds_adls-3.1.
|
|
39
|
-
thds_adls-3.1.
|
|
40
|
-
thds_adls-3.1.
|
|
36
|
+
thds_adls-3.1.20250407153132.dist-info/METADATA,sha256=WIZ-EI3_ANwrwbPYM3ZGD4LYZU9O1jBoChMYcOvCUn4,548
|
|
37
|
+
thds_adls-3.1.20250407153132.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
38
|
+
thds_adls-3.1.20250407153132.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
39
|
+
thds_adls-3.1.20250407153132.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
40
|
+
thds_adls-3.1.20250407153132.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-3.1.20250402145019.dist-info → thds_adls-3.1.20250407153132.dist-info}/top_level.txt
RENAMED
|
File without changes
|