thds.adls 4.2.20251007062717__py3-none-any.whl → 4.3.20251008013223__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/download.py +3 -3
- thds/adls/{download_lock.py → file_lock.py} +12 -12
- thds/adls/upload.py +3 -0
- {thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/METADATA +1 -1
- {thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/RECORD +8 -8
- {thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/WHEEL +0 -0
- {thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/top_level.txt +0 -0
thds/adls/download.py
CHANGED
|
@@ -16,7 +16,7 @@ from thds.core.types import StrOrPath
|
|
|
16
16
|
|
|
17
17
|
from . import azcopy, errors, etag, hashes
|
|
18
18
|
from ._progress import report_download_progress
|
|
19
|
-
from .download_lock import download_lock
|
|
19
|
+
from .file_lock import file_lock
|
|
20
20
|
from .fqn import AdlsFqn
|
|
21
21
|
from .ro_cache import Cache, from_cache_path_to_local, from_local_path_to_cache
|
|
22
22
|
|
|
@@ -240,12 +240,12 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
|
|
|
240
240
|
# No cache hit, so its time to prepare to download. if a cache was provided, we will
|
|
241
241
|
# _put_ the resulting file in it.
|
|
242
242
|
|
|
243
|
-
|
|
243
|
+
file_lock_str = str(cache.path(fqn) if cache else local_path)
|
|
244
244
|
# create lockfile name from the (shared) cache path if present, otherwise the final
|
|
245
245
|
# destination. Non-cache users may then still incur multiple downloads in parallel,
|
|
246
246
|
# but if you wanted to coordinate then you should probably have been using the global
|
|
247
247
|
# cache in the first place.
|
|
248
|
-
_dl_scope.enter(
|
|
248
|
+
_dl_scope.enter(file_lock(file_lock_str))
|
|
249
249
|
|
|
250
250
|
# re-attempt cache hit - we may have gotten the lock after somebody else downloaded
|
|
251
251
|
if file_result := attempt_cache_hit():
|
|
@@ -9,18 +9,18 @@ from thds.core import config, home, log
|
|
|
9
9
|
|
|
10
10
|
from .md5 import hex_md5_str
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
FILELOCKS_DIR = config.item("dir", home.HOMEDIR() / ".thds/adls/file-locks", parse=Path)
|
|
13
13
|
_CLEAN_UP_LOCKFILES_AFTER_TIME = timedelta(hours=24)
|
|
14
14
|
_CLEAN_UP_LOCKFILES_EVERY = timedelta(hours=1).total_seconds()
|
|
15
15
|
_LAST_CLEANED_BY_THIS_PROCESS = time.monotonic() - _CLEAN_UP_LOCKFILES_EVERY
|
|
16
16
|
logger = log.getLogger(__name__)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def _clean_download_locks() -> int:
|
|
19
|
+
def _clean_file_locks() -> int:
|
|
20
20
|
deleted = 0
|
|
21
21
|
deletion_threshold = time.time() - _CLEAN_UP_LOCKFILES_AFTER_TIME.total_seconds()
|
|
22
22
|
try:
|
|
23
|
-
for f in
|
|
23
|
+
for f in FILELOCKS_DIR().rglob("*"):
|
|
24
24
|
fstat = f.stat()
|
|
25
25
|
if stat.S_ISREG(fstat.st_mode) and fstat.st_mtime < deletion_threshold:
|
|
26
26
|
f.unlink()
|
|
@@ -29,20 +29,20 @@ def _clean_download_locks() -> int:
|
|
|
29
29
|
# this should be, hopefully, both very rare and completely inconsequential as to
|
|
30
30
|
# program correctness. if you see this happen multiple times, you may have some
|
|
31
31
|
# read-only files or something and want to manually clean up this directory.
|
|
32
|
-
logger.exception("Failed to clean
|
|
32
|
+
logger.exception("Failed to clean file locks directory.")
|
|
33
33
|
return deleted
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def
|
|
36
|
+
def _occasionally_clean_file_locks():
|
|
37
37
|
global _LAST_CLEANED_BY_THIS_PROCESS
|
|
38
38
|
# do this about once an hour
|
|
39
39
|
if time.monotonic() > _LAST_CLEANED_BY_THIS_PROCESS + _CLEAN_UP_LOCKFILES_EVERY:
|
|
40
40
|
_LAST_CLEANED_BY_THIS_PROCESS = time.monotonic()
|
|
41
41
|
# minor race condition with other threads but it doesn't really matter.
|
|
42
|
-
|
|
42
|
+
_clean_file_locks()
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
def download_lock(download_unique_str: str) -> FileLock:
|
|
45
|
+
def file_lock(lock_unique_str: str, locktype: str = "download") -> FileLock:
|
|
46
46
|
"""Note that the lockfiles will never be deleted automatically.
|
|
47
47
|
https://py-filelock.readthedocs.io/en/latest/api.html#filelock.BaseFileLock.release
|
|
48
48
|
|
|
@@ -50,7 +50,7 @@ def download_lock(download_unique_str: str) -> FileLock:
|
|
|
50
50
|
https://stackoverflow.com/questions/58098634/why-does-the-python-filelock-library-delete-lockfiles-on-windows-but-not-unix
|
|
51
51
|
|
|
52
52
|
This means local developers would have a whole bunch of zero-byte files in their
|
|
53
|
-
|
|
53
|
+
file locks directory. So, we take a slightly idiosyncratic approach to cleaning
|
|
54
54
|
this up: not wanting to run this code on every download, but also not wanting
|
|
55
55
|
developers to see an infinitely-growing mess. Since parallel downloads will
|
|
56
56
|
(generally) not constitute a correctness issue, the 'safest' time to clean it up will
|
|
@@ -58,11 +58,11 @@ def download_lock(download_unique_str: str) -> FileLock:
|
|
|
58
58
|
we can get rid of old lockfiles after they've existed for more than 24 hours, since
|
|
59
59
|
it's quite rare that a download would last that long.
|
|
60
60
|
"""
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
lock_type_dir = FILELOCKS_DIR() / locktype
|
|
62
|
+
lock_type_dir.mkdir(parents=True, exist_ok=True)
|
|
63
|
+
_occasionally_clean_file_locks()
|
|
63
64
|
return FileLock(
|
|
64
|
-
|
|
65
|
-
/ (download_unique_str.split("/")[-1][:50] + hex_md5_str(download_unique_str)),
|
|
65
|
+
lock_type_dir / (lock_unique_str.split("/")[-1][:50] + hex_md5_str(lock_unique_str)),
|
|
66
66
|
# is_singleton=True,
|
|
67
67
|
# critical for keeping this reentrant without passing the lock around.
|
|
68
68
|
# see https://github.com/tox-dev/filelock/issues/315#issuecomment-2016797681
|
thds/adls/upload.py
CHANGED
|
@@ -17,6 +17,7 @@ from . import azcopy, hashes
|
|
|
17
17
|
from ._progress import report_upload_progress
|
|
18
18
|
from ._upload import upload_decision_and_metadata
|
|
19
19
|
from .conf import UPLOAD_FILE_MAX_CONCURRENCY
|
|
20
|
+
from .file_lock import file_lock
|
|
20
21
|
from .fqn import AdlsFqn
|
|
21
22
|
from .global_client import get_global_blob_container_client
|
|
22
23
|
from .ro_cache import Cache
|
|
@@ -101,6 +102,8 @@ def upload(
|
|
|
101
102
|
# we always use the original source file to upload, not the cached path,
|
|
102
103
|
# because uploading from a shared location risks race conditions.
|
|
103
104
|
|
|
105
|
+
scope.enter(file_lock(str(dest_), locktype="upload"))
|
|
106
|
+
|
|
104
107
|
blob_container_client = get_global_blob_container_client(dest_.sa, dest_.container)
|
|
105
108
|
blob_client = blob_container_client.get_blob_client(dest_.path)
|
|
106
109
|
decision = upload_decision_and_metadata(blob_client.get_blob_properties, src) # type: ignore [arg-type]
|
|
@@ -8,10 +8,10 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
8
8
|
thds/adls/copy.py,sha256=aD6AquUR8r5W9SXd6Nm1qPrFH_fYpLC5dZk6HjPJnSQ,6611
|
|
9
9
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
10
10
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
11
|
-
thds/adls/download.py,sha256=
|
|
12
|
-
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
11
|
+
thds/adls/download.py,sha256=IPg5nz_sGE7dX8DUQyWjG2D9z54PXLScap-pZzTUFTk,19142
|
|
13
12
|
thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
|
|
14
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
|
+
thds/adls/file_lock.py,sha256=yLak5XDpnIYwfUNdpGFbIGG64uEs98-yVscNpJlqMxM,3176
|
|
15
15
|
thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
|
|
16
16
|
thds/adls/fqn.py,sha256=pIfEw25SjZNGmtzOLwrYCblciK35VQ35ZWb_mRaZKK0,5915
|
|
17
17
|
thds/adls/global_client.py,sha256=q6oG1OOsfl4fbti81u8TE9d4Jx9-phlYgsGSc4032w8,3721
|
|
@@ -26,7 +26,7 @@ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
|
|
|
26
26
|
thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
|
|
27
27
|
thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
|
|
28
28
|
thds/adls/source_tree.py,sha256=gl2JLjxAduo4cGQBb8LqBnmRHHk2wqIC5yt-sqkXOEo,2589
|
|
29
|
-
thds/adls/upload.py,sha256=
|
|
29
|
+
thds/adls/upload.py,sha256=7eWPpPuIgCBb3Svk05K1UNv0376q8wDTQkpTaKLtg-w,6694
|
|
30
30
|
thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
|
|
31
31
|
thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
|
|
32
32
|
thds/adls/azcopy/download.py,sha256=FOtYyYh7ZXNWNdkj04yTV26lxcKOVj-YhS2p_EclYxA,6526
|
|
@@ -38,8 +38,8 @@ thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1
|
|
|
38
38
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
39
39
|
thds/adls/tools/ls_fast.py,sha256=Nowc-efAL_Y4ybPwZzKIeh7KGIjfecRzdWvJZcBzq_8,585
|
|
40
40
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
41
|
-
thds_adls-4.
|
|
42
|
-
thds_adls-4.
|
|
43
|
-
thds_adls-4.
|
|
44
|
-
thds_adls-4.
|
|
45
|
-
thds_adls-4.
|
|
41
|
+
thds_adls-4.3.20251008013223.dist-info/METADATA,sha256=YNBtKknKx9Hv62k3anREQ5VBI6_KY5LoZ6h68aNLMt8,587
|
|
42
|
+
thds_adls-4.3.20251008013223.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
+
thds_adls-4.3.20251008013223.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
|
|
44
|
+
thds_adls-4.3.20251008013223.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
45
|
+
thds_adls-4.3.20251008013223.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.2.20251007062717.dist-info → thds_adls-4.3.20251008013223.dist-info}/top_level.txt
RENAMED
|
File without changes
|