thds.adls 4.2.20251002171438__py3-none-any.whl → 4.3.20251008013223__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/download.py CHANGED
@@ -16,7 +16,7 @@ from thds.core.types import StrOrPath
16
16
 
17
17
  from . import azcopy, errors, etag, hashes
18
18
  from ._progress import report_download_progress
19
- from .download_lock import download_lock
19
+ from .file_lock import file_lock
20
20
  from .fqn import AdlsFqn
21
21
  from .ro_cache import Cache, from_cache_path_to_local, from_local_path_to_cache
22
22
 
@@ -240,12 +240,12 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
240
240
  # No cache hit, so its time to prepare to download. if a cache was provided, we will
241
241
  # _put_ the resulting file in it.
242
242
 
243
- file_lock = str(cache.path(fqn) if cache else local_path)
243
+ file_lock_str = str(cache.path(fqn) if cache else local_path)
244
244
  # create lockfile name from the (shared) cache path if present, otherwise the final
245
245
  # destination. Non-cache users may then still incur multiple downloads in parallel,
246
246
  # but if you wanted to coordinate then you should probably have been using the global
247
247
  # cache in the first place.
248
- _dl_scope.enter(download_lock(file_lock))
248
+ _dl_scope.enter(file_lock(file_lock_str))
249
249
 
250
250
  # re-attempt cache hit - we may have gotten the lock after somebody else downloaded
251
251
  if file_result := attempt_cache_hit():
@@ -9,18 +9,18 @@ from thds.core import config, home, log
9
9
 
10
10
  from .md5 import hex_md5_str
11
11
 
12
- DOWNLOAD_LOCKS_DIR = config.item("dir", home.HOMEDIR() / ".thds/adls/download-locks", parse=Path)
12
+ FILELOCKS_DIR = config.item("dir", home.HOMEDIR() / ".thds/adls/file-locks", parse=Path)
13
13
  _CLEAN_UP_LOCKFILES_AFTER_TIME = timedelta(hours=24)
14
14
  _CLEAN_UP_LOCKFILES_EVERY = timedelta(hours=1).total_seconds()
15
15
  _LAST_CLEANED_BY_THIS_PROCESS = time.monotonic() - _CLEAN_UP_LOCKFILES_EVERY
16
16
  logger = log.getLogger(__name__)
17
17
 
18
18
 
19
- def _clean_download_locks() -> int:
19
+ def _clean_file_locks() -> int:
20
20
  deleted = 0
21
21
  deletion_threshold = time.time() - _CLEAN_UP_LOCKFILES_AFTER_TIME.total_seconds()
22
22
  try:
23
- for f in DOWNLOAD_LOCKS_DIR().iterdir():
23
+ for f in FILELOCKS_DIR().rglob("*"):
24
24
  fstat = f.stat()
25
25
  if stat.S_ISREG(fstat.st_mode) and fstat.st_mtime < deletion_threshold:
26
26
  f.unlink()
@@ -29,20 +29,20 @@ def _clean_download_locks() -> int:
29
29
  # this should be, hopefully, both very rare and completely inconsequential as to
30
30
  # program correctness. if you see this happen multiple times, you may have some
31
31
  # read-only files or something and want to manually clean up this directory.
32
- logger.exception("Failed to clean download locks directory.")
32
+ logger.exception("Failed to clean file locks directory.")
33
33
  return deleted
34
34
 
35
35
 
36
- def _occasionally_clean_download_locks():
36
+ def _occasionally_clean_file_locks():
37
37
  global _LAST_CLEANED_BY_THIS_PROCESS
38
38
  # do this about once an hour
39
39
  if time.monotonic() > _LAST_CLEANED_BY_THIS_PROCESS + _CLEAN_UP_LOCKFILES_EVERY:
40
40
  _LAST_CLEANED_BY_THIS_PROCESS = time.monotonic()
41
41
  # minor race condition with other threads but it doesn't really matter.
42
- _clean_download_locks()
42
+ _clean_file_locks()
43
43
 
44
44
 
45
- def download_lock(download_unique_str: str) -> FileLock:
45
+ def file_lock(lock_unique_str: str, locktype: str = "download") -> FileLock:
46
46
  """Note that the lockfiles will never be deleted automatically.
47
47
  https://py-filelock.readthedocs.io/en/latest/api.html#filelock.BaseFileLock.release
48
48
 
@@ -50,7 +50,7 @@ def download_lock(download_unique_str: str) -> FileLock:
50
50
  https://stackoverflow.com/questions/58098634/why-does-the-python-filelock-library-delete-lockfiles-on-windows-but-not-unix
51
51
 
52
52
  This means local developers would have a whole bunch of zero-byte files in their
53
- download locks directory. So, we take a slightly idiosyncratic approach to cleaning
53
+ file locks directory. So, we take a slightly idiosyncratic approach to cleaning
54
54
  this up: not wanting to run this code on every download, but also not wanting
55
55
  developers to see an infinitely-growing mess. Since parallel downloads will
56
56
  (generally) not constitute a correctness issue, the 'safest' time to clean it up will
@@ -58,11 +58,11 @@ def download_lock(download_unique_str: str) -> FileLock:
58
58
  we can get rid of old lockfiles after they've existed for more than 24 hours, since
59
59
  it's quite rare that a download would last that long.
60
60
  """
61
- DOWNLOAD_LOCKS_DIR().mkdir(parents=True, exist_ok=True)
62
- _occasionally_clean_download_locks()
61
+ lock_type_dir = FILELOCKS_DIR() / locktype
62
+ lock_type_dir.mkdir(parents=True, exist_ok=True)
63
+ _occasionally_clean_file_locks()
63
64
  return FileLock(
64
- DOWNLOAD_LOCKS_DIR()
65
- / (download_unique_str.split("/")[-1][:50] + hex_md5_str(download_unique_str)),
65
+ lock_type_dir / (lock_unique_str.split("/")[-1][:50] + hex_md5_str(lock_unique_str)),
66
66
  # is_singleton=True,
67
67
  # critical for keeping this reentrant without passing the lock around.
68
68
  # see https://github.com/tox-dev/filelock/issues/315#issuecomment-2016797681
thds/adls/list_fast.py CHANGED
@@ -31,7 +31,7 @@ def multilayer_yield_blob_meta(fqn: AdlsFqn, layers: int = 1) -> ty.Iterator[Blo
31
31
  # directly yield the blobs
32
32
  yield from yield_blob_meta(
33
33
  global_client.get_global_blob_container_client(fqn.sa, fqn.container),
34
- fqn.path,
34
+ fqn.path.rstrip("/") + "/",
35
35
  )
36
36
  return
37
37
 
thds/adls/upload.py CHANGED
@@ -17,6 +17,7 @@ from . import azcopy, hashes
17
17
  from ._progress import report_upload_progress
18
18
  from ._upload import upload_decision_and_metadata
19
19
  from .conf import UPLOAD_FILE_MAX_CONCURRENCY
20
+ from .file_lock import file_lock
20
21
  from .fqn import AdlsFqn
21
22
  from .global_client import get_global_blob_container_client
22
23
  from .ro_cache import Cache
@@ -101,6 +102,8 @@ def upload(
101
102
  # we always use the original source file to upload, not the cached path,
102
103
  # because uploading from a shared location risks race conditions.
103
104
 
105
+ scope.enter(file_lock(str(dest_), locktype="upload"))
106
+
104
107
  blob_container_client = get_global_blob_container_client(dest_.sa, dest_.container)
105
108
  blob_client = blob_container_client.get_blob_client(dest_.path)
106
109
  decision = upload_decision_and_metadata(blob_client.get_blob_properties, src) # type: ignore [arg-type]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.adls
3
- Version: 4.2.20251002171438
3
+ Version: 4.3.20251008013223
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
@@ -8,16 +8,16 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
8
8
  thds/adls/copy.py,sha256=aD6AquUR8r5W9SXd6Nm1qPrFH_fYpLC5dZk6HjPJnSQ,6611
9
9
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
10
10
  thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
11
- thds/adls/download.py,sha256=z31w4Yuz4CqmU0iectcXmSoM2QJb1mSp9tGs0GHEhtY,19146
12
- thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
11
+ thds/adls/download.py,sha256=IPg5nz_sGE7dX8DUQyWjG2D9z54PXLScap-pZzTUFTk,19142
13
12
  thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
14
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
14
+ thds/adls/file_lock.py,sha256=yLak5XDpnIYwfUNdpGFbIGG64uEs98-yVscNpJlqMxM,3176
15
15
  thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
16
16
  thds/adls/fqn.py,sha256=pIfEw25SjZNGmtzOLwrYCblciK35VQ35ZWb_mRaZKK0,5915
17
17
  thds/adls/global_client.py,sha256=q6oG1OOsfl4fbti81u8TE9d4Jx9-phlYgsGSc4032w8,3721
18
18
  thds/adls/hashes.py,sha256=2x1zcT_87en_vqFrLFs6F_EZCGpn7hk_81dYAwcypm8,5459
19
19
  thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
20
- thds/adls/list_fast.py,sha256=7jHnln4DMWYVLHhejj-fdWMBWflBiWfynegKxcUlNDY,4189
20
+ thds/adls/list_fast.py,sha256=yk0ydFiBa7U5JU3BCcIGCcrnS-J3yJaZbaZQ_Xj9xWU,4207
21
21
  thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
22
22
  thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
23
23
  thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -26,7 +26,7 @@ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
26
26
  thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
27
27
  thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
28
28
  thds/adls/source_tree.py,sha256=gl2JLjxAduo4cGQBb8LqBnmRHHk2wqIC5yt-sqkXOEo,2589
29
- thds/adls/upload.py,sha256=MRHK9Am-x5FKBPh1SXLTbPC1r0Xk0bGWNU8CcNuUMLo,6602
29
+ thds/adls/upload.py,sha256=7eWPpPuIgCBb3Svk05K1UNv0376q8wDTQkpTaKLtg-w,6694
30
30
  thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
31
31
  thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
32
32
  thds/adls/azcopy/download.py,sha256=FOtYyYh7ZXNWNdkj04yTV26lxcKOVj-YhS2p_EclYxA,6526
@@ -38,8 +38,8 @@ thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1
38
38
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
39
39
  thds/adls/tools/ls_fast.py,sha256=Nowc-efAL_Y4ybPwZzKIeh7KGIjfecRzdWvJZcBzq_8,585
40
40
  thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
41
- thds_adls-4.2.20251002171438.dist-info/METADATA,sha256=V7J4XH60NB7l3QCwQnIrlZls6UEJhFYBKoi8DLtGSbY,587
42
- thds_adls-4.2.20251002171438.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- thds_adls-4.2.20251002171438.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
44
- thds_adls-4.2.20251002171438.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
45
- thds_adls-4.2.20251002171438.dist-info/RECORD,,
41
+ thds_adls-4.3.20251008013223.dist-info/METADATA,sha256=YNBtKknKx9Hv62k3anREQ5VBI6_KY5LoZ6h68aNLMt8,587
42
+ thds_adls-4.3.20251008013223.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ thds_adls-4.3.20251008013223.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
44
+ thds_adls-4.3.20251008013223.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
45
+ thds_adls-4.3.20251008013223.dist-info/RECORD,,