thds.adls 3.1.20250407135823__py3-none-any.whl → 3.1.20250408202524__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/download.py CHANGED
@@ -5,6 +5,7 @@ import shutil
5
5
  import typing as ty
6
6
  from base64 import b64decode
7
7
 
8
+ import aiohttp.http_exceptions
8
9
  from azure.core.exceptions import AzureError, HttpResponseError
9
10
  from azure.storage.filedatalake import (
10
11
  ContentSettings,
@@ -13,7 +14,7 @@ from azure.storage.filedatalake import (
13
14
  FileSystemClient,
14
15
  )
15
16
 
16
- from thds.core import log, scope, tmp
17
+ from thds.core import fretry, log, scope, tmp
17
18
  from thds.core.hashing import b64
18
19
  from thds.core.types import StrOrPath
19
20
 
@@ -313,6 +314,13 @@ def _set_md5_if_missing(
313
314
  return file_properties.content_settings
314
315
 
315
316
 
317
+ def _excs_to_retry() -> ty.Callable[[Exception], bool]:
318
+ """These are exceptions that we observe to be spurious failures worth retrying."""
319
+ return fretry.is_exc(
320
+ aiohttp.http_exceptions.ContentLengthError, aiohttp.client_exceptions.ClientPayloadError
321
+ )
322
+
323
+
316
324
  @_dl_scope.bound
317
325
  def download_or_use_verified(
318
326
  fs_client: FileSystemClient,
@@ -339,7 +347,10 @@ def download_or_use_verified(
339
347
  co_request = co.send(file_properties)
340
348
  elif isinstance(co_request, azcopy.download.DownloadRequest):
341
349
  # coroutine is requesting download
342
- azcopy.download.sync_fastpath(dl_file_client, co_request)
350
+ fretry.retry_regular(_excs_to_retry(), fretry.n_times(2))(
351
+ # retry n_times(2) means _retry_ twice.
352
+ azcopy.download.sync_fastpath
353
+ )(dl_file_client, co_request)
343
354
  co_request = co.send(None)
344
355
  else:
345
356
  raise ValueError(f"Unexpected coroutine request: {co_request}")
@@ -378,7 +389,15 @@ async def async_download_or_use_verified(
378
389
  co_request = co.send(file_properties)
379
390
  elif isinstance(co_request, azcopy.download.DownloadRequest):
380
391
  # coroutine is requesting download
381
- await azcopy.download.async_fastpath(dl_file_client, co_request)
392
+
393
+ await fretry.retry_regular_async(
394
+ _excs_to_retry(), fretry.iter_to_async(fretry.n_times(2))
395
+ )(
396
+ # retry n_times(2) means _retry_ twice.
397
+ azcopy.download.async_fastpath
398
+ )(
399
+ dl_file_client, co_request
400
+ )
382
401
  co_request = co.send(None)
383
402
  else:
384
403
  raise ValueError(f"Unexpected coroutine request: {co_request}")
thds/adls/download_lock.py CHANGED
@@ -1,4 +1,4 @@
1
- import random
1
+ import stat
2
2
  import time
3
3
  from datetime import timedelta
4
4
  from pathlib import Path
@@ -11,6 +11,8 @@ from .md5 import hex_md5_str
11
11
 
12
12
  DOWNLOAD_LOCKS_DIR = config.item("dir", home.HOMEDIR() / ".adls-md5-download-locks", parse=Path)
13
13
  _CLEAN_UP_LOCKFILES_AFTER_TIME = timedelta(hours=24)
14
+ _CLEAN_UP_LOCKFILES_EVERY = timedelta(hours=1).total_seconds()
15
+ _LAST_CLEANED_BY_THIS_PROCESS = time.monotonic() - _CLEAN_UP_LOCKFILES_EVERY
14
16
  logger = log.getLogger(__name__)
15
17
 
16
18
 
@@ -19,7 +21,8 @@ def _clean_download_locks() -> int:
19
21
  deletion_threshold = time.time() - _CLEAN_UP_LOCKFILES_AFTER_TIME.total_seconds()
20
22
  try:
21
23
  for f in DOWNLOAD_LOCKS_DIR().iterdir():
22
- if f.is_file() and f.stat().st_mtime < deletion_threshold:
24
+ fstat = f.stat()
25
+ if stat.S_ISREG(fstat.st_mode) and fstat.st_mtime < deletion_threshold:
23
26
  f.unlink()
24
27
  deleted += 1
25
28
  except Exception:
@@ -31,8 +34,11 @@ def _clean_download_locks() -> int:
31
34
 
32
35
 
33
36
  def _occasionally_clean_download_locks():
34
- if random.random() < 0.005: # do this about every 200 downloads
35
- # random.random is considered to be very fast, and we have no need of cryptographic quality.
37
+ global _LAST_CLEANED_BY_THIS_PROCESS
38
+ # do this about once an hour
39
+ if time.monotonic() > _LAST_CLEANED_BY_THIS_PROCESS + _CLEAN_UP_LOCKFILES_EVERY:
40
+ _LAST_CLEANED_BY_THIS_PROCESS = time.monotonic()
41
+ # minor race condition with other threads but it doesn't really matter.
36
42
  _clean_download_locks()
37
43
 
38
44
 
thds_adls-3.1.20250408202524.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: thds.adls
3
- Version: 3.1.20250407135823
3
+ Version: 3.1.20250408202524
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
thds_adls-3.1.20250408202524.dist-info/RECORD CHANGED
@@ -7,8 +7,8 @@ thds/adls/conf.py,sha256=q1SPrgb46NpobVzwt_Oyv71-BvsIbZLq9nRWS3LZjz0,1990
7
7
  thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
8
8
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
9
9
  thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
10
- thds/adls/download.py,sha256=JUF-J6bfaSzDM389errg8tHPssGt-K1yp3O_pznls3o,16852
11
- thds/adls/download_lock.py,sha256=_JZj-kjCUfHk9FvrmEuYpJYknmbam5eReFhGNDgzdLQ,2520
10
+ thds/adls/download.py,sha256=ds8makvLOiC8LibHzb_X85XLaSPGMCNxG7ZL7bEVzCI,17584
11
+ thds/adls/download_lock.py,sha256=ttD2GhPNRnITNoV1XH2PvKbMsHppZirjy3RZ4P4kgKM,2826
12
12
  thds/adls/errors.py,sha256=B_rMsQvQnNmP_sf-x8kmGsv2vIeOh4G9kVbdNVyk350,1469
13
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
14
14
  thds/adls/file_properties.py,sha256=V3VEjEG3PNyeQaJ159Kco5l7c2EiyjE0yygtmVVjd6E,1597
@@ -33,8 +33,8 @@ thds/adls/resource/up_down.py,sha256=3uNlTvm2gVhSyYdQTBwsGecOgwtINQfINckR-awwV0Y
33
33
  thds/adls/tools/download.py,sha256=vvBO8lSDl9oPugv75qpCkoemT9pOM9BV6yeExlkyG08,1594
34
34
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
35
35
  thds/adls/tools/upload.py,sha256=eMk4pdug1aCMPDDWpIE3Zoq77i5APp9Uuh-sVCCDNJE,493
36
- thds_adls-3.1.20250407135823.dist-info/METADATA,sha256=uMWMLqoD4fkF67t6XXhyJ4h5YgaNCWHJlxJmCJIWATc,548
37
- thds_adls-3.1.20250407135823.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
38
- thds_adls-3.1.20250407135823.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
39
- thds_adls-3.1.20250407135823.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
40
- thds_adls-3.1.20250407135823.dist-info/RECORD,,
36
+ thds_adls-3.1.20250408202524.dist-info/METADATA,sha256=uN4O6WEdsKfOuz340C5XnDYiuQWvWQcDnY9f5SRrBRY,548
37
+ thds_adls-3.1.20250408202524.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
38
+ thds_adls-3.1.20250408202524.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
39
+ thds_adls-3.1.20250408202524.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
40
+ thds_adls-3.1.20250408202524.dist-info/RECORD,,