thds.adls 4.1.20250701190349__py3-none-any.whl → 4.1.20250703020842__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

@@ -29,7 +29,7 @@ logger = log.getLogger(__name__)
29
29
  @dataclass
30
30
  class DownloadRequest:
31
31
  temp_path: Path
32
- size_bytes: int
32
+ size_bytes: ty.Optional[int]
33
33
 
34
34
 
35
35
  @dataclass
@@ -73,19 +73,24 @@ def sync_fastpath(
73
73
  env=system_resources.restrict_usage(),
74
74
  )
75
75
  assert process.stdout
76
- with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes) as track:
76
+ output_lines = list()
77
+ with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
77
78
  for line in process.stdout:
78
79
  track(line)
80
+ output_lines.append(line.strip())
79
81
 
80
82
  process.wait()
81
83
  if process.returncode != 0:
82
- raise subprocess.SubprocessError(f"AzCopy failed with return code {process.returncode}")
84
+ raise subprocess.CalledProcessError(
85
+ process.returncode,
86
+ f"AzCopy failed with return code {process.returncode}\n\n" + "\n".join(output_lines),
87
+ )
83
88
  assert (
84
89
  download_request.temp_path.exists()
85
90
  ), f"AzCopy did not create the file at {download_request.temp_path}"
86
- return # success
91
+ return
87
92
 
88
- except (subprocess.CalledProcessError, FileNotFoundError):
93
+ except (subprocess.SubprocessError, FileNotFoundError):
89
94
  logger.warning("Falling back to Python SDK for download")
90
95
 
91
96
  logger.debug("Downloading %s using Python SDK", dl_file_client.url)
@@ -121,19 +126,24 @@ async def async_fastpath(
121
126
  assert copy_proc.stdout
122
127
 
123
128
  # Feed lines to the tracker asynchronously
124
- with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes) as track:
129
+ output_lines = list()
130
+ with progress.azcopy_tracker(dl_file_client.url, download_request.size_bytes or 0) as track:
125
131
  while True:
126
132
  line = await copy_proc.stdout.readline()
127
133
  if not line: # EOF
128
134
  break
129
135
  track(line.decode().strip())
136
+ output_lines.append(line.decode().strip())
130
137
 
131
138
  # Wait for process completion
132
139
  exit_code = await copy_proc.wait()
133
140
  if exit_code != 0:
134
- raise subprocess.SubprocessError()
141
+ raise subprocess.CalledProcessError(
142
+ exit_code,
143
+ f"AzCopy failed with return code {exit_code}\n\n" + "\n".join(output_lines),
144
+ )
135
145
 
136
- return # success
146
+ return
137
147
 
138
148
  except (subprocess.SubprocessError, FileNotFoundError):
139
149
  logger.warning("Falling back to Python SDK for download")
@@ -86,10 +86,15 @@ def run(
86
86
  env=system_resources.restrict_usage(),
87
87
  )
88
88
  assert process.stdout
89
+ output_lines = list()
89
90
  with progress.azcopy_tracker(uri.to_blob_windows_url(dest), size_bytes) as track:
90
91
  for line in process.stdout:
91
92
  track(line)
93
+ output_lines.append(line.strip())
92
94
 
93
95
  process.wait()
94
96
  if process.returncode != 0:
95
- raise subprocess.SubprocessError(f"AzCopy failed with return code {process.returncode}")
97
+ raise subprocess.CalledProcessError(
98
+ process.returncode,
99
+ f"AzCopy failed with return code {process.returncode}\n\n" + "\n".join(output_lines),
100
+ )
thds/adls/download.py CHANGED
@@ -7,6 +7,7 @@ import typing as ty
7
7
  from pathlib import Path
8
8
 
9
9
  import aiohttp.http_exceptions
10
+ import requests.exceptions
10
11
  from azure.core.exceptions import AzureError, HttpResponseError, ResourceModifiedError
11
12
  from azure.storage.filedatalake import DataLakeFileClient, FileProperties, FileSystemClient, aio
12
13
 
@@ -22,27 +23,31 @@ from .ro_cache import Cache, from_cache_path_to_local, from_local_path_to_cache
22
23
  logger = log.getLogger(__name__)
23
24
 
24
25
 
26
+ def _check_size(dpath: Path, expected_size: ty.Optional[int]) -> None:
27
+ actual_size = os.path.getsize(dpath)
28
+ if expected_size is not None and actual_size != expected_size:
29
+ raise errors.ContentLengthMismatchError(
30
+ f"Downloaded file {dpath} has size {actual_size} but expected {expected_size}"
31
+ )
32
+
33
+
25
34
  @contextlib.contextmanager
26
35
  def _atomic_download_and_move(
27
36
  fqn: AdlsFqn,
28
37
  dest: StrOrPath,
29
38
  properties: ty.Optional[FileProperties] = None,
30
39
  ) -> ty.Iterator[azcopy.download.DownloadRequest]:
31
- known_size = (properties.size or 0) if properties else 0
40
+ known_size = properties.size if properties else None
32
41
  with tmp.temppath_same_fs(dest) as dpath:
33
42
  logger.debug("Downloading %s", fqn)
34
- if azcopy.download.should_use_azcopy(known_size):
43
+ if azcopy.download.should_use_azcopy(known_size or -1):
35
44
  yield azcopy.download.DownloadRequest(dpath, known_size)
36
45
  else:
37
46
  with open(dpath, "wb") as down_f:
38
47
  yield azcopy.download.SdkDownloadRequest(
39
- dpath, known_size, report_download_progress(down_f, str(fqn), known_size)
48
+ dpath, known_size, report_download_progress(down_f, str(fqn), known_size or 0)
40
49
  )
41
- if known_size and os.path.getsize(dpath) != known_size:
42
- raise errors.ContentLengthMismatchError(
43
- f"Downloaded file {dpath} has size {os.path.getsize(dpath)}"
44
- f" but expected {known_size}."
45
- )
50
+ _check_size(dpath, known_size)
46
51
  try:
47
52
  os.rename(dpath, dest) # will succeed even if dest is read-only
48
53
  except OSError as oserr:
@@ -303,7 +308,7 @@ def _excs_to_retry() -> ty.Callable[[Exception], bool]:
303
308
  filter(
304
309
  None,
305
310
  (
306
- errors.ContentLengthMismatchError,
311
+ requests.exceptions.ConnectionError,
307
312
  aiohttp.http_exceptions.ContentLengthError,
308
313
  aiohttp.client_exceptions.ClientPayloadError,
309
314
  getattr(
@@ -368,6 +373,9 @@ _async_dl_scope = scope.AsyncScope("adls.download.async")
368
373
 
369
374
  @_dl_scope.bound
370
375
  @_async_dl_scope.async_bound
376
+ @fretry.retry_regular_async(
377
+ fretry.is_exc(errors.ContentLengthMismatchError), fretry.iter_to_async(fretry.n_times(2))
378
+ )
371
379
  async def async_download_or_use_verified(
372
380
  fs_client: aio.FileSystemClient,
373
381
  remote_key: str,
@@ -381,9 +389,8 @@ async def async_download_or_use_verified(
381
389
  co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
382
390
  fs_client, remote_key, local_path, expected_hash, cache
383
391
  )
384
- await _async_dl_scope.async_enter(
385
- dl_file_client # type: ignore[arg-type]
386
- ) # on __aexit__, will release the connection to the pool
392
+ await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
393
+ # on __aexit__, will release the connection to the pool
387
394
  while True:
388
395
  if co_request == _IoRequest.FILE_PROPERTIES:
389
396
  if not file_properties:
@@ -393,7 +400,6 @@ async def async_download_or_use_verified(
393
400
  co_request = co.send(file_properties)
394
401
  elif isinstance(co_request, azcopy.download.DownloadRequest):
395
402
  # coroutine is requesting download
396
-
397
403
  await fretry.retry_regular_async(
398
404
  _excs_to_retry(), fretry.iter_to_async(fretry.n_times(2))
399
405
  )(
@@ -16,9 +16,10 @@ def main():
16
16
  help="A fully qualified path to an ADLS location. Accepts adls://, https:// and abfss:// URIs.",
17
17
  )
18
18
  parser.add_argument(
19
- "--copy-to",
20
- "-c",
19
+ "copy_to",
20
+ nargs="?",
21
21
  type=Path,
22
+ default=None,
22
23
  help="This will create a link to the cached download at the specified location",
23
24
  )
24
25
  parser.add_argument(
thds/adls/upload.py CHANGED
@@ -32,8 +32,11 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
32
32
  @scope.bound
33
33
  def _try_write_through() -> bool:
34
34
  if isinstance(data, Path) and data.exists():
35
+ # we don't do hard or soft links because they share file permissions,
36
+ # and it's not up to us to change permissions on the src file.
35
37
  link.link_or_copy(data, local_cache_path, "ref")
36
38
  return True
39
+
37
40
  out = scope.enter(tmp.temppath_same_fs(local_cache_path))
38
41
  if hasattr(data, "read") and hasattr(data, "seek"):
39
42
  with open(out, "wb") as f:
@@ -41,11 +44,13 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
41
44
  data.seek(0) # type: ignore
42
45
  link.link_or_copy(out, local_cache_path)
43
46
  return True
47
+
44
48
  if isinstance(data, bytes):
45
49
  with open(out, "wb") as f:
46
50
  f.write(data)
47
51
  link.link_or_copy(out, local_cache_path)
48
52
  return True
53
+
49
54
  return False
50
55
 
51
56
  if _try_write_through():
@@ -54,6 +59,7 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
54
59
  # and we don't want to allow anyone to write to its copy.
55
60
  files.set_read_only(local_cache_path)
56
61
  return local_cache_path
62
+
57
63
  except FileNotFoundError:
58
64
  # may have hit a race condition.
59
65
  # don't fail upload just because we couldn't write through the cache.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.adls
3
- Version: 4.1.20250701190349
3
+ Version: 4.1.20250703020842
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
@@ -7,7 +7,7 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
7
7
  thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
8
8
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
9
9
  thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
10
- thds/adls/download.py,sha256=N8JqNqD5ioHsEHcTl2bNJt3Bb187yyvZAXn4xW3flfU,18090
10
+ thds/adls/download.py,sha256=u-ckaExnTMAVUlhMbnxM_urBggbZkgXMQSSfp1czPeQ,18350
11
11
  thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
12
12
  thds/adls/errors.py,sha256=6cLg2E4SB8ic46PBzA3ynRH4b1oR8qRb07RBgKGJRxY,1783
13
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
@@ -24,19 +24,19 @@ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
24
24
  thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
25
25
  thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
26
26
  thds/adls/source_tree.py,sha256=yP_v2XrKxXqUOdZ-x8kqHhBFAuur3AlAq3zi4hHj4AE,2235
27
- thds/adls/upload.py,sha256=gS_S66gorzdW83eavPUVJ3UYrv5u3HnftDXjdwEZOo8,6441
27
+ thds/adls/upload.py,sha256=MRHK9Am-x5FKBPh1SXLTbPC1r0Xk0bGWNU8CcNuUMLo,6602
28
28
  thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
29
29
  thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
30
- thds/adls/azcopy/download.py,sha256=J7QAoBehpxsY58ofgGQur-MtIwM0NEnV9_Cw4i_X3y8,6007
30
+ thds/adls/azcopy/download.py,sha256=MwkUaQTrrXRX9yip_hiLZXzSgGqyTDUxjr0MeXitWuo,6450
31
31
  thds/adls/azcopy/login.py,sha256=923UaewVMPFzkDSgCQsbl-_g7qdFhpXpF0MGNIy3T_A,1538
32
32
  thds/adls/azcopy/progress.py,sha256=K7TVmSiWfu561orL3GuOnlQX9VtVxWVECAq9NiweYNo,1387
33
33
  thds/adls/azcopy/system_resources.py,sha256=okgDEKAp0oWGQF7OKikbgJ9buBeiOgNaDYy-36j6dHo,761
34
- thds/adls/azcopy/upload.py,sha256=bvtYdbaFsZkOHFLDpeBlTKqw63P3_kbImInI04ZlekM,2601
35
- thds/adls/tools/download.py,sha256=Dmt-EBZUEF-gVfUcwjAD8VRKR5rhw-oozxl40lZHmdw,1562
34
+ thds/adls/azcopy/upload.py,sha256=0l5FzV9IgZ2iQhm4eKZjTdw4SO17bHd8VnwcTev1lUs,2761
35
+ thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
36
36
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
37
37
  thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
38
- thds_adls-4.1.20250701190349.dist-info/METADATA,sha256=gJNup1vZrpFp-0nor96kwmz__Ij_Zc5pkytWoEslYMU,587
39
- thds_adls-4.1.20250701190349.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- thds_adls-4.1.20250701190349.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
41
- thds_adls-4.1.20250701190349.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
42
- thds_adls-4.1.20250701190349.dist-info/RECORD,,
38
+ thds_adls-4.1.20250703020842.dist-info/METADATA,sha256=zWALY04Mrs1zZNpje2AyaiU-B6tN7La5ldtAbVEXxac,587
39
+ thds_adls-4.1.20250703020842.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ thds_adls-4.1.20250703020842.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
41
+ thds_adls-4.1.20250703020842.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
42
+ thds_adls-4.1.20250703020842.dist-info/RECORD,,