thds.adls 4.1.20250703020842__py3-none-any.whl → 4.1.20250709185906__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/download.py +14 -2
- thds/adls/errors.py +4 -0
- thds/adls/impl.py +19 -4
- {thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/RECORD +8 -8
- {thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/top_level.txt +0 -0
thds/adls/download.py
CHANGED
|
@@ -320,6 +320,18 @@ def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
|
320
320
|
)
|
|
321
321
|
|
|
322
322
|
|
|
323
|
+
def _log_nonfatal_hash_error_exc(exc: Exception, url: str) -> None:
    """Log a failure to set the hash for ``url`` without raising.

    Azure exceptions are very noisy, so the amount of detail logged depends
    on the text of the exception:

    * if ``str(exc)`` contains ``AuthorizationPermissionMismatch`` or
      ``ConditionNotMet``, only the exception's class name is logged, at
      DEBUG level;
    * otherwise the full exception text is logged at WARNING level.

    Args:
        exc: the exception that was caught while setting the hash.
        url: identifies the file in the log message.
    """
    details = str(exc)
    is_known_condition = (
        "AuthorizationPermissionMismatch" in details or "ConditionNotMet" in details
    )
    if is_known_condition:
        # keep the message short for these: the class name is enough
        logger.debug("Unable to set hash for %s: %s", url, type(exc).__name__)
    else:
        logger.warning("Unable to set hash for %s: %s", url, details)
|
|
333
|
+
|
|
334
|
+
|
|
323
335
|
@_dl_scope.bound
|
|
324
336
|
def download_or_use_verified(
|
|
325
337
|
fs_client: FileSystemClient,
|
|
@@ -362,7 +374,7 @@ def download_or_use_verified(
|
|
|
362
374
|
assert file_properties
|
|
363
375
|
dl_file_client.set_metadata(meta, **etag.match_etag(file_properties))
|
|
364
376
|
except (HttpResponseError, ResourceModifiedError) as ex:
|
|
365
|
-
|
|
377
|
+
_log_nonfatal_hash_error_exc(ex, dl_file_client.url)
|
|
366
378
|
return si.value.hit
|
|
367
379
|
except AzureError as err:
|
|
368
380
|
errors.translate_azure_error(fs_client, remote_key, err)
|
|
@@ -420,7 +432,7 @@ async def async_download_or_use_verified(
|
|
|
420
432
|
await dl_file_client.set_metadata(meta, **etag.match_etag(file_properties)) # type: ignore[misc]
|
|
421
433
|
# TODO - check above type ignore
|
|
422
434
|
except (HttpResponseError, ResourceModifiedError) as ex:
|
|
423
|
-
|
|
435
|
+
_log_nonfatal_hash_error_exc(ex, dl_file_client.url)
|
|
424
436
|
return si.value.hit
|
|
425
437
|
except AzureError as err:
|
|
426
438
|
errors.translate_azure_error(fs_client, remote_key, err)
|
thds/adls/errors.py
CHANGED
|
@@ -27,6 +27,10 @@ class ContentLengthMismatchError(BlobPropertiesValidationError):
|
|
|
27
27
|
"""Raised when the content length of a file does not match the expected value as retrieved from the server."""
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
class NotADirectoryError(ValueError):
    """Raised when a path was expected to be a directory but is not.

    NOTE: this class has the same name as Python's built-in
    ``NotADirectoryError`` (a subclass of ``OSError``) but is unrelated to
    it: it derives from ``ValueError``, so ``except OSError`` will not catch
    it. In any module that imports this name, it shadows the built-in.
    """
|
|
32
|
+
|
|
33
|
+
|
|
30
34
|
def is_blob_not_found(exc: Exception) -> bool:
|
|
31
35
|
return (isinstance(exc, HttpResponseError) and exc.status_code == 404) or isinstance(
|
|
32
36
|
exc, BlobNotFoundError
|
thds/adls/impl.py
CHANGED
|
@@ -34,7 +34,7 @@ from thds.core import lazy, log
|
|
|
34
34
|
from ._upload import async_upload_decision_and_metadata
|
|
35
35
|
from .conf import CONNECTION_TIMEOUT, UPLOAD_CHUNK_SIZE
|
|
36
36
|
from .download import async_download_or_use_verified
|
|
37
|
-
from .errors import translate_azure_error
|
|
37
|
+
from .errors import NotADirectoryError, translate_azure_error
|
|
38
38
|
from .file_properties import is_directory
|
|
39
39
|
from .ro_cache import from_cache_path_to_local, global_cache
|
|
40
40
|
from .shared_credential import get_credential_kwargs
|
|
@@ -247,9 +247,10 @@ class ADLSFileSystem:
|
|
|
247
247
|
"""
|
|
248
248
|
# normalize remote path to a standard relative dir path -
|
|
249
249
|
# this ensures correctness of strip_prefix() below
|
|
250
|
-
|
|
250
|
+
stripped_remote_path = remote_path.strip("/")
|
|
251
|
+
remote_path = stripped_remote_path + "/"
|
|
251
252
|
dir_path = self._local_path_for(remote_path, local_path)
|
|
252
|
-
|
|
253
|
+
made_dir = False
|
|
253
254
|
path_filter_ = _true if path_filter is None else path_filter
|
|
254
255
|
|
|
255
256
|
# remove the remote directory prefix to determine a relative path for creation under dir_path
|
|
@@ -262,8 +263,22 @@ class ADLSFileSystem:
|
|
|
262
263
|
if not path.is_directory and path_filter_(path)
|
|
263
264
|
)
|
|
264
265
|
|
|
266
|
+
# shim generator to check for file vs directory, to prevent confusing errors that happen lower down
|
|
267
|
+
async def validated_paths() -> AsyncIterator[PathPair]:
|
|
268
|
+
async for path_pair in paths:
|
|
269
|
+
if path_pair.remote_path == stripped_remote_path:
|
|
270
|
+
raise NotADirectoryError(
|
|
271
|
+
f"Path '{stripped_remote_path}' points to a file, not a directory. "
|
|
272
|
+
f"Use fetch_file() instead."
|
|
273
|
+
)
|
|
274
|
+
nonlocal made_dir
|
|
275
|
+
if not made_dir:
|
|
276
|
+
dir_path.mkdir(exist_ok=True, parents=True)
|
|
277
|
+
made_dir = True
|
|
278
|
+
yield path_pair
|
|
279
|
+
|
|
265
280
|
local_paths = []
|
|
266
|
-
async for batch in self._async_batch(
|
|
281
|
+
async for batch in self._async_batch(validated_paths(), batch_size):
|
|
267
282
|
local_paths.extend(
|
|
268
283
|
await asyncio.gather(
|
|
269
284
|
*[
|
|
@@ -7,15 +7,15 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
7
7
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
8
8
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
9
9
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
10
|
-
thds/adls/download.py,sha256=
|
|
10
|
+
thds/adls/download.py,sha256=WOpMXGUbWImBdkM4tSW7qnCbu7G_cRXKF5pFQVLPPxs,18772
|
|
11
11
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
12
|
-
thds/adls/errors.py,sha256=
|
|
12
|
+
thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
14
|
thds/adls/file_properties.py,sha256=C9Kl3a5wuBNWYgZYnZbkH04u8uxadEcjVJIm3UevUM0,1912
|
|
15
15
|
thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
|
|
16
16
|
thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
|
|
17
17
|
thds/adls/hashes.py,sha256=RDQS-C38wskUhxXGFGLJ4ox8vm7ofurxSsUk13Ywijo,5309
|
|
18
|
-
thds/adls/impl.py,sha256=
|
|
18
|
+
thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
|
|
19
19
|
thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
|
|
20
20
|
thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
|
|
21
21
|
thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -35,8 +35,8 @@ thds/adls/azcopy/upload.py,sha256=0l5FzV9IgZ2iQhm4eKZjTdw4SO17bHd8VnwcTev1lUs,27
|
|
|
35
35
|
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
36
36
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
37
37
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
38
|
-
thds_adls-4.1.
|
|
39
|
-
thds_adls-4.1.
|
|
40
|
-
thds_adls-4.1.
|
|
41
|
-
thds_adls-4.1.
|
|
42
|
-
thds_adls-4.1.
|
|
38
|
+
thds_adls-4.1.20250709185906.dist-info/METADATA,sha256=xmQtReaLdgxixhJlbPzjQGhgClkAvRB15p2vSGi_BtY,587
|
|
39
|
+
thds_adls-4.1.20250709185906.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
thds_adls-4.1.20250709185906.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
+
thds_adls-4.1.20250709185906.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
+
thds_adls-4.1.20250709185906.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250703020842.dist-info → thds_adls-4.1.20250709185906.dist-info}/top_level.txt
RENAMED
|
File without changes
|