thds.adls 4.1.20250722200042__py3-none-any.whl → 4.1.20250722213940__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/__init__.py +1 -1
- thds/adls/download.py +9 -15
- thds/adls/global_client.py +5 -5
- {thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/RECORD +8 -9
- thds/adls/_fork_protector.py +0 -38
- {thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/top_level.txt +0 -0
thds/adls/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ from .cached import download_directory, download_to_cache, upload_through_cache
|
|
|
5
5
|
from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
|
|
6
6
|
from .errors import BlobNotFoundError # noqa: F401
|
|
7
7
|
from .fqn import * # noqa: F401,F403
|
|
8
|
-
from .global_client import get_global_fs_client # noqa: F401
|
|
8
|
+
from .global_client import get_global_client, get_global_fs_client # noqa: F401
|
|
9
9
|
from .impl import * # noqa: F401,F403
|
|
10
10
|
from .ro_cache import Cache, global_cache # noqa: F401
|
|
11
11
|
from .upload import upload # noqa: F401
|
thds/adls/download.py
CHANGED
|
@@ -158,12 +158,6 @@ IoRequest = ty.Union[_IoRequest, azcopy.download.DownloadRequest]
|
|
|
158
158
|
IoResponse = ty.Union[FileProperties, None]
|
|
159
159
|
|
|
160
160
|
|
|
161
|
-
def _assert_fp(fp: ty.Optional[FileProperties], fqn: AdlsFqn) -> None:
|
|
162
|
-
assert fp, f"FileProperties for {fqn} should not be None."
|
|
163
|
-
assert fp.name, f"FileProperties for {fqn} should have a name."
|
|
164
|
-
assert fp.name == fqn.path, (fp, fqn)
|
|
165
|
-
|
|
166
|
-
|
|
167
161
|
_dl_scope = scope.Scope("adls.download")
|
|
168
162
|
|
|
169
163
|
|
|
@@ -224,7 +218,6 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
|
|
|
224
218
|
# expectations from ADLS itself.
|
|
225
219
|
file_properties = yield _IoRequest.FILE_PROPERTIES
|
|
226
220
|
if file_properties:
|
|
227
|
-
_assert_fp(file_properties, fqn)
|
|
228
221
|
# critically, we expect the _first_ one in this list to be the fastest to verify.
|
|
229
222
|
expected_hash = next(iter(hashes.extract_hashes_from_props(file_properties).values()), None)
|
|
230
223
|
|
|
@@ -254,7 +247,6 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
|
|
|
254
247
|
|
|
255
248
|
logger.debug("Unable to find a cached version anywhere that we looked...")
|
|
256
249
|
file_properties = yield _IoRequest.FILE_PROPERTIES
|
|
257
|
-
_assert_fp(file_properties, fqn)
|
|
258
250
|
|
|
259
251
|
# if any of the remote hashes match the expected hash, verify that one.
|
|
260
252
|
# otherwise, verify the first remote hash in the list, since that's the fastest one.
|
|
@@ -294,14 +286,19 @@ def _prep_download_coroutine(
|
|
|
294
286
|
local_path: StrOrPath,
|
|
295
287
|
expected_hash: ty.Optional[hashing.Hash] = None,
|
|
296
288
|
cache: ty.Optional[Cache] = None,
|
|
297
|
-
) -> ty.Tuple[
|
|
289
|
+
) -> ty.Tuple[
|
|
290
|
+
ty.Generator[IoRequest, IoResponse, _FileResult],
|
|
291
|
+
IoRequest,
|
|
292
|
+
ty.Optional[FileProperties],
|
|
293
|
+
DataLakeFileClient,
|
|
294
|
+
]:
|
|
298
295
|
co = _download_or_use_verified_cached_coroutine(
|
|
299
296
|
AdlsFqn(ty.cast(str, fs_client.account_name), fs_client.file_system_name, remote_key),
|
|
300
297
|
local_path,
|
|
301
298
|
expected_hash=expected_hash,
|
|
302
299
|
cache=cache,
|
|
303
300
|
)
|
|
304
|
-
return co, co.send(None), fs_client.get_file_client(remote_key)
|
|
301
|
+
return co, co.send(None), None, fs_client.get_file_client(remote_key)
|
|
305
302
|
|
|
306
303
|
|
|
307
304
|
def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
@@ -336,7 +333,6 @@ def _log_nonfatal_hash_error_exc(exc: Exception, url: str) -> None:
|
|
|
336
333
|
|
|
337
334
|
|
|
338
335
|
@_dl_scope.bound
|
|
339
|
-
@fretry.retry_regular(fretry.is_exc(errors.ContentLengthMismatchError), fretry.n_times(2))
|
|
340
336
|
def download_or_use_verified(
|
|
341
337
|
fs_client: FileSystemClient,
|
|
342
338
|
remote_key: str,
|
|
@@ -352,16 +348,14 @@ def download_or_use_verified(
|
|
|
352
348
|
"""
|
|
353
349
|
file_properties = None
|
|
354
350
|
try:
|
|
355
|
-
co, co_request, dl_file_client = _prep_download_coroutine(
|
|
351
|
+
co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
|
|
356
352
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
357
353
|
)
|
|
358
|
-
assert dl_file_client.path_name == remote_key
|
|
359
354
|
_dl_scope.enter(dl_file_client) # on __exit__, will release the connection to the pool
|
|
360
355
|
while True:
|
|
361
356
|
if co_request == _IoRequest.FILE_PROPERTIES:
|
|
362
357
|
if not file_properties:
|
|
363
358
|
# only fetch these if they haven't already been requested
|
|
364
|
-
assert dl_file_client.path_name == remote_key
|
|
365
359
|
file_properties = dl_file_client.get_file_properties()
|
|
366
360
|
co_request = co.send(file_properties)
|
|
367
361
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
@@ -404,7 +398,7 @@ async def async_download_or_use_verified(
|
|
|
404
398
|
) -> ty.Optional[Path]:
|
|
405
399
|
file_properties = None
|
|
406
400
|
try:
|
|
407
|
-
co, co_request, dl_file_client = _prep_download_coroutine(
|
|
401
|
+
co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
|
|
408
402
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
409
403
|
)
|
|
410
404
|
await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
|
thds/adls/global_client.py
CHANGED
|
@@ -4,7 +4,7 @@ from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient
|
|
|
4
4
|
|
|
5
5
|
from thds.core import cache, config
|
|
6
6
|
|
|
7
|
-
from . import _fork_protector, conf
|
|
7
|
+
from . import conf
|
|
8
8
|
from .shared_credential import SharedCredential
|
|
9
9
|
|
|
10
10
|
DEFAULT_CONNECTION_POOL_SIZE = config.item("default_connection_pool_size", default=100, parse=int)
|
|
@@ -53,7 +53,9 @@ best approach for all applications.
|
|
|
53
53
|
This avoids creating a client at a module level and is
|
|
54
54
|
thread-safe.
|
|
55
55
|
"""
|
|
56
|
-
|
|
56
|
+
get_global_client = cache.locking(adls_fs_client)
|
|
57
|
+
# deprecated name - prefer get_global_fs_client
|
|
58
|
+
get_global_fs_client = get_global_client
|
|
57
59
|
|
|
58
60
|
|
|
59
61
|
def adls_blob_service_client(
|
|
@@ -82,6 +84,4 @@ def adls_blob_container_client(
|
|
|
82
84
|
return get_global_blob_service_client(storage_account, connpool_size).get_container_client(container)
|
|
83
85
|
|
|
84
86
|
|
|
85
|
-
get_global_blob_container_client = _fork_protector.fork_safe_cached(
|
|
86
|
-
cache.locking, adls_blob_container_client
|
|
87
|
-
)
|
|
87
|
+
get_global_blob_container_client = cache.locking(adls_blob_container_client)
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
thds/adls/__init__.py,sha256=
|
|
2
|
-
thds/adls/_fork_protector.py,sha256=WML1Ul-G_MCziBOyRp6GoNq1IGktn8-OQVjny1IZ1bs,950
|
|
1
|
+
thds/adls/__init__.py,sha256=g2Zb0EAAH-JzPMYHAub9liU4qa5pfqQDnILfEhmObGo,1036
|
|
3
2
|
thds/adls/_progress.py,sha256=D6XIipzG_xwmxs_08LuiYFfThGqHTU2KiIyjNduiOFY,6656
|
|
4
3
|
thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
|
|
5
4
|
thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
|
|
@@ -8,13 +7,13 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
8
7
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
9
8
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
10
9
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
11
|
-
thds/adls/download.py,sha256=
|
|
10
|
+
thds/adls/download.py,sha256=WOpMXGUbWImBdkM4tSW7qnCbu7G_cRXKF5pFQVLPPxs,18772
|
|
12
11
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
13
12
|
thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
|
|
14
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
15
14
|
thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
|
|
16
15
|
thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
|
|
17
|
-
thds/adls/global_client.py,sha256=
|
|
16
|
+
thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
|
|
18
17
|
thds/adls/hashes.py,sha256=-wRRATGmww7k2RD5Zmhq_Fq7Z2JihLy1njeHFekU15c,5316
|
|
19
18
|
thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
|
|
20
19
|
thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
|
|
@@ -36,8 +35,8 @@ thds/adls/azcopy/upload.py,sha256=RQLDJzS6qsMM12t5bykWJWBXs0UrmImrEFnPMxX2UlM,27
|
|
|
36
35
|
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
37
36
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
38
37
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
39
|
-
thds_adls-4.1.
|
|
40
|
-
thds_adls-4.1.
|
|
41
|
-
thds_adls-4.1.
|
|
42
|
-
thds_adls-4.1.
|
|
43
|
-
thds_adls-4.1.
|
|
38
|
+
thds_adls-4.1.20250722213940.dist-info/METADATA,sha256=4HRrxzhTRvxmMQS3S6cNKEa2PHvvfi1LJyrvF9ISyHI,587
|
|
39
|
+
thds_adls-4.1.20250722213940.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
+
thds_adls-4.1.20250722213940.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
+
thds_adls-4.1.20250722213940.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
+
thds_adls-4.1.20250722213940.dist-info/RECORD,,
|
thds/adls/_fork_protector.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import typing as ty
|
|
3
|
-
from functools import partial
|
|
4
|
-
|
|
5
|
-
from typing_extensions import Concatenate, ParamSpec
|
|
6
|
-
|
|
7
|
-
P = ParamSpec("P")
|
|
8
|
-
T = ty.TypeVar("T")
|
|
9
|
-
F = ty.TypeVar("F", bound=ty.Callable)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _pid_swallower(
|
|
13
|
-
func: ty.Callable[P, T],
|
|
14
|
-
pid: int,
|
|
15
|
-
*args: P.args,
|
|
16
|
-
**kwargs: P.kwargs,
|
|
17
|
-
) -> T:
|
|
18
|
-
return func(*args, **kwargs)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _pid_sending_wrapper(
|
|
22
|
-
caching_pid_swallower: ty.Callable[Concatenate[int, P], T],
|
|
23
|
-
*args: P.args,
|
|
24
|
-
**kwargs: P.kwargs,
|
|
25
|
-
) -> T:
|
|
26
|
-
return caching_pid_swallower(os.getpid(), *args, **kwargs)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def fork_safe_cached(
|
|
30
|
-
cache_deco: ty.Callable[[F], F],
|
|
31
|
-
func: ty.Callable[P, T],
|
|
32
|
-
) -> ty.Callable[P, T]:
|
|
33
|
-
"""Decorator to make a fork-safe cached.locking function by wrapping it in a function that
|
|
34
|
-
always calls os.getpid() to invalidate the cache on new processes."""
|
|
35
|
-
return partial(
|
|
36
|
-
_pid_sending_wrapper,
|
|
37
|
-
cache_deco(ty.cast(F, partial(_pid_swallower, func))),
|
|
38
|
-
)
|
|
File without changes
|
{thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250722200042.dist-info → thds_adls-4.1.20250722213940.dist-info}/top_level.txt
RENAMED
|
File without changes
|