thds.adls 4.1.20250722213940__py3-none-any.whl → 4.1.20250724233711__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/__init__.py +1 -1
- thds/adls/_fork_protector.py +38 -0
- thds/adls/download.py +15 -9
- thds/adls/global_client.py +5 -5
- {thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/METADATA +1 -1
- {thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/RECORD +9 -8
- {thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/WHEEL +0 -0
- {thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/top_level.txt +0 -0
thds/adls/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ from .cached import download_directory, download_to_cache, upload_through_cache
|
|
|
5
5
|
from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
|
|
6
6
|
from .errors import BlobNotFoundError # noqa: F401
|
|
7
7
|
from .fqn import * # noqa: F401,F403
|
|
8
|
-
from .global_client import
|
|
8
|
+
from .global_client import get_global_fs_client # noqa: F401
|
|
9
9
|
from .impl import * # noqa: F401,F403
|
|
10
10
|
from .ro_cache import Cache, global_cache # noqa: F401
|
|
11
11
|
from .upload import upload # noqa: F401
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing as ty
|
|
3
|
+
from functools import partial
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Concatenate, ParamSpec
|
|
6
|
+
|
|
7
|
+
P = ParamSpec("P")
|
|
8
|
+
T = ty.TypeVar("T")
|
|
9
|
+
F = ty.TypeVar("F", bound=ty.Callable)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _pid_swallower(
|
|
13
|
+
func: ty.Callable[P, T],
|
|
14
|
+
pid: int,
|
|
15
|
+
*args: P.args,
|
|
16
|
+
**kwargs: P.kwargs,
|
|
17
|
+
) -> T:
|
|
18
|
+
return func(*args, **kwargs)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _pid_sending_wrapper(
|
|
22
|
+
caching_pid_swallower: ty.Callable[Concatenate[int, P], T],
|
|
23
|
+
*args: P.args,
|
|
24
|
+
**kwargs: P.kwargs,
|
|
25
|
+
) -> T:
|
|
26
|
+
return caching_pid_swallower(os.getpid(), *args, **kwargs)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def fork_safe_cached(
|
|
30
|
+
cache_deco: ty.Callable[[F], F],
|
|
31
|
+
func: ty.Callable[P, T],
|
|
32
|
+
) -> ty.Callable[P, T]:
|
|
33
|
+
"""Decorator to make a fork-safe cached.locking function by wrapping it in a function that
|
|
34
|
+
always calls os.getpid() to invalidate the cache on new processes."""
|
|
35
|
+
return partial(
|
|
36
|
+
_pid_sending_wrapper,
|
|
37
|
+
cache_deco(ty.cast(F, partial(_pid_swallower, func))),
|
|
38
|
+
)
|
thds/adls/download.py
CHANGED
|
@@ -158,6 +158,12 @@ IoRequest = ty.Union[_IoRequest, azcopy.download.DownloadRequest]
|
|
|
158
158
|
IoResponse = ty.Union[FileProperties, None]
|
|
159
159
|
|
|
160
160
|
|
|
161
|
+
def _assert_fp(fp: ty.Optional[FileProperties], fqn: AdlsFqn) -> None:
|
|
162
|
+
assert fp, f"FileProperties for {fqn} should not be None."
|
|
163
|
+
assert fp.name, f"FileProperties for {fqn} should have a name."
|
|
164
|
+
assert fp.name == fqn.path, (fp, fqn)
|
|
165
|
+
|
|
166
|
+
|
|
161
167
|
_dl_scope = scope.Scope("adls.download")
|
|
162
168
|
|
|
163
169
|
|
|
@@ -218,6 +224,7 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
|
|
|
218
224
|
# expectations from ADLS itself.
|
|
219
225
|
file_properties = yield _IoRequest.FILE_PROPERTIES
|
|
220
226
|
if file_properties:
|
|
227
|
+
_assert_fp(file_properties, fqn)
|
|
221
228
|
# critically, we expect the _first_ one in this list to be the fastest to verify.
|
|
222
229
|
expected_hash = next(iter(hashes.extract_hashes_from_props(file_properties).values()), None)
|
|
223
230
|
|
|
@@ -247,6 +254,7 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
|
|
|
247
254
|
|
|
248
255
|
logger.debug("Unable to find a cached version anywhere that we looked...")
|
|
249
256
|
file_properties = yield _IoRequest.FILE_PROPERTIES
|
|
257
|
+
_assert_fp(file_properties, fqn)
|
|
250
258
|
|
|
251
259
|
# if any of the remote hashes match the expected hash, verify that one.
|
|
252
260
|
# otherwise, verify the first remote hash in the list, since that's the fastest one.
|
|
@@ -286,19 +294,14 @@ def _prep_download_coroutine(
|
|
|
286
294
|
local_path: StrOrPath,
|
|
287
295
|
expected_hash: ty.Optional[hashing.Hash] = None,
|
|
288
296
|
cache: ty.Optional[Cache] = None,
|
|
289
|
-
) -> ty.Tuple[
|
|
290
|
-
ty.Generator[IoRequest, IoResponse, _FileResult],
|
|
291
|
-
IoRequest,
|
|
292
|
-
ty.Optional[FileProperties],
|
|
293
|
-
DataLakeFileClient,
|
|
294
|
-
]:
|
|
297
|
+
) -> ty.Tuple[ty.Generator[IoRequest, IoResponse, _FileResult], IoRequest, DataLakeFileClient]:
|
|
295
298
|
co = _download_or_use_verified_cached_coroutine(
|
|
296
299
|
AdlsFqn(ty.cast(str, fs_client.account_name), fs_client.file_system_name, remote_key),
|
|
297
300
|
local_path,
|
|
298
301
|
expected_hash=expected_hash,
|
|
299
302
|
cache=cache,
|
|
300
303
|
)
|
|
301
|
-
return co, co.send(None),
|
|
304
|
+
return co, co.send(None), fs_client.get_file_client(remote_key)
|
|
302
305
|
|
|
303
306
|
|
|
304
307
|
def _excs_to_retry() -> ty.Callable[[Exception], bool]:
|
|
@@ -333,6 +336,7 @@ def _log_nonfatal_hash_error_exc(exc: Exception, url: str) -> None:
|
|
|
333
336
|
|
|
334
337
|
|
|
335
338
|
@_dl_scope.bound
|
|
339
|
+
@fretry.retry_regular(fretry.is_exc(errors.ContentLengthMismatchError), fretry.n_times(2))
|
|
336
340
|
def download_or_use_verified(
|
|
337
341
|
fs_client: FileSystemClient,
|
|
338
342
|
remote_key: str,
|
|
@@ -348,14 +352,16 @@ def download_or_use_verified(
|
|
|
348
352
|
"""
|
|
349
353
|
file_properties = None
|
|
350
354
|
try:
|
|
351
|
-
co, co_request,
|
|
355
|
+
co, co_request, dl_file_client = _prep_download_coroutine(
|
|
352
356
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
353
357
|
)
|
|
358
|
+
assert dl_file_client.path_name == remote_key
|
|
354
359
|
_dl_scope.enter(dl_file_client) # on __exit__, will release the connection to the pool
|
|
355
360
|
while True:
|
|
356
361
|
if co_request == _IoRequest.FILE_PROPERTIES:
|
|
357
362
|
if not file_properties:
|
|
358
363
|
# only fetch these if they haven't already been requested
|
|
364
|
+
assert dl_file_client.path_name == remote_key
|
|
359
365
|
file_properties = dl_file_client.get_file_properties()
|
|
360
366
|
co_request = co.send(file_properties)
|
|
361
367
|
elif isinstance(co_request, azcopy.download.DownloadRequest):
|
|
@@ -398,7 +404,7 @@ async def async_download_or_use_verified(
|
|
|
398
404
|
) -> ty.Optional[Path]:
|
|
399
405
|
file_properties = None
|
|
400
406
|
try:
|
|
401
|
-
co, co_request,
|
|
407
|
+
co, co_request, dl_file_client = _prep_download_coroutine(
|
|
402
408
|
fs_client, remote_key, local_path, expected_hash, cache
|
|
403
409
|
)
|
|
404
410
|
await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
|
thds/adls/global_client.py
CHANGED
|
@@ -4,7 +4,7 @@ from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient
|
|
|
4
4
|
|
|
5
5
|
from thds.core import cache, config
|
|
6
6
|
|
|
7
|
-
from . import conf
|
|
7
|
+
from . import _fork_protector, conf
|
|
8
8
|
from .shared_credential import SharedCredential
|
|
9
9
|
|
|
10
10
|
DEFAULT_CONNECTION_POOL_SIZE = config.item("default_connection_pool_size", default=100, parse=int)
|
|
@@ -53,9 +53,7 @@ best approach for all applications.
|
|
|
53
53
|
This avoids creating a client at a module level and is
|
|
54
54
|
thread-safe.
|
|
55
55
|
"""
|
|
56
|
-
|
|
57
|
-
# deprecated name - prefer get_global_fs_client
|
|
58
|
-
get_global_fs_client = get_global_client
|
|
56
|
+
get_global_fs_client = _fork_protector.fork_safe_cached(cache.locking, adls_fs_client)
|
|
59
57
|
|
|
60
58
|
|
|
61
59
|
def adls_blob_service_client(
|
|
@@ -84,4 +82,6 @@ def adls_blob_container_client(
|
|
|
84
82
|
return get_global_blob_service_client(storage_account, connpool_size).get_container_client(container)
|
|
85
83
|
|
|
86
84
|
|
|
87
|
-
get_global_blob_container_client =
|
|
85
|
+
get_global_blob_container_client = _fork_protector.fork_safe_cached(
|
|
86
|
+
cache.locking, adls_blob_container_client
|
|
87
|
+
)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
thds/adls/__init__.py,sha256=
|
|
1
|
+
thds/adls/__init__.py,sha256=PL0BRhiLhW_xY_2hhBgd8v3_NS1zpR4Kdd28zjNHBgo,1017
|
|
2
|
+
thds/adls/_fork_protector.py,sha256=WML1Ul-G_MCziBOyRp6GoNq1IGktn8-OQVjny1IZ1bs,950
|
|
2
3
|
thds/adls/_progress.py,sha256=D6XIipzG_xwmxs_08LuiYFfThGqHTU2KiIyjNduiOFY,6656
|
|
3
4
|
thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
|
|
4
5
|
thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
|
|
@@ -7,13 +8,13 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
|
7
8
|
thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
|
|
8
9
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
9
10
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
10
|
-
thds/adls/download.py,sha256=
|
|
11
|
+
thds/adls/download.py,sha256=VmvkI3c0bAxVF2B7dBqHpSL18a701ddFNFCwpJSfdF4,19223
|
|
11
12
|
thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
|
|
12
13
|
thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
|
|
13
14
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
15
|
thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
|
|
15
16
|
thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
|
|
16
|
-
thds/adls/global_client.py,sha256=
|
|
17
|
+
thds/adls/global_client.py,sha256=q6oG1OOsfl4fbti81u8TE9d4Jx9-phlYgsGSc4032w8,3721
|
|
17
18
|
thds/adls/hashes.py,sha256=-wRRATGmww7k2RD5Zmhq_Fq7Z2JihLy1njeHFekU15c,5316
|
|
18
19
|
thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
|
|
19
20
|
thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
|
|
@@ -35,8 +36,8 @@ thds/adls/azcopy/upload.py,sha256=RQLDJzS6qsMM12t5bykWJWBXs0UrmImrEFnPMxX2UlM,27
|
|
|
35
36
|
thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
|
|
36
37
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
37
38
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
38
|
-
thds_adls-4.1.
|
|
39
|
-
thds_adls-4.1.20250722213940.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
40
|
-
thds_adls-4.1.20250722213940.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
41
|
-
thds_adls-4.1.20250722213940.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
42
|
-
thds_adls-4.1.20250722213940.dist-info/RECORD,,
|
|
39
|
+
thds_adls-4.1.20250724233711.dist-info/METADATA,sha256=__N8wqRc6f1Ib1mvztYicgsh5F7OISGYwNZcpgtsjZo,587
|
|
40
|
+
thds_adls-4.1.20250724233711.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
41
|
+
thds_adls-4.1.20250724233711.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
42
|
+
thds_adls-4.1.20250724233711.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
43
|
+
thds_adls-4.1.20250724233711.dist-info/RECORD,,
|
|
File without changes
|
{thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.1.20250722213940.dist-info → thds_adls-4.1.20250724233711.dist-info}/top_level.txt
RENAMED
|
File without changes
|