thds.adls 4.1.20250722163644__py3-none-any.whl → 4.1.20250722195957__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/__init__.py CHANGED
@@ -5,7 +5,7 @@ from .cached import download_directory, download_to_cache, upload_through_cache
5
5
  from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
6
6
  from .errors import BlobNotFoundError # noqa: F401
7
7
  from .fqn import * # noqa: F401,F403
8
- from .global_client import get_global_client, get_global_fs_client # noqa: F401
8
+ from .global_client import get_global_fs_client # noqa: F401
9
9
  from .impl import * # noqa: F401,F403
10
10
  from .ro_cache import Cache, global_cache # noqa: F401
11
11
  from .upload import upload # noqa: F401
thds/adls/_fork_protector.py ADDED
@@ -0,0 +1,38 @@
1
+ import os
2
+ import typing as ty
3
+ from functools import partial
4
+
5
+ from typing_extensions import Concatenate, ParamSpec
6
+
7
+ P = ParamSpec("P")
8
+ T = ty.TypeVar("T")
9
+ F = ty.TypeVar("F", bound=ty.Callable)
10
+
11
+
12
+ def _pid_swallower(
13
+ func: ty.Callable[P, T],
14
+ pid: int,
15
+ *args: P.args,
16
+ **kwargs: P.kwargs,
17
+ ) -> T:
18
+ return func(*args, **kwargs)
19
+
20
+
21
+ def _pid_sending_wrapper(
22
+ caching_pid_swallower: ty.Callable[Concatenate[int, P], T],
23
+ *args: P.args,
24
+ **kwargs: P.kwargs,
25
+ ) -> T:
26
+ return caching_pid_swallower(os.getpid(), *args, **kwargs)
27
+
28
+
29
+ def fork_safe_cached(
30
+ cache_deco: ty.Callable[[F], F],
31
+ func: ty.Callable[P, T],
32
+ ) -> ty.Callable[P, T]:
33
+ """Decorator to make a fork-safe cached.locking function by wrapping it in a function that
34
+ always calls os.getpid() to invalidate the cache on new processes."""
35
+ return partial(
36
+ _pid_sending_wrapper,
37
+ cache_deco(ty.cast(F, partial(_pid_swallower, func))),
38
+ )
thds/adls/download.py CHANGED
@@ -158,6 +158,12 @@ IoRequest = ty.Union[_IoRequest, azcopy.download.DownloadRequest]
158
158
  IoResponse = ty.Union[FileProperties, None]
159
159
 
160
160
 
161
+ def _assert_fp(fp: ty.Optional[FileProperties], fqn: AdlsFqn) -> None:
162
+ assert fp, f"FileProperties for {fqn} should not be None."
163
+ assert fp.name, f"FileProperties for {fqn} should have a name."
164
+ assert fp.name == fqn.path, (fp, fqn)
165
+
166
+
161
167
  _dl_scope = scope.Scope("adls.download")
162
168
 
163
169
 
@@ -218,6 +224,7 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
218
224
  # expectations from ADLS itself.
219
225
  file_properties = yield _IoRequest.FILE_PROPERTIES
220
226
  if file_properties:
227
+ _assert_fp(file_properties, fqn)
221
228
  # critically, we expect the _first_ one in this list to be the fastest to verify.
222
229
  expected_hash = next(iter(hashes.extract_hashes_from_props(file_properties).values()), None)
223
230
 
@@ -247,6 +254,7 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
247
254
 
248
255
  logger.debug("Unable to find a cached version anywhere that we looked...")
249
256
  file_properties = yield _IoRequest.FILE_PROPERTIES
257
+ _assert_fp(file_properties, fqn)
250
258
 
251
259
  # if any of the remote hashes match the expected hash, verify that one.
252
260
  # otherwise, verify the first remote hash in the list, since that's the fastest one.
@@ -286,19 +294,14 @@ def _prep_download_coroutine(
286
294
  local_path: StrOrPath,
287
295
  expected_hash: ty.Optional[hashing.Hash] = None,
288
296
  cache: ty.Optional[Cache] = None,
289
- ) -> ty.Tuple[
290
- ty.Generator[IoRequest, IoResponse, _FileResult],
291
- IoRequest,
292
- ty.Optional[FileProperties],
293
- DataLakeFileClient,
294
- ]:
297
+ ) -> ty.Tuple[ty.Generator[IoRequest, IoResponse, _FileResult], IoRequest, DataLakeFileClient]:
295
298
  co = _download_or_use_verified_cached_coroutine(
296
299
  AdlsFqn(ty.cast(str, fs_client.account_name), fs_client.file_system_name, remote_key),
297
300
  local_path,
298
301
  expected_hash=expected_hash,
299
302
  cache=cache,
300
303
  )
301
- return co, co.send(None), None, fs_client.get_file_client(remote_key)
304
+ return co, co.send(None), fs_client.get_file_client(remote_key)
302
305
 
303
306
 
304
307
  def _excs_to_retry() -> ty.Callable[[Exception], bool]:
@@ -333,6 +336,7 @@ def _log_nonfatal_hash_error_exc(exc: Exception, url: str) -> None:
333
336
 
334
337
 
335
338
  @_dl_scope.bound
339
+ @fretry.retry_regular(fretry.is_exc(errors.ContentLengthMismatchError), fretry.n_times(2))
336
340
  def download_or_use_verified(
337
341
  fs_client: FileSystemClient,
338
342
  remote_key: str,
@@ -348,14 +352,16 @@ def download_or_use_verified(
348
352
  """
349
353
  file_properties = None
350
354
  try:
351
- co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
355
+ co, co_request, dl_file_client = _prep_download_coroutine(
352
356
  fs_client, remote_key, local_path, expected_hash, cache
353
357
  )
358
+ assert dl_file_client.path_name == remote_key
354
359
  _dl_scope.enter(dl_file_client) # on __exit__, will release the connection to the pool
355
360
  while True:
356
361
  if co_request == _IoRequest.FILE_PROPERTIES:
357
362
  if not file_properties:
358
363
  # only fetch these if they haven't already been requested
364
+ assert dl_file_client.path_name == remote_key
359
365
  file_properties = dl_file_client.get_file_properties()
360
366
  co_request = co.send(file_properties)
361
367
  elif isinstance(co_request, azcopy.download.DownloadRequest):
@@ -398,7 +404,7 @@ async def async_download_or_use_verified(
398
404
  ) -> ty.Optional[Path]:
399
405
  file_properties = None
400
406
  try:
401
- co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
407
+ co, co_request, dl_file_client = _prep_download_coroutine(
402
408
  fs_client, remote_key, local_path, expected_hash, cache
403
409
  )
404
410
  await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
thds/adls/global_client.py CHANGED
@@ -4,7 +4,7 @@ from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient
4
4
 
5
5
  from thds.core import cache, config
6
6
 
7
- from . import conf
7
+ from . import _fork_protector, conf
8
8
  from .shared_credential import SharedCredential
9
9
 
10
10
  DEFAULT_CONNECTION_POOL_SIZE = config.item("default_connection_pool_size", default=100, parse=int)
@@ -53,9 +53,7 @@ best approach for all applications.
53
53
  This avoids creating a client at a module level and is
54
54
  thread-safe.
55
55
  """
56
- get_global_client = cache.locking(adls_fs_client)
57
- # deprecated name - prefer get_global_fs_client
58
- get_global_fs_client = get_global_client
56
+ get_global_fs_client = _fork_protector.fork_safe_cached(cache.locking, adls_fs_client)
59
57
 
60
58
 
61
59
  def adls_blob_service_client(
@@ -84,4 +82,6 @@ def adls_blob_container_client(
84
82
  return get_global_blob_service_client(storage_account, connpool_size).get_container_client(container)
85
83
 
86
84
 
87
- get_global_blob_container_client = cache.locking(adls_blob_container_client)
85
+ get_global_blob_container_client = _fork_protector.fork_safe_cached(
86
+ cache.locking, adls_blob_container_client
87
+ )
thds_adls-4.1.20250722195957.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.adls
3
- Version: 4.1.20250722163644
3
+ Version: 4.1.20250722195957
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
thds_adls-4.1.20250722195957.dist-info/RECORD CHANGED
@@ -1,4 +1,5 @@
1
- thds/adls/__init__.py,sha256=g2Zb0EAAH-JzPMYHAub9liU4qa5pfqQDnILfEhmObGo,1036
1
+ thds/adls/__init__.py,sha256=PL0BRhiLhW_xY_2hhBgd8v3_NS1zpR4Kdd28zjNHBgo,1017
2
+ thds/adls/_fork_protector.py,sha256=WML1Ul-G_MCziBOyRp6GoNq1IGktn8-OQVjny1IZ1bs,950
2
3
  thds/adls/_progress.py,sha256=D6XIipzG_xwmxs_08LuiYFfThGqHTU2KiIyjNduiOFY,6656
3
4
  thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
4
5
  thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
@@ -7,13 +8,13 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
7
8
  thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
8
9
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
9
10
  thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
10
- thds/adls/download.py,sha256=WOpMXGUbWImBdkM4tSW7qnCbu7G_cRXKF5pFQVLPPxs,18772
11
+ thds/adls/download.py,sha256=VmvkI3c0bAxVF2B7dBqHpSL18a701ddFNFCwpJSfdF4,19223
11
12
  thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
12
13
  thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
13
14
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
14
15
  thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
15
16
  thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
16
- thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
17
+ thds/adls/global_client.py,sha256=q6oG1OOsfl4fbti81u8TE9d4Jx9-phlYgsGSc4032w8,3721
17
18
  thds/adls/hashes.py,sha256=-wRRATGmww7k2RD5Zmhq_Fq7Z2JihLy1njeHFekU15c,5316
18
19
  thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
19
20
  thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
@@ -35,8 +36,8 @@ thds/adls/azcopy/upload.py,sha256=RQLDJzS6qsMM12t5bykWJWBXs0UrmImrEFnPMxX2UlM,27
35
36
  thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
36
37
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
37
38
  thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
38
- thds_adls-4.1.20250722163644.dist-info/METADATA,sha256=xWf451Yppxmxiq03oR_ngRxa3jrmU7KV-yrukcy3UEI,587
39
- thds_adls-4.1.20250722163644.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- thds_adls-4.1.20250722163644.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
41
- thds_adls-4.1.20250722163644.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
42
- thds_adls-4.1.20250722163644.dist-info/RECORD,,
39
+ thds_adls-4.1.20250722195957.dist-info/METADATA,sha256=-BwbAJUU6FGtm6UBu9W1xS2de7H3pa_ecdVUKSGkBf4,587
40
+ thds_adls-4.1.20250722195957.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
+ thds_adls-4.1.20250722195957.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
42
+ thds_adls-4.1.20250722195957.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
43
+ thds_adls-4.1.20250722195957.dist-info/RECORD,,