thds.adls 4.1.20250722150725__py3-none-any.whl → 4.1.20250722163644__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/__init__.py CHANGED
@@ -5,7 +5,7 @@ from .cached import download_directory, download_to_cache, upload_through_cache
5
5
  from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
6
6
  from .errors import BlobNotFoundError # noqa: F401
7
7
  from .fqn import * # noqa: F401,F403
8
- from .global_client import get_global_fs_client # noqa: F401
8
+ from .global_client import get_global_client, get_global_fs_client # noqa: F401
9
9
  from .impl import * # noqa: F401,F403
10
10
  from .ro_cache import Cache, global_cache # noqa: F401
11
11
  from .upload import upload # noqa: F401
thds/adls/download.py CHANGED
@@ -158,12 +158,6 @@ IoRequest = ty.Union[_IoRequest, azcopy.download.DownloadRequest]
158
158
  IoResponse = ty.Union[FileProperties, None]
159
159
 
160
160
 
161
- def _assert_fp(fp: ty.Optional[FileProperties], fqn: AdlsFqn) -> None:
162
- assert fp, f"FileProperties for {fqn} should not be None."
163
- assert fp.name, f"FileProperties for {fqn} should have a name."
164
- assert fp.name == fqn.path, (fp, fqn)
165
-
166
-
167
161
  _dl_scope = scope.Scope("adls.download")
168
162
 
169
163
 
@@ -224,7 +218,6 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
224
218
  # expectations from ADLS itself.
225
219
  file_properties = yield _IoRequest.FILE_PROPERTIES
226
220
  if file_properties:
227
- _assert_fp(file_properties, fqn)
228
221
  # critically, we expect the _first_ one in this list to be the fastest to verify.
229
222
  expected_hash = next(iter(hashes.extract_hashes_from_props(file_properties).values()), None)
230
223
 
@@ -254,7 +247,6 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
254
247
 
255
248
  logger.debug("Unable to find a cached version anywhere that we looked...")
256
249
  file_properties = yield _IoRequest.FILE_PROPERTIES
257
- _assert_fp(file_properties, fqn)
258
250
 
259
251
  # if any of the remote hashes match the expected hash, verify that one.
260
252
  # otherwise, verify the first remote hash in the list, since that's the fastest one.
@@ -294,14 +286,19 @@ def _prep_download_coroutine(
294
286
  local_path: StrOrPath,
295
287
  expected_hash: ty.Optional[hashing.Hash] = None,
296
288
  cache: ty.Optional[Cache] = None,
297
- ) -> ty.Tuple[ty.Generator[IoRequest, IoResponse, _FileResult], IoRequest, DataLakeFileClient]:
289
+ ) -> ty.Tuple[
290
+ ty.Generator[IoRequest, IoResponse, _FileResult],
291
+ IoRequest,
292
+ ty.Optional[FileProperties],
293
+ DataLakeFileClient,
294
+ ]:
298
295
  co = _download_or_use_verified_cached_coroutine(
299
296
  AdlsFqn(ty.cast(str, fs_client.account_name), fs_client.file_system_name, remote_key),
300
297
  local_path,
301
298
  expected_hash=expected_hash,
302
299
  cache=cache,
303
300
  )
304
- return co, co.send(None), fs_client.get_file_client(remote_key)
301
+ return co, co.send(None), None, fs_client.get_file_client(remote_key)
305
302
 
306
303
 
307
304
  def _excs_to_retry() -> ty.Callable[[Exception], bool]:
@@ -336,7 +333,6 @@ def _log_nonfatal_hash_error_exc(exc: Exception, url: str) -> None:
336
333
 
337
334
 
338
335
  @_dl_scope.bound
339
- @fretry.retry_regular(fretry.is_exc(errors.ContentLengthMismatchError), fretry.n_times(2))
340
336
  def download_or_use_verified(
341
337
  fs_client: FileSystemClient,
342
338
  remote_key: str,
@@ -352,16 +348,14 @@ def download_or_use_verified(
352
348
  """
353
349
  file_properties = None
354
350
  try:
355
- co, co_request, dl_file_client = _prep_download_coroutine(
351
+ co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
356
352
  fs_client, remote_key, local_path, expected_hash, cache
357
353
  )
358
- assert dl_file_client.path_name == remote_key
359
354
  _dl_scope.enter(dl_file_client) # on __exit__, will release the connection to the pool
360
355
  while True:
361
356
  if co_request == _IoRequest.FILE_PROPERTIES:
362
357
  if not file_properties:
363
358
  # only fetch these if they haven't already been requested
364
- assert dl_file_client.path_name == remote_key
365
359
  file_properties = dl_file_client.get_file_properties()
366
360
  co_request = co.send(file_properties)
367
361
  elif isinstance(co_request, azcopy.download.DownloadRequest):
@@ -404,7 +398,7 @@ async def async_download_or_use_verified(
404
398
  ) -> ty.Optional[Path]:
405
399
  file_properties = None
406
400
  try:
407
- co, co_request, dl_file_client = _prep_download_coroutine(
401
+ co, co_request, file_properties, dl_file_client = _prep_download_coroutine(
408
402
  fs_client, remote_key, local_path, expected_hash, cache
409
403
  )
410
404
  await _async_dl_scope.async_enter(dl_file_client) # type: ignore[arg-type]
@@ -4,7 +4,7 @@ from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient
4
4
 
5
5
  from thds.core import cache, config
6
6
 
7
- from . import _fork_protector, conf
7
+ from . import conf
8
8
  from .shared_credential import SharedCredential
9
9
 
10
10
  DEFAULT_CONNECTION_POOL_SIZE = config.item("default_connection_pool_size", default=100, parse=int)
@@ -53,7 +53,9 @@ best approach for all applications.
53
53
  This avoids creating a client at a module level and is
54
54
  thread-safe.
55
55
  """
56
- get_global_fs_client = _fork_protector.fork_safe_cached(cache.locking, adls_fs_client)
56
+ get_global_client = cache.locking(adls_fs_client)
57
+ # deprecated name - prefer get_global_fs_client
58
+ get_global_fs_client = get_global_client
57
59
 
58
60
 
59
61
  def adls_blob_service_client(
@@ -82,6 +84,4 @@ def adls_blob_container_client(
82
84
  return get_global_blob_service_client(storage_account, connpool_size).get_container_client(container)
83
85
 
84
86
 
85
- get_global_blob_container_client = _fork_protector.fork_safe_cached(
86
- cache.locking, adls_blob_container_client
87
- )
87
+ get_global_blob_container_client = cache.locking(adls_blob_container_client)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.adls
3
- Version: 4.1.20250722150725
3
+ Version: 4.1.20250722163644
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
@@ -1,5 +1,4 @@
1
- thds/adls/__init__.py,sha256=PL0BRhiLhW_xY_2hhBgd8v3_NS1zpR4Kdd28zjNHBgo,1017
2
- thds/adls/_fork_protector.py,sha256=WML1Ul-G_MCziBOyRp6GoNq1IGktn8-OQVjny1IZ1bs,950
1
+ thds/adls/__init__.py,sha256=g2Zb0EAAH-JzPMYHAub9liU4qa5pfqQDnILfEhmObGo,1036
3
2
  thds/adls/_progress.py,sha256=D6XIipzG_xwmxs_08LuiYFfThGqHTU2KiIyjNduiOFY,6656
4
3
  thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
5
4
  thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
@@ -8,13 +7,13 @@ thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
8
7
  thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
9
8
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
10
9
  thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
11
- thds/adls/download.py,sha256=VmvkI3c0bAxVF2B7dBqHpSL18a701ddFNFCwpJSfdF4,19223
10
+ thds/adls/download.py,sha256=WOpMXGUbWImBdkM4tSW7qnCbu7G_cRXKF5pFQVLPPxs,18772
12
11
  thds/adls/download_lock.py,sha256=tgT48l4C5_qmArGeq05gl7VlxT22dZBH2Xwxx0itE9o,3176
13
12
  thds/adls/errors.py,sha256=6NMLHtVNsWBRDXaes9yzHj9cwKOD9t1dwL4BltdtjhU,1895
14
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
15
14
  thds/adls/file_properties.py,sha256=dhRtbsMNOYfExkEiy76wrLfrJ6IMQeN1Z3LIxgKceqY,2042
16
15
  thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
17
- thds/adls/global_client.py,sha256=q6oG1OOsfl4fbti81u8TE9d4Jx9-phlYgsGSc4032w8,3721
16
+ thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
18
17
  thds/adls/hashes.py,sha256=-wRRATGmww7k2RD5Zmhq_Fq7Z2JihLy1njeHFekU15c,5316
19
18
  thds/adls/impl.py,sha256=cNf1vmeS46X_wvyVdDJ8qFfowHn2QwtU5C80BmDtu5Y,43247
20
19
  thds/adls/md5.py,sha256=hGT8AIX32VUsnRCbm8cel9OlxAiRrgjwNWQTqRDHM_k,374
@@ -36,8 +35,8 @@ thds/adls/azcopy/upload.py,sha256=RQLDJzS6qsMM12t5bykWJWBXs0UrmImrEFnPMxX2UlM,27
36
35
  thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1587
37
36
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
38
37
  thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
39
- thds_adls-4.1.20250722150725.dist-info/METADATA,sha256=utnc6NxPXsuheuoeZ3IjXHJNvk1h48K3-A0ugQnDcsc,587
40
- thds_adls-4.1.20250722150725.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- thds_adls-4.1.20250722150725.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
42
- thds_adls-4.1.20250722150725.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
43
- thds_adls-4.1.20250722150725.dist-info/RECORD,,
38
+ thds_adls-4.1.20250722163644.dist-info/METADATA,sha256=xWf451Yppxmxiq03oR_ngRxa3jrmU7KV-yrukcy3UEI,587
39
+ thds_adls-4.1.20250722163644.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ thds_adls-4.1.20250722163644.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
41
+ thds_adls-4.1.20250722163644.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
42
+ thds_adls-4.1.20250722163644.dist-info/RECORD,,
@@ -1,38 +0,0 @@
1
- import os
2
- import typing as ty
3
- from functools import partial
4
-
5
- from typing_extensions import Concatenate, ParamSpec
6
-
7
- P = ParamSpec("P")
8
- T = ty.TypeVar("T")
9
- F = ty.TypeVar("F", bound=ty.Callable)
10
-
11
-
12
- def _pid_swallower(
13
- func: ty.Callable[P, T],
14
- pid: int,
15
- *args: P.args,
16
- **kwargs: P.kwargs,
17
- ) -> T:
18
- return func(*args, **kwargs)
19
-
20
-
21
- def _pid_sending_wrapper(
22
- caching_pid_swallower: ty.Callable[Concatenate[int, P], T],
23
- *args: P.args,
24
- **kwargs: P.kwargs,
25
- ) -> T:
26
- return caching_pid_swallower(os.getpid(), *args, **kwargs)
27
-
28
-
29
- def fork_safe_cached(
30
- cache_deco: ty.Callable[[F], F],
31
- func: ty.Callable[P, T],
32
- ) -> ty.Callable[P, T]:
33
- """Decorator to make a fork-safe cached.locking function by wrapping it in a function that
34
- always calls os.getpid() to invalidate the cache on new processes."""
35
- return partial(
36
- _pid_sending_wrapper,
37
- cache_deco(ty.cast(F, partial(_pid_swallower, func))),
38
- )