thds.adls 3.1.20250227181712__py3-none-any.whl → 3.1.20250227213257__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/_upload.py CHANGED
@@ -26,7 +26,7 @@ def _get_checksum_content_settings(data: AnyStrSrc) -> ty.Optional[ContentSettin
26
26
  """
27
27
  md5 = try_md5(data)
28
28
  if md5:
29
- return ContentSettings(content_md5=md5)
29
+ return ContentSettings(content_md5=bytearray(md5))
30
30
  return None
31
31
 
32
32
 
@@ -178,7 +178,8 @@ async def async_fastpath(
178
178
  except (subprocess.SubprocessError, FileNotFoundError):
179
179
  logger.warning("Falling back to Python SDK for download")
180
180
 
181
- reader = await dl_file_client.download_file(
181
+ reader = await dl_file_client.download_file( # type: ignore[misc]
182
+ # TODO - check above type ignore
182
183
  max_concurrency=conf.DOWNLOAD_FILE_MAX_CONCURRENCY(),
183
184
  connection_timeout=conf.CONNECTION_TIMEOUT(),
184
185
  )
thds/adls/cached_up_down.py CHANGED
@@ -1,5 +1,10 @@
1
+ import typing as ty
1
2
  from pathlib import Path
2
3
 
4
+ from thds.adls import source
5
+ from thds.core.source import Source
6
+ from thds.core.source.tree import SourceTree
7
+
3
8
  from .download import download_or_use_verified
4
9
  from .fqn import AdlsFqn
5
10
  from .global_client import get_global_fs_client
@@ -46,3 +51,20 @@ def download_directory(fqn: AdlsFqn) -> Path:
46
51
  fs.fetch_directory(fqn.path, cached_dir_root)
47
52
  assert cached_dir_root.is_dir(), "Directory should have been downloaded to the cache."
48
53
  return cached_dir_root
54
+
55
+
56
+ def upload_directory_through_cache(dest: UriIsh, src_path: Path) -> SourceTree:
57
+ if not src_path.is_dir():
58
+ raise ValueError(f"If you want to upload a file, use {upload_through_cache.__name__} instead")
59
+
60
+ dest = parse_any(dest)
61
+
62
+ def _upload_directory(dir_path: Path) -> ty.Iterable[Source]:
63
+ for item in dir_path.iterdir():
64
+ if item.is_dir(): # recur
65
+ yield from _upload_directory(item)
66
+ elif item.is_file(): # upload
67
+ file_dest = dest / str(item.relative_to(src_path))
68
+ yield source.from_adls(upload_through_cache(file_dest, item))
69
+
70
+ return SourceTree(sources=list(_upload_directory(src_path)))
thds/adls/download.py CHANGED
@@ -208,7 +208,8 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
208
208
  # we don't know what we expect, so attempt to retrieve an
209
209
  # expectation from ADLS itself.
210
210
  file_properties = yield _IoRequest.FILE_PROPERTIES
211
- md5b64 = _remote_md5b64(file_properties)
211
+ md5b64 = _remote_md5b64(file_properties) # type: ignore[arg-type]
212
+ # TODO - check above type ignore
212
213
 
213
214
  def attempt_cache_hit() -> ty.Optional[_FileResult]:
214
215
  if not md5b64:
@@ -259,7 +260,8 @@ def _download_or_use_verified_cached_coroutine( # noqa: C901
259
260
  file_properties = yield _IoRequest.FILE_PROPERTIES
260
261
  # no point in downloading if we've asked for hash X but ADLS only has hash Y.
261
262
  with _verify_md5s_before_and_after_download(
262
- _remote_md5b64(file_properties),
263
+ _remote_md5b64(file_properties), # type: ignore[arg-type]
264
+ # TODO - check above type ignore
263
265
  md5b64,
264
266
  fqn,
265
267
  local_path,
@@ -293,7 +295,7 @@ def _prep_download_coroutine(
293
295
  DataLakeFileClient,
294
296
  ]:
295
297
  co = _download_or_use_verified_cached_coroutine(
296
- AdlsFqn(fs_client.account_name, fs_client.file_system_name, remote_key),
298
+ AdlsFqn(ty.cast(str, fs_client.account_name), fs_client.file_system_name, remote_key),
297
299
  local_path,
298
300
  md5b64=md5b64,
299
301
  cache=cache,
@@ -306,7 +308,8 @@ def _set_md5_if_missing(
306
308
  ) -> ty.Optional[ContentSettings]:
307
309
  if not file_properties or file_properties.content_settings.content_md5:
308
310
  return None
309
- file_properties.content_settings.content_md5 = b64decode(md5b64)
311
+ file_properties.content_settings.content_md5 = b64decode(md5b64) # type: ignore[assignment]
312
+ # TODO - check above type ignore
310
313
  return file_properties.content_settings
311
314
 
312
315
 
@@ -370,7 +373,8 @@ async def async_download_or_use_verified(
370
373
  if co_request == _IoRequest.FILE_PROPERTIES:
371
374
  if not file_properties:
372
375
  # only fetch these if they haven't already been requested
373
- file_properties = await dl_file_client.get_file_properties()
376
+ file_properties = await dl_file_client.get_file_properties() # type: ignore[misc]
377
+ # TODO - check above type ignore
374
378
  co_request = co.send(file_properties)
375
379
  elif isinstance(co_request, azcopy.download.DownloadRequest):
376
380
  # coroutine is requesting download
@@ -384,7 +388,10 @@ async def async_download_or_use_verified(
384
388
  try:
385
389
  logger.info(f"Setting missing MD5 for {remote_key}")
386
390
  assert file_properties
387
- await dl_file_client.set_http_headers(cs, **match_etag(file_properties))
391
+ await dl_file_client.set_http_headers( # type: ignore[misc]
392
+ cs, **match_etag(file_properties)
393
+ )
394
+ # TODO - check above type ignore
388
395
  except HttpResponseError as hre:
389
396
  logger.info(f"Unable to set MD5 for {remote_key}: {hre}")
390
397
  return si.value.hit
thds/adls/file_properties.py CHANGED
@@ -9,7 +9,7 @@ from .global_client import get_global_blob_container_client, get_global_fs_clien
9
9
 
10
10
  def is_directory(info: FileProperties) -> bool:
11
11
  # from https://github.com/Azure/azure-sdk-for-python/issues/24814#issuecomment-1159280840
12
- return str(info.metadata.get("hdi_isfolder", "")).lower() == "true"
12
+ return str(info.get("metadata", dict()).get("hdi_isfolder", "")).lower() == "true"
13
13
 
14
14
 
15
15
  def get_file_properties(fqn: AdlsFqn) -> FileProperties:
thds/adls/impl.py CHANGED
@@ -14,7 +14,6 @@ from typing import (
14
14
  AsyncIterator,
15
15
  Awaitable,
16
16
  Callable,
17
- Dict,
18
17
  Iterable,
19
18
  List,
20
19
  Mapping,
@@ -457,7 +456,7 @@ class ADLSFileSystem:
457
456
  incl_subdirs: bool = False,
458
457
  batch_size: Optional[int] = None,
459
458
  recursive: bool = True,
460
- path_filter: Optional[Callable[[FileProperties], bool]] = None,
459
+ path_filter: Optional[Callable[[PathProperties], bool]] = None,
461
460
  ) -> List[FileProperties]:
462
461
  """Returns a list of `FileProperties` for files in a remote directory.
463
462
 
@@ -492,7 +491,7 @@ class ADLSFileSystem:
492
491
  file_system_client: FileSystemClient,
493
492
  remote_paths: Iterable[str],
494
493
  batch_size: Optional[int] = None,
495
- ) -> List[Dict[str, Any]]:
494
+ ) -> List[FileProperties]:
496
495
  """Returns a list of `FileProperties` for each file in a list of remote file paths.
497
496
 
498
497
  See :meth:`~ADLSFileSystem.get_files_info` for more details.
@@ -691,10 +690,11 @@ class ADLSFileSystem:
691
690
  self, it: AsyncIterable[T], size: Optional[int] = None
692
691
  ) -> AsyncIterator[List[T]]:
693
692
  """Async batch generator"""
693
+ # TODO - look at type ignores here
694
694
  batch_size = size if size is not None else self.default_batch_size
695
- async with stream.chunks(it, batch_size).stream() as streamer:
695
+ async with stream.chunks(it, batch_size).stream() as streamer: # type: ignore[arg-type,var-annotated]
696
696
  async for chunk in streamer:
697
- yield chunk
697
+ yield chunk # type: ignore[misc]
698
698
 
699
699
  def fetch_files(self, remote_paths: Union[Iterable[str], Mapping[str, Union[Path, str]]]):
700
700
  return self._run(self._fetch_files, remote_paths)
thds/adls/sas_tokens.py CHANGED
@@ -43,6 +43,8 @@ def gen_blob_sas_token(
43
43
 
44
44
  `expiry` is in seconds.
45
45
  """
46
+ expiry_datetime: ty.Union[ty.Optional[str], datetime.datetime]
47
+
46
48
  if isinstance(account_key, UserDelegationKey):
47
49
  expiry_datetime = account_key.signed_expiry
48
50
  else:
@@ -1,15 +1,18 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: thds.adls
3
- Version: 3.1.20250227181712
3
+ Version: 3.1.20250227213257
4
4
  Summary: ADLS tools
5
- Author: Trilliant Health
5
+ Author-email: Trilliant Health <info@trillianthealth.com>
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/TrilliantHealth/trilliant-data-science
8
+ Requires-Python: >=3.8
6
9
  Description-Content-Type: text/markdown
7
10
  Requires-Dist: aiohttp>=3.8.1
8
11
  Requires-Dist: aiostream>=0.4.5
9
12
  Requires-Dist: azure-identity>=1.9
10
13
  Requires-Dist: azure-storage-file-datalake>=12.6
11
14
  Requires-Dist: filelock>=3.0
12
- Requires-Dist: thds.core>=1.32
15
+ Requires-Dist: thds-core
13
16
 
14
17
  # adls Library
15
18
 
@@ -1,32 +1,31 @@
1
1
  thds/adls/__init__.py,sha256=er14MoCC9PlJMxWVS4G1hAeMJItaJj4EAsrTZlvlb0M,797
2
2
  thds/adls/_progress.py,sha256=ZzCHn_G7nHakioNFxdvoJZRr-jN6ymsp5JXf-iReROM,6580
3
- thds/adls/_upload.py,sha256=q6Sk0CRnNcAjUOUPiBj4CfO4tJD196SQY0lT25CTSE4,4364
3
+ thds/adls/_upload.py,sha256=XyP6tDM7s-A3G0SPSVlXRT4IZYsPqpOE4TeqtxP5i-I,4375
4
4
  thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
5
- thds/adls/cached_up_down.py,sha256=qTMrsHHrDq3YZ0gaLMFIPnGNraz0ctG9yoFd3HCVit4,1896
5
+ thds/adls/cached_up_down.py,sha256=CEqg-Q9FB_fvgCsN0C_dn9CJqnEpnF0lA7gCdd9s6gg,2721
6
6
  thds/adls/conf.py,sha256=q1SPrgb46NpobVzwt_Oyv71-BvsIbZLq9nRWS3LZjz0,1990
7
7
  thds/adls/copy.py,sha256=jUWbGvTpb4B3yRGS0nhGSbDzqRPzUqYgH0z1lFRJB3k,6365
8
8
  thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
9
9
  thds/adls/defaults.py,sha256=VxyaGz1gCcz2rGTR9acNRybbxGOYhTldvD_-SdGLBh0,652
10
- thds/adls/download.py,sha256=Vfd9DEHUmSKPKpKxf9WjkqAzZFZwIgjo4fgM5aIvW0g,16455
10
+ thds/adls/download.py,sha256=JUF-J6bfaSzDM389errg8tHPssGt-K1yp3O_pznls3o,16852
11
11
  thds/adls/download_lock.py,sha256=_JZj-kjCUfHk9FvrmEuYpJYknmbam5eReFhGNDgzdLQ,2520
12
12
  thds/adls/errors.py,sha256=B_rMsQvQnNmP_sf-x8kmGsv2vIeOh4G9kVbdNVyk350,1469
13
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
14
- thds/adls/file_properties.py,sha256=JFkobkxcAaRGAh3TJ9mfc3X872gqrGuw4Xk_HSTj1sY,1582
14
+ thds/adls/file_properties.py,sha256=V3VEjEG3PNyeQaJ159Kco5l7c2EiyjE0yygtmVVjd6E,1597
15
15
  thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
16
16
  thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
17
- thds/adls/impl.py,sha256=x1nSqc8W4NeuX8-JGOp2MRkK8ff6GnelTWedGxPs-qY,42494
17
+ thds/adls/impl.py,sha256=4qa70w1sehzp60CI6lW82NLDK-lsM1uUfhPmZnYJaw0,42589
18
18
  thds/adls/md5.py,sha256=qOX4_7WUj1QkbH_IwREcQNHvvZccOj-HpHZBfsKn1gY,1846
19
- thds/adls/meta.json,sha256=MhZqsLGrHq1cqZB6OydUf5yDbfo_GySrRRtjdpImBQc,195
20
19
  thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
21
20
  thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
21
  thds/adls/ro_cache.py,sha256=F0uXol0t95mcRuBukNg3A7wt7XXQxpD5Sy09d9sl8f0,4825
23
- thds/adls/sas_tokens.py,sha256=tO7uUh5EVp2F_NITzz1Vks78KCZwVxT7C0faLj8UJqA,1889
22
+ thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
24
23
  thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
25
24
  thds/adls/source.py,sha256=1JYliDqafqpLewOQ2XMKp4T2WD7b5Rl7XYViyLYtzX8,2437
26
25
  thds/adls/source_tree.py,sha256=CiyCwgItF186aOoYiICkWt2oMLGMD6v526HRi8XWHtM,2219
27
26
  thds/adls/uri.py,sha256=pDH956p_VEHnjLLUnjWY6sGgRqksp9gdpc9KOW4gEP0,1205
28
27
  thds/adls/azcopy/__init__.py,sha256=nTNbgz2GcEiGeswYbAgy4oPhivnzl_5crF3HqCdWWiw,31
29
- thds/adls/azcopy/download.py,sha256=_I0smR2f9pu0xIiQC9C97Xb-GHMGwn5dqbEhz41H-qo,6548
28
+ thds/adls/azcopy/download.py,sha256=Fig6q-bRZBeoA5L-URl5BACfMH9-9_sc4T2sa9yiZyQ,6611
30
29
  thds/adls/resource/__init__.py,sha256=IZ7_aRf1b3jEp7wXOxqHop0gV2gUcf9SOLeEEjIWlCU,1669
31
30
  thds/adls/resource/core.py,sha256=BVM91xsZ_B_CoGTc9DDD3FnGy8g6X-9eFpa86ZCzuZI,2717
32
31
  thds/adls/resource/file_pointers.py,sha256=PLru_3lwut_ZvrX5Keu-wJkPOt5o7UGf-OOT4ixaXME,2049
@@ -34,8 +33,8 @@ thds/adls/resource/up_down.py,sha256=3uNlTvm2gVhSyYdQTBwsGecOgwtINQfINckR-awwV0Y
34
33
  thds/adls/tools/download.py,sha256=vvBO8lSDl9oPugv75qpCkoemT9pOM9BV6yeExlkyG08,1594
35
34
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
36
35
  thds/adls/tools/upload.py,sha256=eMk4pdug1aCMPDDWpIE3Zoq77i5APp9Uuh-sVCCDNJE,493
37
- thds_adls-3.1.20250227181712.dist-info/METADATA,sha256=O8W1_KMHFvwQZ--B8xPvFDMLL774NvfbhpxvEKkACWE,397
38
- thds_adls-3.1.20250227181712.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
39
- thds_adls-3.1.20250227181712.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
40
- thds_adls-3.1.20250227181712.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
41
- thds_adls-3.1.20250227181712.dist-info/RECORD,,
36
+ thds_adls-3.1.20250227213257.dist-info/METADATA,sha256=ITESaWWIR-e2DCqoKNHKVGd0kolDejghHQqZ82F5qOc,543
37
+ thds_adls-3.1.20250227213257.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
38
+ thds_adls-3.1.20250227213257.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
39
+ thds_adls-3.1.20250227213257.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
40
+ thds_adls-3.1.20250227213257.dist-info/RECORD,,
thds/adls/meta.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "git_commit": "f4211224c05a5de6426bfd3efe4d62e3c0a7d64d",
3
- "git_branch": "main",
4
- "git_is_clean": true,
5
- "pyproject_version": "3.1.20250227181712",
6
- "thds_user": "runner",
7
- "misc": {}
8
- }