thds.adls 3.1.20250217180321__py3-none-any.whl → 3.1.20250218201547__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from thds.core import meta
2
2
 
3
- from . import abfss, defaults, etag, fqn, named_roots, resource, source, uri # noqa: F401
3
+ from . import abfss, defaults, etag, fqn, named_roots, resource, source, source_tree, uri # noqa: F401
4
4
  from .cached_up_down import download_directory, download_to_cache, upload_through_cache # noqa: F401
5
5
  from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
6
6
  from .errors import BlobNotFoundError # noqa: F401
thds/adls/fqn.py CHANGED
@@ -1,10 +1,9 @@
1
1
  import re
2
2
  from functools import reduce
3
- from typing import NamedTuple, TypeVar, Union
3
+ from typing import NamedTuple, Tuple, TypeVar, Union
4
4
 
5
- ADLS_SCHEME = (
6
- "adls://" # this is our invention, but ADLS does not appear to define one suitable for general use.
7
- )
5
+ ADLS_SCHEME = "adls://"
6
+ # this is our invention, but ADLS does not appear to define one suitable for general use.
8
7
 
9
8
 
10
9
  def join(*parts: str) -> str:
@@ -104,6 +103,13 @@ def parent(fqn: FR) -> FR:
104
103
  return AdlsFqn(fqn.sa, fqn.container, join(*fqn.path.split("/")[:-1])) # type: ignore
105
104
 
106
105
 
106
+ def split(fqn: FR) -> Tuple[str, ...]:
107
+ if isinstance(fqn, AdlsRoot):
108
+ return fqn.sa, fqn.container
109
+ assert isinstance(fqn, AdlsFqn)
110
+ return tuple(filter(None, (fqn.sa, fqn.container, *fqn.path.split("/"))))
111
+
112
+
107
113
  SA_REGEX = re.compile(r"^[\w]{3,24}$")
108
114
  # https://github.com/MicrosoftDocs/azure-docs/issues/64497#issuecomment-714380739
109
115
  CONT_REGEX = re.compile(r"^\w[\w\-]{2,63}$")
thds/adls/meta.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
- "git_commit": "d1dbeed714e4b8a66fc91ae65b2f721ae3c7211b",
2
+ "git_commit": "6aa6700b782d76b65b65256eaa49bd70e8217015",
3
3
  "git_branch": "main",
4
4
  "git_is_clean": true,
5
- "pyproject_version": "3.1.20250217180321",
5
+ "pyproject_version": "3.1.20250218201547",
6
6
  "thds_user": "runner",
7
7
  "misc": {}
8
8
  }
@@ -46,7 +46,7 @@ def from_source(source: source.Source) -> AdlsHashedResource:
46
46
  def to_source(resource: AdlsHashedResource) -> source.Source:
47
47
  return source.from_uri(
48
48
  str(resource.fqn),
49
- hash=source.Hash("md5", hashing.db64(resource.md5b64)),
49
+ hash=hashing.Hash("md5", hashing.db64(resource.md5b64)),
50
50
  )
51
51
 
52
52
 
@@ -0,0 +1,57 @@
1
+ import typing as ty
2
+ from dataclasses import dataclass
3
+
4
+ from azure.storage.blob import ContainerClient
5
+
6
+ from thds.core import hashing
7
+ from thds.core.source.tree import SourceTree
8
+
9
+ from . import fqn, global_client, source, uri
10
+
11
+
12
+ @dataclass
13
+ class BlobMeta:
14
+ path: str
15
+ size: int
16
+ md5: ty.Optional[hashing.Hash]
17
+
18
+
19
+ # https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python#azure-storage-blob-containerclient-list-blobs
20
+ # https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobproperties?view=azure-python
21
+ # https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.contentsettings?view=azure-python
22
+ def list_blob_meta(
23
+ container_client: ContainerClient, root_dir: str, match_suffix: str = ""
24
+ ) -> ty.List[BlobMeta]:
25
+ """Gets the path (relative to the SA/container root), size, and MD5 hash of all blobs in a directory."""
26
+ return [
27
+ BlobMeta(
28
+ blob_props.name, # type: ignore
29
+ blob_props.size, # type: ignore
30
+ (
31
+ hashing.Hash("md5", bytes(blob_props.content_settings.content_md5))
32
+ if blob_props.content_settings.content_md5
33
+ else None
34
+ ),
35
+ )
36
+ for blob_props in container_client.list_blobs(name_starts_with=root_dir)
37
+ if blob_props.size > 0 # type: ignore
38
+ # container client lists directories as blobs with size 0
39
+ and blob_props.name.endswith(match_suffix) # type: ignore
40
+ ]
41
+
42
+
43
+ def from_path(adls_path: uri.UriIsh, match_suffix: str = "") -> SourceTree:
44
+ """Creates a SourceTree object where the logical root is the final piece of the
45
+ provided adls path.
46
+ """
47
+ root_fqn = uri.parse_any(adls_path)
48
+
49
+ container_client = global_client.get_global_blob_container_client(root_fqn.sa, root_fqn.container)
50
+ container_root = root_fqn.root()
51
+ return SourceTree(
52
+ sources=[
53
+ source.from_adls(container_root / blob_meta.path, hash=blob_meta.md5)
54
+ for blob_meta in list_blob_meta(container_client, root_fqn.path, match_suffix=match_suffix)
55
+ ],
56
+ higher_logical_root=fqn.split(root_fqn)[-1],
57
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: thds.adls
3
- Version: 3.1.20250217180321
3
+ Version: 3.1.20250218201547
4
4
  Summary: ADLS tools
5
5
  Author: Trilliant Health
6
6
  Description-Content-Type: text/markdown
@@ -9,7 +9,7 @@ Requires-Dist: aiostream>=0.4.5
9
9
  Requires-Dist: azure-identity>=1.9
10
10
  Requires-Dist: azure-storage-file-datalake>=12.6
11
11
  Requires-Dist: filelock>=3.0
12
- Requires-Dist: thds.core>=1.31
12
+ Requires-Dist: thds.core>=1.32
13
13
 
14
14
  # adls Library
15
15
 
@@ -1,4 +1,4 @@
1
- thds/adls/__init__.py,sha256=Tc23Gy292GO7hC8qwxiAnVE3jGqLcEjHed2MKlSSpvI,784
1
+ thds/adls/__init__.py,sha256=er14MoCC9PlJMxWVS4G1hAeMJItaJj4EAsrTZlvlb0M,797
2
2
  thds/adls/_progress.py,sha256=SVOiVRvfvOfUjp-y-UUgA6n0FhAnpKesxltaQ16kmOk,6394
3
3
  thds/adls/_upload.py,sha256=q6Sk0CRnNcAjUOUPiBj4CfO4tJD196SQY0lT25CTSE4,4364
4
4
  thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
@@ -12,27 +12,28 @@ thds/adls/download_lock.py,sha256=_JZj-kjCUfHk9FvrmEuYpJYknmbam5eReFhGNDgzdLQ,25
12
12
  thds/adls/errors.py,sha256=IQfkdLVg0feAUd-xQzkS4-3Lp5_Ld9V9-SqiXeWLRtA,1335
13
13
  thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
14
14
  thds/adls/file_properties.py,sha256=JFkobkxcAaRGAh3TJ9mfc3X872gqrGuw4Xk_HSTj1sY,1582
15
- thds/adls/fqn.py,sha256=pGPBAWKrXorXs1DGkH-XXeDgwwQk3S_heQE7VkPAbW4,5598
15
+ thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
16
16
  thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
17
17
  thds/adls/impl.py,sha256=x1nSqc8W4NeuX8-JGOp2MRkK8ff6GnelTWedGxPs-qY,42494
18
18
  thds/adls/md5.py,sha256=qOX4_7WUj1QkbH_IwREcQNHvvZccOj-HpHZBfsKn1gY,1846
19
- thds/adls/meta.json,sha256=86WmgGgPKi8ZL0dYW19JTjs0rK58zLr2A_bAlvpkaBc,195
19
+ thds/adls/meta.json,sha256=9mFILKWuoXbJo2n0GOcOki9KkeCM84bOwctQogfI-Jo,195
20
20
  thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
21
21
  thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  thds/adls/ro_cache.py,sha256=F0uXol0t95mcRuBukNg3A7wt7XXQxpD5Sy09d9sl8f0,4825
23
23
  thds/adls/sas_tokens.py,sha256=tO7uUh5EVp2F_NITzz1Vks78KCZwVxT7C0faLj8UJqA,1889
24
24
  thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
25
25
  thds/adls/source.py,sha256=1JYliDqafqpLewOQ2XMKp4T2WD7b5Rl7XYViyLYtzX8,2437
26
+ thds/adls/source_tree.py,sha256=CiyCwgItF186aOoYiICkWt2oMLGMD6v526HRi8XWHtM,2219
26
27
  thds/adls/uri.py,sha256=pDH956p_VEHnjLLUnjWY6sGgRqksp9gdpc9KOW4gEP0,1205
27
28
  thds/adls/resource/__init__.py,sha256=IZ7_aRf1b3jEp7wXOxqHop0gV2gUcf9SOLeEEjIWlCU,1669
28
- thds/adls/resource/core.py,sha256=uU31tAVkwDcLC_TerZWq58xi4D791vNLDUfP0Y1BAoo,2716
29
+ thds/adls/resource/core.py,sha256=BVM91xsZ_B_CoGTc9DDD3FnGy8g6X-9eFpa86ZCzuZI,2717
29
30
  thds/adls/resource/file_pointers.py,sha256=PLru_3lwut_ZvrX5Keu-wJkPOt5o7UGf-OOT4ixaXME,2049
30
31
  thds/adls/resource/up_down.py,sha256=3uNlTvm2gVhSyYdQTBwsGecOgwtINQfINckR-awwV0Y,9907
31
32
  thds/adls/tools/download.py,sha256=ZZ8t1g9MRRy-aENGjr10E4aAqyVkKq2OqpyH70pCD_8,965
32
33
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
33
34
  thds/adls/tools/upload.py,sha256=eMk4pdug1aCMPDDWpIE3Zoq77i5APp9Uuh-sVCCDNJE,493
34
- thds.adls-3.1.20250217180321.dist-info/METADATA,sha256=FLx2KCf8yGNlmlXASxJzjIcQdbh4itJZ90OE7qPBN-M,397
35
- thds.adls-3.1.20250217180321.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
36
- thds.adls-3.1.20250217180321.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
37
- thds.adls-3.1.20250217180321.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
38
- thds.adls-3.1.20250217180321.dist-info/RECORD,,
35
+ thds.adls-3.1.20250218201547.dist-info/METADATA,sha256=EDR-b7AK1xPKvEq7rNK5xJgcWPhz32aL0mr5IwL645k,397
36
+ thds.adls-3.1.20250218201547.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
37
+ thds.adls-3.1.20250218201547.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
38
+ thds.adls-3.1.20250218201547.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
39
+ thds.adls-3.1.20250218201547.dist-info/RECORD,,