thds.adls 3.1.20250217180321__py3-none-any.whl → 3.1.20250218201547__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/__init__.py +1 -1
- thds/adls/fqn.py +10 -4
- thds/adls/meta.json +2 -2
- thds/adls/resource/core.py +1 -1
- thds/adls/source_tree.py +57 -0
- {thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/METADATA +2 -2
- {thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/RECORD +10 -9
- {thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/WHEEL +0 -0
- {thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/entry_points.txt +0 -0
- {thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/top_level.txt +0 -0
thds/adls/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from thds.core import meta
|
|
2
2
|
|
|
3
|
-
from . import abfss, defaults, etag, fqn, named_roots, resource, source, uri # noqa: F401
|
|
3
|
+
from . import abfss, defaults, etag, fqn, named_roots, resource, source, source_tree, uri # noqa: F401
|
|
4
4
|
from .cached_up_down import download_directory, download_to_cache, upload_through_cache # noqa: F401
|
|
5
5
|
from .copy import copy_file, copy_files, wait_for_copy # noqa: F401
|
|
6
6
|
from .errors import BlobNotFoundError # noqa: F401
|
thds/adls/fqn.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from functools import reduce
|
|
3
|
-
from typing import NamedTuple, TypeVar, Union
|
|
3
|
+
from typing import NamedTuple, Tuple, TypeVar, Union
|
|
4
4
|
|
|
5
|
-
ADLS_SCHEME = (
|
|
6
|
-
|
|
7
|
-
)
|
|
5
|
+
ADLS_SCHEME = "adls://"
|
|
6
|
+
# this is our invention, but ADLS does not appear to define one suitable for general use.
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
def join(*parts: str) -> str:
|
|
@@ -104,6 +103,13 @@ def parent(fqn: FR) -> FR:
|
|
|
104
103
|
return AdlsFqn(fqn.sa, fqn.container, join(*fqn.path.split("/")[:-1])) # type: ignore
|
|
105
104
|
|
|
106
105
|
|
|
106
|
+
def split(fqn: FR) -> Tuple[str, ...]:
|
|
107
|
+
if isinstance(fqn, AdlsRoot):
|
|
108
|
+
return fqn.sa, fqn.container
|
|
109
|
+
assert isinstance(fqn, AdlsFqn)
|
|
110
|
+
return tuple(filter(None, (fqn.sa, fqn.container, *fqn.path.split("/"))))
|
|
111
|
+
|
|
112
|
+
|
|
107
113
|
SA_REGEX = re.compile(r"^[\w]{3,24}$")
|
|
108
114
|
# https://github.com/MicrosoftDocs/azure-docs/issues/64497#issuecomment-714380739
|
|
109
115
|
CONT_REGEX = re.compile(r"^\w[\w\-]{2,63}$")
|
thds/adls/meta.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
|
-
"git_commit": "
|
|
2
|
+
"git_commit": "6aa6700b782d76b65b65256eaa49bd70e8217015",
|
|
3
3
|
"git_branch": "main",
|
|
4
4
|
"git_is_clean": true,
|
|
5
|
-
"pyproject_version": "3.1.20250217180321",
|
|
5
|
+
"pyproject_version": "3.1.20250218201547",
|
|
6
6
|
"thds_user": "runner",
|
|
7
7
|
"misc": {}
|
|
8
8
|
}
|
thds/adls/resource/core.py
CHANGED
|
@@ -46,7 +46,7 @@ def from_source(source: source.Source) -> AdlsHashedResource:
|
|
|
46
46
|
def to_source(resource: AdlsHashedResource) -> source.Source:
|
|
47
47
|
return source.from_uri(
|
|
48
48
|
str(resource.fqn),
|
|
49
|
-
hash=
|
|
49
|
+
hash=hashing.Hash("md5", hashing.db64(resource.md5b64)),
|
|
50
50
|
)
|
|
51
51
|
|
|
52
52
|
|
thds/adls/source_tree.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import typing as ty
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
from azure.storage.blob import ContainerClient
|
|
5
|
+
|
|
6
|
+
from thds.core import hashing
|
|
7
|
+
from thds.core.source.tree import SourceTree
|
|
8
|
+
|
|
9
|
+
from . import fqn, global_client, source, uri
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
|
|
13
|
+
class BlobMeta:
|
|
14
|
+
path: str
|
|
15
|
+
size: int
|
|
16
|
+
md5: ty.Optional[hashing.Hash]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python#azure-storage-blob-containerclient-list-blobs
|
|
20
|
+
# https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.blobproperties?view=azure-python
|
|
21
|
+
# https://learn.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.contentsettings?view=azure-python
|
|
22
|
+
def list_blob_meta(
|
|
23
|
+
container_client: ContainerClient, root_dir: str, match_suffix: str = ""
|
|
24
|
+
) -> ty.List[BlobMeta]:
|
|
25
|
+
"""Gets the path (relative to the SA/container root), size, and MD5 hash of all blobs in a directory."""
|
|
26
|
+
return [
|
|
27
|
+
BlobMeta(
|
|
28
|
+
blob_props.name, # type: ignore
|
|
29
|
+
blob_props.size, # type: ignore
|
|
30
|
+
(
|
|
31
|
+
hashing.Hash("md5", bytes(blob_props.content_settings.content_md5))
|
|
32
|
+
if blob_props.content_settings.content_md5
|
|
33
|
+
else None
|
|
34
|
+
),
|
|
35
|
+
)
|
|
36
|
+
for blob_props in container_client.list_blobs(name_starts_with=root_dir)
|
|
37
|
+
if blob_props.size > 0 # type: ignore
|
|
38
|
+
# container client lists directories as blobs with size 0
|
|
39
|
+
and blob_props.name.endswith(match_suffix) # type: ignore
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def from_path(adls_path: uri.UriIsh, match_suffix: str = "") -> SourceTree:
|
|
44
|
+
"""Creates a SourceTree object where the logical root is the final piece of the
|
|
45
|
+
provided adls path.
|
|
46
|
+
"""
|
|
47
|
+
root_fqn = uri.parse_any(adls_path)
|
|
48
|
+
|
|
49
|
+
container_client = global_client.get_global_blob_container_client(root_fqn.sa, root_fqn.container)
|
|
50
|
+
container_root = root_fqn.root()
|
|
51
|
+
return SourceTree(
|
|
52
|
+
sources=[
|
|
53
|
+
source.from_adls(container_root / blob_meta.path, hash=blob_meta.md5)
|
|
54
|
+
for blob_meta in list_blob_meta(container_client, root_fqn.path, match_suffix=match_suffix)
|
|
55
|
+
],
|
|
56
|
+
higher_logical_root=fqn.split(root_fqn)[-1],
|
|
57
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: thds.adls
|
|
3
|
-
Version: 3.1.20250217180321
|
|
3
|
+
Version: 3.1.20250218201547
|
|
4
4
|
Summary: ADLS tools
|
|
5
5
|
Author: Trilliant Health
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -9,7 +9,7 @@ Requires-Dist: aiostream>=0.4.5
|
|
|
9
9
|
Requires-Dist: azure-identity>=1.9
|
|
10
10
|
Requires-Dist: azure-storage-file-datalake>=12.6
|
|
11
11
|
Requires-Dist: filelock>=3.0
|
|
12
|
-
Requires-Dist: thds.core>=1.
|
|
12
|
+
Requires-Dist: thds.core>=1.32
|
|
13
13
|
|
|
14
14
|
# adls Library
|
|
15
15
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
thds/adls/__init__.py,sha256=
|
|
1
|
+
thds/adls/__init__.py,sha256=er14MoCC9PlJMxWVS4G1hAeMJItaJj4EAsrTZlvlb0M,797
|
|
2
2
|
thds/adls/_progress.py,sha256=SVOiVRvfvOfUjp-y-UUgA6n0FhAnpKesxltaQ16kmOk,6394
|
|
3
3
|
thds/adls/_upload.py,sha256=q6Sk0CRnNcAjUOUPiBj4CfO4tJD196SQY0lT25CTSE4,4364
|
|
4
4
|
thds/adls/abfss.py,sha256=ZRJOLjDuXmS4bIbQAQpQxWWWeu74N9NKEKCNfXQek80,726
|
|
@@ -12,27 +12,28 @@ thds/adls/download_lock.py,sha256=_JZj-kjCUfHk9FvrmEuYpJYknmbam5eReFhGNDgzdLQ,25
|
|
|
12
12
|
thds/adls/errors.py,sha256=IQfkdLVg0feAUd-xQzkS4-3Lp5_Ld9V9-SqiXeWLRtA,1335
|
|
13
13
|
thds/adls/etag.py,sha256=ct7jpHhNFcKzbekn5rZ3m6DhjK48A7qOZGwDiHkc-pc,242
|
|
14
14
|
thds/adls/file_properties.py,sha256=JFkobkxcAaRGAh3TJ9mfc3X872gqrGuw4Xk_HSTj1sY,1582
|
|
15
|
-
thds/adls/fqn.py,sha256=
|
|
15
|
+
thds/adls/fqn.py,sha256=0zHmHhBWN7GEfKRB3fBC1NVhaiIHHifBdCRanyT01X8,5822
|
|
16
16
|
thds/adls/global_client.py,sha256=f4VJw5y_Yh__8gQUcdSYTh1aU6iEPlauMchVirSAwDQ,3716
|
|
17
17
|
thds/adls/impl.py,sha256=x1nSqc8W4NeuX8-JGOp2MRkK8ff6GnelTWedGxPs-qY,42494
|
|
18
18
|
thds/adls/md5.py,sha256=qOX4_7WUj1QkbH_IwREcQNHvvZccOj-HpHZBfsKn1gY,1846
|
|
19
|
-
thds/adls/meta.json,sha256=
|
|
19
|
+
thds/adls/meta.json,sha256=9mFILKWuoXbJo2n0GOcOki9KkeCM84bOwctQogfI-Jo,195
|
|
20
20
|
thds/adls/named_roots.py,sha256=7SLbAoQQpV_mrFZaUPjYoS-F9dxQxN5Hg4M3YPirF_w,751
|
|
21
21
|
thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
thds/adls/ro_cache.py,sha256=F0uXol0t95mcRuBukNg3A7wt7XXQxpD5Sy09d9sl8f0,4825
|
|
23
23
|
thds/adls/sas_tokens.py,sha256=tO7uUh5EVp2F_NITzz1Vks78KCZwVxT7C0faLj8UJqA,1889
|
|
24
24
|
thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
|
|
25
25
|
thds/adls/source.py,sha256=1JYliDqafqpLewOQ2XMKp4T2WD7b5Rl7XYViyLYtzX8,2437
|
|
26
|
+
thds/adls/source_tree.py,sha256=CiyCwgItF186aOoYiICkWt2oMLGMD6v526HRi8XWHtM,2219
|
|
26
27
|
thds/adls/uri.py,sha256=pDH956p_VEHnjLLUnjWY6sGgRqksp9gdpc9KOW4gEP0,1205
|
|
27
28
|
thds/adls/resource/__init__.py,sha256=IZ7_aRf1b3jEp7wXOxqHop0gV2gUcf9SOLeEEjIWlCU,1669
|
|
28
|
-
thds/adls/resource/core.py,sha256=
|
|
29
|
+
thds/adls/resource/core.py,sha256=BVM91xsZ_B_CoGTc9DDD3FnGy8g6X-9eFpa86ZCzuZI,2717
|
|
29
30
|
thds/adls/resource/file_pointers.py,sha256=PLru_3lwut_ZvrX5Keu-wJkPOt5o7UGf-OOT4ixaXME,2049
|
|
30
31
|
thds/adls/resource/up_down.py,sha256=3uNlTvm2gVhSyYdQTBwsGecOgwtINQfINckR-awwV0Y,9907
|
|
31
32
|
thds/adls/tools/download.py,sha256=ZZ8t1g9MRRy-aENGjr10E4aAqyVkKq2OqpyH70pCD_8,965
|
|
32
33
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
33
34
|
thds/adls/tools/upload.py,sha256=eMk4pdug1aCMPDDWpIE3Zoq77i5APp9Uuh-sVCCDNJE,493
|
|
34
|
-
thds.adls-3.1.
|
|
35
|
-
thds.adls-3.1.20250217180321.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
36
|
-
thds.adls-3.1.20250217180321.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
37
|
-
thds.adls-3.1.20250217180321.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
38
|
-
thds.adls-3.1.20250217180321.dist-info/RECORD,,
|
|
35
|
+
thds.adls-3.1.20250218201547.dist-info/METADATA,sha256=EDR-b7AK1xPKvEq7rNK5xJgcWPhz32aL0mr5IwL645k,397
|
|
36
|
+
thds.adls-3.1.20250218201547.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
37
|
+
thds.adls-3.1.20250218201547.dist-info/entry_points.txt,sha256=uTqreT1AIwqJboMfLv5w6sviM8mNbAkln765gIjzoA4,152
|
|
38
|
+
thds.adls-3.1.20250218201547.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
39
|
+
thds.adls-3.1.20250218201547.dist-info/RECORD,,
|
|
File without changes
|
{thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds.adls-3.1.20250217180321.dist-info → thds.adls-3.1.20250218201547.dist-info}/top_level.txt
RENAMED
|
File without changes
|