thds.adls 4.2.20250910232237__py3-none-any.whl → 4.2.20250911213450__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.adls might be problematic. Click here for more details.
- thds/adls/copy.py +10 -8
- {thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/METADATA +1 -1
- {thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/RECORD +6 -6
- {thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/WHEEL +0 -0
- {thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/entry_points.txt +0 -0
- {thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/top_level.txt +0 -0
thds/adls/copy.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Functions for copying blobs across remote locations."""
|
|
2
2
|
|
|
3
|
+
import concurrent.futures
|
|
3
4
|
import datetime
|
|
4
5
|
import random
|
|
5
6
|
import time
|
|
@@ -12,6 +13,7 @@ from thds.core import cache, log, parallel, thunks
|
|
|
12
13
|
from .file_properties import exists, get_blob_properties, get_file_properties, is_directory
|
|
13
14
|
from .fqn import AdlsFqn
|
|
14
15
|
from .global_client import get_global_blob_container_client, get_global_blob_service_client
|
|
16
|
+
from .hashes import extract_hashes_from_props
|
|
15
17
|
from .sas_tokens import gen_blob_sas_token, get_user_delegation_key
|
|
16
18
|
from .uri import UriIsh, parse_any
|
|
17
19
|
|
|
@@ -55,15 +57,13 @@ def _copy_file(
|
|
|
55
57
|
dest.path
|
|
56
58
|
)
|
|
57
59
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
)
|
|
62
|
-
return True
|
|
63
|
-
return False
|
|
60
|
+
def hashes_exist_and_are_equal() -> bool:
|
|
61
|
+
src_blob_props = src_blob_client.get_blob_properties()
|
|
62
|
+
dest_blob_props = dest_blob_client.get_blob_properties()
|
|
63
|
+
return extract_hashes_from_props(src_blob_props) == extract_hashes_from_props(dest_blob_props)
|
|
64
64
|
|
|
65
65
|
if dest_blob_client.exists():
|
|
66
|
-
if
|
|
66
|
+
if hashes_exist_and_are_equal():
|
|
67
67
|
# no point in copying if the files are the same
|
|
68
68
|
logger.info(
|
|
69
69
|
"%s already exists with the same md5 as the file at %s, no copy will occur", dest, src
|
|
@@ -179,6 +179,8 @@ def copy_files(
|
|
|
179
179
|
# would be cool to do this async, but using threads for quicker dev
|
|
180
180
|
return list(
|
|
181
181
|
parallel.yield_results(
|
|
182
|
-
[thunks.thunking(copy_wrapper)(src, dest) for src, dest in src_dest_fqn_pairs]
|
|
182
|
+
[thunks.thunking(copy_wrapper)(src, dest) for src, dest in src_dest_fqn_pairs],
|
|
183
|
+
executor_cm=concurrent.futures.ThreadPoolExecutor(max_workers=30),
|
|
184
|
+
# max_workers=30 prevents hitting system thread count limits (speaking from experience)
|
|
183
185
|
)
|
|
184
186
|
)
|
|
{thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/RECORD
CHANGED
|
@@ -5,7 +5,7 @@ thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
|
|
|
5
5
|
thds/adls/abfss.py,sha256=nCFfcdwLiwtz_MdbrpLJ9WOhoz0zHIVxsf-tarorEwM,727
|
|
6
6
|
thds/adls/cached.py,sha256=up1F5JOVXdmwdZ8RAB2UDgiy6ooLg8IMULohBh75VpQ,3034
|
|
7
7
|
thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
|
|
8
|
-
thds/adls/copy.py,sha256=
|
|
8
|
+
thds/adls/copy.py,sha256=aD6AquUR8r5W9SXd6Nm1qPrFH_fYpLC5dZk6HjPJnSQ,6611
|
|
9
9
|
thds/adls/dbfs.py,sha256=pPAjbIZRKJsaXKQljDMUgqS_zy1yKeEZHGMueXbuv3g,2219
|
|
10
10
|
thds/adls/defaults.py,sha256=GGq5Pn4r-8cX4bZItp4nnwWAAz7S07pzPoOegw0y5Fw,676
|
|
11
11
|
thds/adls/download.py,sha256=LHxkv073T-pCmIRmiXIgYEXIEpEm_OIZKvNHp1e4_k4,19124
|
|
@@ -38,8 +38,8 @@ thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1
|
|
|
38
38
|
thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
|
|
39
39
|
thds/adls/tools/ls_fast.py,sha256=Nowc-efAL_Y4ybPwZzKIeh7KGIjfecRzdWvJZcBzq_8,585
|
|
40
40
|
thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
|
|
41
|
-
thds_adls-4.2.
|
|
42
|
-
thds_adls-4.2.
|
|
43
|
-
thds_adls-4.2.
|
|
44
|
-
thds_adls-4.2.
|
|
45
|
-
thds_adls-4.2.
|
|
41
|
+
thds_adls-4.2.20250911213450.dist-info/METADATA,sha256=zjbZ7L6pmwqDupZaXi91IOkyRg8MVUYFvhHCysIC23g,587
|
|
42
|
+
thds_adls-4.2.20250911213450.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
+
thds_adls-4.2.20250911213450.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
|
|
44
|
+
thds_adls-4.2.20250911213450.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
45
|
+
thds_adls-4.2.20250911213450.dist-info/RECORD,,
|
|
{thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/WHEEL
RENAMED
|
File without changes
|
{thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_adls-4.2.20250910232237.dist-info → thds_adls-4.2.20250911213450.dist-info}/top_level.txt
RENAMED
|
File without changes
|