thds.adls 4.3.20251008013223__py3-none-any.whl → 4.3.20251008224101__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of thds.adls might be problematic. Click here for more details.

thds/adls/_upload.py CHANGED
@@ -38,17 +38,22 @@ def _try_default_hash(data: hashes.AnyStrSrc) -> ty.Optional[hashing.Hash]:
38
38
  return None
39
39
 
40
40
 
41
+ UploadSrc = ty.Union[Path, bytes, ty.IO[bytes], ty.Iterable[bytes]]
42
+
43
+
44
+ def upload_src_len(upload_src: UploadSrc, default: int = 0) -> int:
45
+ if isinstance(upload_src, Path) and upload_src.exists():
46
+ return upload_src.stat().st_size
47
+ try:
48
+ return len(upload_src) # type: ignore
49
+ except TypeError as te:
50
+ logger.debug(f"failed to get length? {repr(te)} for {upload_src!r}")
51
+ return default
52
+
53
+
41
54
  def _too_small_to_skip_upload(data: hashes.AnyStrSrc, min_size_for_remote_check: int) -> bool:
42
- def _len() -> int:
43
- if isinstance(data, Path) and data.exists():
44
- return data.stat().st_size
45
- try:
46
- return len(data) # type: ignore
47
- except TypeError as te:
48
- logger.debug(f"failed to get length? {repr(te)} for {data}")
49
- return min_size_for_remote_check + 1
50
-
51
- return _len() < min_size_for_remote_check
55
+ len_ = upload_src_len(data) or min_size_for_remote_check + 1
56
+ return len_ < min_size_for_remote_check
52
57
 
53
58
 
54
59
  class UploadDecision(ty.NamedTuple):
thds/adls/source.py CHANGED
@@ -30,7 +30,9 @@ def _adls_uri_source_download_handler(uri: str) -> ty.Optional[source.Downloader
30
30
  source.register_download_handler("thds.adls", _adls_uri_source_download_handler)
31
31
 
32
32
 
33
- def from_adls(uri_or_fqn: ty.Union[str, AdlsFqn], hash: ty.Optional[Hash] = None) -> source.Source:
33
+ def from_adls(
34
+ uri_or_fqn: ty.Union[str, AdlsFqn], hash: ty.Optional[Hash] = None, size: int = 0
35
+ ) -> source.Source:
34
36
  """Flexible, public interface to creating Sources from any ADLS-like reference.
35
37
 
36
38
  Does NOT automatically fetch a checksumming hash from the ADLS URI if it's not
@@ -40,7 +42,7 @@ def from_adls(uri_or_fqn: ty.Union[str, AdlsFqn], hash: ty.Optional[Hash] = None
40
42
  r_fqn = resolve_any(uri_or_fqn)
41
43
  if not r_fqn:
42
44
  raise ValueError(f"Could not resolve {uri_or_fqn} to an ADLS FQN")
43
- return source.Source(str(r_fqn), hash)
45
+ return source.Source(str(r_fqn), hash, size)
44
46
 
45
47
 
46
48
  source.register_from_uri_handler(
@@ -55,13 +57,15 @@ def get_with_hash(fqn_or_uri: ty.Union[AdlsFqn, str]) -> source.Source:
55
57
  """
56
58
  fqn = AdlsFqn.parse(fqn_or_uri) if isinstance(fqn_or_uri, str) else fqn_or_uri
57
59
  with blob_not_found_translation(fqn):
58
- uri_hashes = hashes.extract_hashes_from_props(get_file_properties(fqn))
60
+ props = get_file_properties(fqn)
61
+ uri_hashes = hashes.extract_hashes_from_props(props)
59
62
  if not uri_hashes:
60
63
  raise ValueError(
61
64
  f"ADLS file {fqn} must have a hash to use this function. "
62
65
  "If you know the hash, use `from_adls` with the hash parameter."
63
66
  )
64
- return from_adls(fqn, next(iter(uri_hashes.values())))
67
+ size = int(props.get("size")) or 0
68
+ return from_adls(fqn, next(iter(uri_hashes.values())), size)
65
69
 
66
70
 
67
71
  def with_md5b64(uri_or_fqn: ty.Union[str, AdlsFqn], *, md5b64: str = "") -> source.Source:
thds/adls/source_tree.py CHANGED
@@ -60,7 +60,7 @@ def from_path(adls_path: uri.UriIsh, match_suffix: str = "") -> SourceTree:
60
60
  container_root = root_fqn.root()
61
61
  return SourceTree(
62
62
  sources=[
63
- source.from_adls(container_root / blob_meta.path, hash=blob_meta.hash)
63
+ source.from_adls(container_root / blob_meta.path, hash=blob_meta.hash, size=blob_meta.size)
64
64
  for blob_meta in list_blob_meta(container_client, root_fqn.path, match_suffix=match_suffix)
65
65
  ],
66
66
  higher_logical_root=fqn.split(root_fqn)[-1],
thds/adls/upload.py CHANGED
@@ -15,7 +15,7 @@ from thds.core import files, fretry, link, log, scope, source, tmp
15
15
 
16
16
  from . import azcopy, hashes
17
17
  from ._progress import report_upload_progress
18
- from ._upload import upload_decision_and_metadata
18
+ from ._upload import UploadSrc, upload_decision_and_metadata, upload_src_len
19
19
  from .conf import UPLOAD_FILE_MAX_CONCURRENCY
20
20
  from .file_lock import file_lock
21
21
  from .fqn import AdlsFqn
@@ -26,9 +26,6 @@ logger = log.getLogger(__name__)
26
26
  _SLOW_CONNECTION_WORKAROUND = 14400 # seconds
27
27
 
28
28
 
29
- UploadSrc = ty.Union[Path, bytes, ty.IO[ty.AnyStr], ty.Iterable[bytes]]
30
-
31
-
32
29
  def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Optional[Path]:
33
30
  @scope.bound
34
31
  def _try_write_through() -> bool:
@@ -41,8 +38,8 @@ def _write_through_local_cache(local_cache_path: Path, data: UploadSrc) -> ty.Op
41
38
  out = scope.enter(tmp.temppath_same_fs(local_cache_path))
42
39
  if hasattr(data, "read") and hasattr(data, "seek"):
43
40
  with open(out, "wb") as f:
44
- f.write(data.read()) # type: ignore
45
- data.seek(0) # type: ignore
41
+ f.write(data.read())
42
+ data.seek(0)
46
43
  link.link_or_copy(out, local_cache_path)
47
44
  return True
48
45
 
@@ -107,6 +104,7 @@ def upload(
107
104
  blob_container_client = get_global_blob_container_client(dest_.sa, dest_.container)
108
105
  blob_client = blob_container_client.get_blob_client(dest_.path)
109
106
  decision = upload_decision_and_metadata(blob_client.get_blob_properties, src) # type: ignore [arg-type]
107
+ n_bytes = upload_src_len(src, default=0)
110
108
 
111
109
  def source_from_meta() -> source.Source:
112
110
  best_hash = next(iter(hashes.extract_hashes_from_metadata(decision.metadata)), None)
@@ -114,17 +112,14 @@ def upload(
114
112
  assert best_hash, "A hash should always be calculable for a local path."
115
113
  return source.from_file(src, hash=best_hash, uri=str(dest_))
116
114
 
117
- return source.from_uri(str(dest_), hash=best_hash)
115
+ return source.from_uri(str(dest_), hash=best_hash, size=n_bytes)
118
116
 
119
117
  if decision.upload_required:
120
118
  # set up some bookkeeping
121
- n_bytes = None # if we pass 0 to upload_blob, it truncates the write now
122
119
  bytes_src: ty.Union[bytes, ty.IO, ty.Iterable[bytes]]
123
120
  if isinstance(src, Path):
124
- n_bytes = src.stat().st_size
125
121
  bytes_src = scope.enter(open(src, "rb"))
126
122
  elif isinstance(src, bytes):
127
- n_bytes = len(src)
128
123
  bytes_src = src
129
124
  else:
130
125
  bytes_src = src
@@ -132,7 +127,7 @@ def upload(
132
127
  if "metadata" in upload_data_kwargs:
133
128
  decision.metadata.update(upload_data_kwargs.pop("metadata"))
134
129
 
135
- if azcopy.upload.should_use_azcopy(n_bytes or 0) and isinstance(src, Path):
130
+ if azcopy.upload.should_use_azcopy(n_bytes) and isinstance(src, Path):
136
131
  logger.info("Using azcopy to upload %s to %s", src, dest_)
137
132
  try:
138
133
  azcopy.upload.run(
@@ -140,7 +135,7 @@ def upload(
140
135
  src, dest_, content_type=content_type, metadata=decision.metadata, overwrite=True
141
136
  ),
142
137
  dest_,
143
- n_bytes or 0,
138
+ n_bytes,
144
139
  )
145
140
  return source_from_meta()
146
141
 
@@ -158,9 +153,11 @@ def upload(
158
153
  # This is both faster, as well as simpler to reason about, and
159
154
  # in fact was the behavior I had been assuming all along...
160
155
  blob_client.upload_blob(
161
- report_upload_progress(ty.cast(ty.IO, bytes_src), str(dest_), n_bytes or 0),
156
+ report_upload_progress(ty.cast(ty.IO, bytes_src), str(dest_), n_bytes),
162
157
  overwrite=True,
163
- length=n_bytes,
158
+ length=(
159
+ n_bytes if n_bytes > 0 else None
160
+ ), # if we pass 0 to upload_blob, it truncates the write now
164
161
  content_settings=upload_content_settings,
165
162
  connection_timeout=_SLOW_CONNECTION_WORKAROUND,
166
163
  max_concurrency=UPLOAD_FILE_MAX_CONCURRENCY(),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.adls
3
- Version: 4.3.20251008013223
3
+ Version: 4.3.20251008224101
4
4
  Summary: ADLS tools
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  License: MIT
@@ -1,7 +1,7 @@
1
1
  thds/adls/__init__.py,sha256=Z450uhM5BIxgve9qkz_Lw7biVWFKp3Uz_iKaSPYEf1Y,1073
2
2
  thds/adls/_fork_protector.py,sha256=WML1Ul-G_MCziBOyRp6GoNq1IGktn8-OQVjny1IZ1bs,950
3
3
  thds/adls/_progress.py,sha256=0cmFZ2sOEogQY25AkMAhcunXO69FdlbkD1rU_zmLAp0,6648
4
- thds/adls/_upload.py,sha256=mhTdWiQroaugYuwQg7R8CEgdfCYF4xvJthlsqO0jlnE,4692
4
+ thds/adls/_upload.py,sha256=rN-nuRZur9VvSUywaQezUi_qedj-xU30fTBG6rasuIA,4853
5
5
  thds/adls/abfss.py,sha256=nCFfcdwLiwtz_MdbrpLJ9WOhoz0zHIVxsf-tarorEwM,727
6
6
  thds/adls/cached.py,sha256=up1F5JOVXdmwdZ8RAB2UDgiy6ooLg8IMULohBh75VpQ,3034
7
7
  thds/adls/conf.py,sha256=nTw3X1ilC3A_905jZH-rWXFsESeHAKQn5IghvfX2VIo,1991
@@ -24,9 +24,9 @@ thds/adls/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  thds/adls/ro_cache.py,sha256=P-UVFZhqnE5wojqYmRVWZcqjl-pM1xVMm9VAm3nXlnA,4769
25
25
  thds/adls/sas_tokens.py,sha256=mArbB_GYohevOmArw_1gKqVUWpv6kG8Hsbvdrhbtnbg,1957
26
26
  thds/adls/shared_credential.py,sha256=-x42aXoIM001KW59oS8PpuXQd4-F2vg-1gB6OMHlpk4,4602
27
- thds/adls/source.py,sha256=8HVMYuxDn1XYGwFFSBowMlvQ6r2Jm2CQlpu4h85JvsE,2559
28
- thds/adls/source_tree.py,sha256=gl2JLjxAduo4cGQBb8LqBnmRHHk2wqIC5yt-sqkXOEo,2589
29
- thds/adls/upload.py,sha256=7eWPpPuIgCBb3Svk05K1UNv0376q8wDTQkpTaKLtg-w,6694
27
+ thds/adls/source.py,sha256=5t9bc3tUxdh4QdbRvvVd9xF9_Rv4v7CDAqE09M3rE1g,2657
28
+ thds/adls/source_tree.py,sha256=FqVXgvfYPiowrWhRsXBItjvB7t41JRI3sCFVAHxjwgI,2610
29
+ thds/adls/upload.py,sha256=XJvygYzn1r6pZAl460s5j-kY9dgfQ6qeoddID7R6OQ0,6621
30
30
  thds/adls/uri.py,sha256=9MXuW_KfpPvzBc4ERxuTJ3vvi_6yr7e1kMAW9mx2zXM,1414
31
31
  thds/adls/azcopy/__init__.py,sha256=qn2dmT92EHcrtaQ8uwRoUgvtF6Fu3NQbhZItOBdIBmY,45
32
32
  thds/adls/azcopy/download.py,sha256=FOtYyYh7ZXNWNdkj04yTV26lxcKOVj-YhS2p_EclYxA,6526
@@ -38,8 +38,8 @@ thds/adls/tools/download.py,sha256=CW2cWbCRdUqisVVVoqqvqk5Ved7pPGTkwnZj3uV0jy4,1
38
38
  thds/adls/tools/ls.py,sha256=OgEaIfTK359twlZIj-A0AW_nv81Z6zi0b9Tw6OJJfWA,1083
39
39
  thds/adls/tools/ls_fast.py,sha256=Nowc-efAL_Y4ybPwZzKIeh7KGIjfecRzdWvJZcBzq_8,585
40
40
  thds/adls/tools/upload.py,sha256=5WyWkpuVp2PETZ3O3ODlq8LXszSHU73ZMnIDZXPJdC8,442
41
- thds_adls-4.3.20251008013223.dist-info/METADATA,sha256=YNBtKknKx9Hv62k3anREQ5VBI6_KY5LoZ6h68aNLMt8,587
42
- thds_adls-4.3.20251008013223.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- thds_adls-4.3.20251008013223.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
44
- thds_adls-4.3.20251008013223.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
45
- thds_adls-4.3.20251008013223.dist-info/RECORD,,
41
+ thds_adls-4.3.20251008224101.dist-info/METADATA,sha256=cz-SynGzdU2LP2ndI7NyyY5o0JpOHUGOqU0vpxt69tg,587
42
+ thds_adls-4.3.20251008224101.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ thds_adls-4.3.20251008224101.dist-info/entry_points.txt,sha256=rtVF0A2MMTYUsBScF6b3AlOuk2Vm02QK7Tc2bDcDpk0,200
44
+ thds_adls-4.3.20251008224101.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
45
+ thds_adls-4.3.20251008224101.dist-info/RECORD,,