thds.mops 3.9.20251007062727-py3-none-any.whl → 3.9.20251008224110-py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Note: this version of thds.mops has been flagged as potentially problematic.

thds/mops/k8s/apply_yaml.py

@@ -1,22 +1,45 @@
+import subprocess
 import tempfile
 
+import kubernetes
 from kubernetes import client, utils
+from packaging import version
 
 
 def format_yaml(yaml_template_str: str, **template_values: str) -> str:
     return yaml_template_str.format(**template_values)
 
 
+def kubectl_apply_file(yaml_path: str) -> None:
+    subprocess.run(["kubectl", "apply", "-f", yaml_path], check=True)
+
+
+def kubectl_apply(yaml_string: str) -> None:
+    with tempfile.NamedTemporaryFile("w", prefix="kubectl-yaml") as f:
+        f.write(yaml_string)
+        f.flush()
+        kubectl_apply_file(f.name)
+
+
+def apply_yaml(yaml_path: str) -> None:
+    if version.parse(kubernetes.__version__) < version.parse("32.0.0"):
+        kubectl_apply_file(yaml_path)  # best effort
+        return
+
+    # NOTE: Prior to 32.0.0, this function doesn't actually server-side apply.
+    # https://github.com/kubernetes-client/python/pull/2252
+    # Hence the check above to use kubectl for older versions.
+    utils.create_from_yaml(client.ApiClient(), yaml_path)
+
+
 def create_yaml_template(yaml_str: str, **template_values: str) -> None:
     """Format a YAML template with the given keyword arguments, then apply it to the Kubernetes cluster.
 
     You must already have set up your SDK config.
 
-    NOTE: This function doesn't actually apply, and can't until the next release of the K8S SDK:
-    https://github.com/kubernetes-client/python/pull/2252
     """
     formatted_yaml = format_yaml(yaml_str, **template_values)
     with tempfile.NamedTemporaryFile("w", prefix="kubectl-yaml") as f:
         f.write(formatted_yaml)
         f.flush()
-        utils.create_from_yaml(client.ApiClient(), f.name)
+        apply_yaml(f.name)
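The apply_yaml change above works around a limitation of the kubernetes Python client: per the code comment, create_from_yaml only performs a real server-side apply from client 32.0.0 onward (PR #2252), so older clients fall back to shelling out to kubectl. A minimal sketch of the same version gate, assuming the kubernetes and packaging packages are installed; supports_sdk_apply is a hypothetical helper, not part of thds.mops:

import kubernetes
from packaging import version

def supports_sdk_apply() -> bool:
    # kubernetes.__version__ is the installed client's version string;
    # packaging.version.parse makes it safely comparable (no string compare).
    return version.parse(kubernetes.__version__) >= version.parse("32.0.0")

print("SDK apply supported:", supports_sdk_apply())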
thds/mops/pure/core/source.py

@@ -33,6 +33,7 @@ have a Source object returned to it while it performs low-level deserialization.
 """
 
 import io
+import json
 import sys
 import typing as ty
 from functools import partial
@@ -71,19 +72,44 @@ def _hashref_uri(hash: hashing.Hash, type: ty.Literal["local", "remote"]) -> str
     return to_uri(local_hashref)
 
 
-def _read_hashref(hashref_uri: str) -> str:
+class _HashrefMeta(ty.NamedTuple):
+    size: int
+
+    @classmethod
+    def empty(cls) -> "_HashrefMeta":
+        return cls(size=0)
+
+    def serialize(self) -> str:
+        serialized = json.dumps(self._asdict())
+        return serialized
+
+    @classmethod
+    def deserialize(cls, serialized: ty.Union[str, ty.Sequence[str]]) -> "_HashrefMeta":
+        s = serialized if isinstance(serialized, str) else serialized[0]
+        try:
+            return cls(**json.loads(s))
+        except json.JSONDecodeError:
+            logger.warning("Failed to deserialize hashref metadata '%s'", serialized)
+            return cls.empty()
+
+
+def _read_hashref(hashref_uri: str) -> ty.Tuple[str, _HashrefMeta]:
     """Return URI represented by this hashref. Performs IO."""
     uri_bytes = io.BytesIO()
     lookup_blob_store(hashref_uri).readbytesinto(hashref_uri, uri_bytes)
-    uri = uri_bytes.getvalue().decode()
+    content = uri_bytes.getvalue().decode()
+    uri, *rest = content.split("\n")
     assert uri, f"Hashref from {hashref_uri} is empty"
-    return uri
+    if not rest:
+        return uri, _HashrefMeta.empty()
+    return uri, _HashrefMeta.deserialize(rest)
 
 
-def _write_hashref(hashref_uri: str, uri: str) -> None:
+def _write_hashref(hashref_uri: str, uri: str, size: int) -> None:
     """Write URI to this hashref. Performs IO."""
     assert uri, f"Should never encode hashref ({hashref_uri}) pointing to empty URI"
-    lookup_blob_store(hashref_uri).putbytes(hashref_uri, uri.encode(), type_hint="text/plain")
+    content = "\n".join([uri, _HashrefMeta(size=size).serialize()])
+    lookup_blob_store(hashref_uri).putbytes(hashref_uri, content.encode(), type_hint="text/plain")
 
 
 def source_from_hashref(hash: hashing.Hash) -> Source:
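The new _HashrefMeta plumbing above defines a tiny line-oriented wire format for hashref blobs: the target URI on line 1, JSON metadata (currently just the size) on line 2, and a graceful fallback to empty metadata for refs written by older releases, whose blobs contain only the URI. A self-contained round-trip sketch of that format; encode_hashref/decode_hashref are hypothetical names, and the real functions read and write through a blob store rather than returning bytes:

import json

def encode_hashref(uri: str, size: int) -> bytes:
    # line 1: the URI; line 2: JSON metadata, currently just {"size": ...}
    return "\n".join([uri, json.dumps({"size": size})]).encode()

def decode_hashref(payload: bytes) -> tuple:
    uri, *rest = payload.decode().split("\n")
    meta = json.loads(rest[0]) if rest else {"size": 0}  # old refs: URI only
    return uri, meta

assert decode_hashref(encode_hashref("file:///tmp/blob", 42)) == ("file:///tmp/blob", {"size": 42})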
@@ -92,7 +118,9 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
     local_file_hashref_uri = _hashref_uri(hash, "local")
     remote_hashref_uri = _hashref_uri(hash, "remote")
 
-    def remote_uri(allow_blob_not_found: bool = True) -> str:
+    def remote_uri_and_meta(
+        allow_blob_not_found: bool = True,
+    ) -> ty.Tuple[str, _HashrefMeta]:
         try:
             return _read_hashref(remote_hashref_uri)
         except Exception as e:
@@ -102,7 +130,7 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
                 # 'remote' blob not found is sometimes fine, but anything else is weird
                 # and we should raise.
                 raise
-        return ""
+        return "", _HashrefMeta.empty()
 
     try:
         # we might be on the same machine where this was originally invoked.
@@ -110,7 +138,9 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
         # Then, there's no need to bother grabbing the remote_uri
         # - but for debugging's sake, it's quite nice to actually
         # have the full remote URI as well even if we're ultimately going to use the local copy.
-        return source.from_file(_read_hashref(local_file_hashref_uri), hash=hash, uri=remote_uri())
+        local_uri, _ = _read_hashref(local_file_hashref_uri)
+        remote_uri, _ = remote_uri_and_meta()
+        return source.from_file(local_uri, hash=hash, uri=remote_uri)
     except FileNotFoundError:
         # we are not on the same machine as the local ref. assume we need the remote URI.
         pass
@@ -120,14 +150,17 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
         raise
 
     # no local file, so we assume there must be a remote URI.
-    return source.from_uri(remote_uri(False), hash=hash)
+    remote_uri, meta = remote_uri_and_meta(False)
+    return source.from_uri(remote_uri, hash=hash, size=meta.size)
 
 
-def _upload_and_create_remote_hashref(local_path: Path, remote_uri: str, hash: hashing.Hash) -> None:
+def _upload_and_create_remote_hashref(
+    local_path: Path, remote_uri: str, hash: hashing.Hash, size: int
+) -> None:
     # exists only to provide a local (non-serializable) closure around local_path and remote_uri.
     lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
     # make sure we never overwrite a hashref until it's actually going to be valid.
-    _write_hashref(_hashref_uri(hash, "remote"), remote_uri)
+    _write_hashref(_hashref_uri(hash, "remote"), remote_uri, size)
 
 
 def _auto_remote_uri(hash: hashing.Hash) -> str:
@@ -155,7 +188,7 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__ + "-localhashref",
             source_.hash,
-            partial(_write_hashref, _hashref_uri(source_.hash, "local"), str(local_path)),
+            partial(_write_hashref, _hashref_uri(source_.hash, "local"), str(local_path), source_.size),
         )
         # then also register pending upload - if the URI is a local file, we need to determine a
         # remote URI for this thing automagically; otherwise, use whatever was already
@@ -164,7 +197,9 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__ + "-remotehashref",
             source_.hash,
-            partial(_upload_and_create_remote_hashref, local_path, remote_uri, source_.hash),
+            partial(
+                _upload_and_create_remote_hashref, local_path, remote_uri, source_.hash, source_.size
+            ),
         )
     else:
         # prepare to (later, if necessary) create a remote hashref, because this Source
@@ -172,7 +207,7 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__,
             source_.hash,
-            partial(_write_hashref, _hashref_uri(source_.hash, "remote"), source_.uri),
+            partial(_write_hashref, _hashref_uri(source_.hash, "remote"), source_.uri, source_.size),
         )
 
     return hashing.Hash(algo=sys.intern(source_.hash.algo), bytes=source_.hash.bytes)
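The deferred_work.add calls above follow a bind-now, run-later pattern: functools.partial freezes the hashref URI, local path, and (now) the size into a zero-argument thunk whose IO runs only if and when the deferred work is triggered. A generic sketch of the pattern; the names below are illustrative, not the mops deferred-work API:

from functools import partial

pending = []

def write_ref(ref_uri: str, target: str, size: int) -> None:
    print(f"writing ref {ref_uri} -> {target} ({size} bytes)")

# bind all arguments up front; nothing runs yet
pending.append(partial(write_ref, "ref://abc123", "file:///tmp/data", 42))

# ... later, once it is clear the work is needed:
for thunk in pending:
    thunk()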
@@ -192,15 +227,17 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
 # just that mops must detect Sources in the return value and must force an upload on them.
 # In essence, this creates a bifurcated code path for Sources during serialization; if
 # we're "on the way out", we avoid uploading until it is clear that the data will be used
-# in a remote environment. Whereas "on the way back", we must always upload, and nothing
-# can or should be deferred; upload should happen at the time of serialization.
+# in a remote environment. Whereas "on the way back", we must always upload -- there, we
+# defer uploads until everything is serialized, then we perform all deferred uploads in
+# parallel, prior to writing the serialized result.
 #
 # Nevertheless, a local caller should still be able to short-circuit the _download_ by
 # using a locally-created File, if on the same machine where the local file was created.
 
 
 class SourceResult(ty.NamedTuple):
-    """Contains the fully-specified local URI and remote URI, plus (probably) a Hash.
+    """Contains the fully-specified local URI and remote URI, plus (probably) a Hash
+    and a size.
 
     Everything is defined right here. No need for any kind of dynamic lookup, and
     optimization buys us nothing, since memoization only operates on arguments.
@@ -210,6 +247,10 @@ class SourceResult(ty.NamedTuple):
     hash: ty.Optional[hashing.Hash]
     file_uri: str
 
+    size: int = 0
+    # instances of older versions of this namedtuple will be missing this field.
+    # we supply a default for backward-compatibility.
+
 
 class DuplicateSourceBasenameError(ValueError):
     """This is not a catchable error - it will be raised inside the mops result-wrapping
@@ -247,7 +288,7 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
     else:
         file_uri = ""
         logger.debug("Creating a SourceResult for a URI that is presumed to already be uploaded.")
-        return SourceResult(source_.uri, source_.hash, file_uri)
+        return SourceResult(source_.uri, source_.hash, file_uri, source_.size)
 
     # by definition, if this is a file URI, it now needs to be uploaded, because we could
     # be transferring back to an orchestrator on a different machine, but also because a
@@ -277,15 +318,17 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
         partial(_put_file_to_blob_store, local_path, remote_uri),
     )
     # upload must _always_ happen on remotely-returned Sources, as detailed above.
-    return SourceResult(remote_uri, source_.hash, source_.uri)
+    return SourceResult(remote_uri, source_.hash, source_.uri, source_.size)
 
 
-def source_from_source_result(remote_uri: str, hash: ty.Optional[hashing.Hash], file_uri: str) -> Source:
+def source_from_source_result(
+    remote_uri: str, hash: ty.Optional[hashing.Hash], file_uri: str, size: int
+) -> Source:
     """Call when deserializing a remote function return value on the orchestrator side, to
     replace all SourceResults with the intended Source object.
     """
     if not file_uri:
-        return source.from_uri(remote_uri, hash=hash)
+        return source.from_uri(remote_uri, hash=hash, size=size)
 
     local_path = source.path_from_uri(file_uri)
 
@@ -305,7 +348,7 @@ def source_from_source_result(remote_uri: str, hash: ty.Optional[hashing.Hash],
         logger.warning(
             f"Unable to reuse destination local path {local_path} when constructing Source {remote_uri}: {e}"
         )
-        return source.from_uri(remote_uri, hash=hash)
+        return source.from_uri(remote_uri, hash=hash, size=size)
 
 
 def create_source_at_uri(filename: StrOrPath, destination_uri: str) -> Source:
thds/mops/pure/pickling/pickles.py

@@ -14,7 +14,7 @@ from pathlib import Path
 from thds.core import hashing, log, source
 
 from ..core.script_support import add_main_module_function, get_main_module_function
-from ..core.source import source_from_hashref, source_from_source_result
+from ..core.source import SourceResult, source_from_hashref, source_from_source_result
 from ..core.uris import get_bytes, lookup_blob_store
 
 logger = log.getLogger(__name__)
@@ -131,17 +131,15 @@ class UnpickleSourceHashrefArgument(ty.NamedTuple):
         return source_from_hashref(self.hash)
 
 
-class UnpickleSourceResult(ty.NamedTuple):
+class UnpickleSourceResult(SourceResult):
     """Stability for this is not critical, as it will only ever exist in the result
     payload, which does not participate in memoization.
     """
 
-    remote_uri: str
-    hash: ty.Optional[hashing.Hash]
-    file_uri: str
-
     def __call__(self) -> source.Source:
-        return source_from_source_result(*self)
+        return source_from_source_result(
+            remote_uri=self.remote_uri, hash=self.hash, file_uri=self.file_uri, size=self.size
+        )
 
 
 class UnpickleFunctionWithLogicKey(ty.NamedTuple):
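UnpickleSourceResult now inherits its fields from SourceResult instead of redeclaring them. This works because the class typing.NamedTuple generates is an ordinary tuple subclass, so a child class can layer methods (though not new fields) on top of it. A minimal sketch of the pattern, with hypothetical names:

import typing as ty

class Result(ty.NamedTuple):
    uri: str
    size: int = 0

class CallableResult(Result):  # ordinary subclass of the generated tuple class
    def __call__(self) -> str:
        return f"{self.uri} ({self.size} bytes)"

assert CallableResult("file:///tmp/x", 42)() == "file:///tmp/x (42 bytes)"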
thds_mops-3.9.20251008224110.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thds.mops
-Version: 3.9.20251007062727
+Version: 3.9.20251008224110
 Summary: ML Ops tools for Trilliant Health
 Author-email: Trilliant Health <info@trillianthealth.com>
 Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
thds_mops-3.9.20251008224110.dist-info/RECORD

@@ -18,7 +18,7 @@ thds/mops/impure/runner.py,sha256=UI1NZWMZ_5TQHfFKLnoiSm2zDR3zCunTKFmJoybkyCo,28
 thds/mops/k8s/__init__.py,sha256=zl4GVcCFRvPscyo6gvv5Lx0OKB7d3QjtVFjYurnxMuE,764
 thds/mops/k8s/_launch.py,sha256=hgPty47CdwryPHKMmEnoxSsSvcSpXhHYSVYnLC2QJb0,10956
 thds/mops/k8s/_shared.py,sha256=MR-s6ijWUHZGjxK_fsOpHuRDB6kuofjo5xiIb7ul2VM,86
-thds/mops/k8s/apply_yaml.py,sha256=hVW6dIVbNdzHdbGlc2VAPGkdByv_rH2oPybyIm7tKIM,820
+thds/mops/k8s/apply_yaml.py,sha256=zMiQ_k1KcdlIg6diqp6FJD_74I_01YD3cZWA9lqu1fo,1462
 thds/mops/k8s/auth.py,sha256=0zs4TQgkD6VPrhDD43xt7JGwP6uWf3ctySGLcPKN7iw,1691
 thds/mops/k8s/batching.py,sha256=Djt17ffxWyTq4Q7XcAKQdCe9JIIfPahHwm0wqgFqevI,8368
 thds/mops/k8s/config.py,sha256=_znocX5BW8kfG_Cbq6f3apx5FqSihD7Tmic-SBkVjMQ,2992
@@ -58,7 +58,7 @@ thds/mops/pure/core/pipeline_id_mask.py,sha256=AVAy06TdNAmivxGec1gahBYvkJCn7yn-g
 thds/mops/pure/core/script_support.py,sha256=4VCBL5AfGSHcZWpOxMw6nnAbQyk1B-979G_OjvUg9B0,953
 thds/mops/pure/core/serialize_big_objs.py,sha256=YcOS1ccs82ZWO7nTbeumErMzYVe4hgXCTsfvMggYmd8,2332
 thds/mops/pure/core/serialize_paths.py,sha256=JoVXFGSA68QbL4oY8tQbp9MoizTCKj_nPRCuA3i03i8,6122
-thds/mops/pure/core/source.py,sha256=R36ajrCU1JdWF-8iD8YqAiP-q39ypZqf2DeBsqC9lYo,15105
+thds/mops/pure/core/source.py,sha256=i6SRgOFfkdaidx6uEq6poGFUU1zQWiqaXPIG-l5zY7Q,16580
 thds/mops/pure/core/types.py,sha256=_3gDwztDKV4Xeyw2jvyMRJAjmR6gRsmfYmsRCcZMUwI,5436
 thds/mops/pure/core/uris.py,sha256=qO9_f-ro7kax6haNOPTPe81-_aUSRFELeeZH4PMTTU4,2694
 thds/mops/pure/core/use_runner.py,sha256=m1Mu1XDr3xRf_u_VSiHfTG4TH6fnSg0IqwmtbLKG_oc,2103
@@ -88,7 +88,7 @@ thds/mops/pure/pickling/__init__.py,sha256=WNdG8PdJCk-kYaXkvvPa--hjYGoUlBXG3w2X8
 thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-jEHlA,8081
 thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
 thds/mops/pure/pickling/mprunner.py,sha256=VWYS_PXLgYJetK69CCZ0-b1109-QBHWssC0MskHww94,8831
-thds/mops/pure/pickling/pickles.py,sha256=CSlnjLssE0Ad8YzqyaKqWCSNyW5LiMFKiXO6hWAZmvU,5097
+thds/mops/pure/pickling/pickles.py,sha256=KYkPexi5mGWjrv9uZxt4iWuBUPyYlME2FQIwRiPlPqc,5134
 thds/mops/pure/pickling/remote.py,sha256=7JXZRGnLI5y5dqElIDrhIlaRv6Q_zQ_78aqNhO7O4KY,8478
 thds/mops/pure/pickling/sha256_b64.py,sha256=HL0cPixHPZYuZDVDBscxsnI-3a2amWEfw-LseOX-PyY,2916
 thds/mops/pure/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -109,8 +109,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
 thds/mops/pure/tools/summarize/run_summary.py,sha256=w45qiQr7elrHDiK9Hgs85gtU3gwLuXa447ih1Y23BBY,5776
 thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
-thds_mops-3.9.20251007062727.dist-info/METADATA,sha256=XPhDxj2piPOxhXs_SV5dtp3fjsev-jY539-mM9fEu6U,2225
-thds_mops-3.9.20251007062727.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-thds_mops-3.9.20251007062727.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
-thds_mops-3.9.20251007062727.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
-thds_mops-3.9.20251007062727.dist-info/RECORD,,
+thds_mops-3.9.20251008224110.dist-info/METADATA,sha256=XSLkz2hWjPoo5hbsr8buF_n883pZ43NaHu5lkRjmngI,2225
+thds_mops-3.9.20251008224110.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thds_mops-3.9.20251008224110.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
+thds_mops-3.9.20251008224110.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
+thds_mops-3.9.20251008224110.dist-info/RECORD,,