thds.mops 3.9.20250919153256-py3-none-any.whl → 3.10.20251104012416-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

thds/mops/_utils/config_tree.py CHANGED
@@ -154,6 +154,10 @@ class ConfigTree(ty.Generic[V]):
     def __setitem__(self, key: str, value: V) -> None:
         self.setv(value, pathable=key)
 
+    def __contains__(self, key: str) -> bool:
+        """Only answers the specific question - does this exact key exist in the config?"""
+        return key in self.registry
+
     def load_config(self, config: ty.Mapping[str, ty.Any]) -> None:
         """Loads things with an inner key matching this name into the config.
 
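Per its docstring, the new membership test is exact-key only; any hierarchical fallback stays with the getv/setv path. A hypothetical sketch (the no-argument construction and keys are assumptions, not from the package):

    tree: ConfigTree[int] = ConfigTree()  # hypothetical construction
    tree["train.batch_size"] = 32
    assert "train.batch_size" in tree   # exact key exists
    assert "batch_size" not in tree     # no exact entry, whatever getv might resolve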
thds/mops/k8s/apply_yaml.py CHANGED
@@ -1,22 +1,45 @@
+import subprocess
 import tempfile
 
+import kubernetes
 from kubernetes import client, utils
+from packaging import version
 
 
 def format_yaml(yaml_template_str: str, **template_values: str) -> str:
     return yaml_template_str.format(**template_values)
 
 
+def kubectl_apply_file(yaml_path: str) -> None:
+    subprocess.run(["kubectl", "apply", "-f", yaml_path], check=True)
+
+
+def kubectl_apply(yaml_string: str) -> None:
+    with tempfile.NamedTemporaryFile("w", prefix="kubectl-yaml") as f:
+        f.write(yaml_string)
+        f.flush()
+        kubectl_apply_file(f.name)
+
+
+def apply_yaml(yaml_path: str) -> None:
+    if version.parse(kubernetes.__version__) < version.parse("32.0.0"):
+        kubectl_apply_file(yaml_path)  # best effort
+        return
+
+    # NOTE: Prior to 32.0.0, this function doesn't actually server-side apply.
+    # https://github.com/kubernetes-client/python/pull/2252
+    # Hence the check above to use kubectl for older versions.
+    utils.create_from_yaml(client.ApiClient(), yaml_path, apply=True)
+
+
 def create_yaml_template(yaml_str: str, **template_values: str) -> None:
     """Format a YAML template with the given keyword arguments, then apply it to the Kubernetes cluster.
 
     You must already have set up your SDK config.
 
-    NOTE: This function doesn't actually apply, and can't until the next release of the K8S SDK:
-    https://github.com/kubernetes-client/python/pull/2252
     """
     formatted_yaml = format_yaml(yaml_str, **template_values)
     with tempfile.NamedTemporaryFile("w", prefix="kubectl-yaml") as f:
         f.write(formatted_yaml)
         f.flush()
-        utils.create_from_yaml(client.ApiClient(), f.name)
+        apply_yaml(f.name)
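Since create_yaml_template now routes through apply_yaml, behavior depends on the installed client: kubernetes-client < 32.0.0 shells out to kubectl, while newer clients use the SDK's server-side apply (see the linked PR). A hypothetical invocation, assuming kube auth is already configured and with an invented template:

    configmap_template = """\
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: {name}
    data:
      greeting: {greeting}
    """
    create_yaml_template(configmap_template, name="demo-config", greeting="hello")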
thds/mops/k8s/uncertain_future.py CHANGED
@@ -44,20 +44,20 @@ class _FutureInterpretationShim(ty.Generic[R_0, R]):
     def __hash__(self) -> int:
         return hash(self._id)
 
-    def __call__(self, r_0: ty.Optional[R_0], last_seen_at: float) -> ty.Optional[Self]:
+    def interpret(self, r_0: ty.Optional[R_0], last_seen_at: float) -> ty.Optional[Self]:
         """First and foremost - this _must_ be treated as an object that the creator
         is ultimately responsible for calling on a semi-regular basis. It represents a
         likely deadlock for the holder of the Future if it is never called.
 
-        Return False if the Future is still in progress and should not be unregistered.
-        Return True if the Future is done and should be unregistered.
+        Return None if the Future is still in progress and should not be unregistered.
+        Return self if the Future is done and should be unregistered.
         """
         try:
             interpretation = self._interpreter(r_0, last_seen_at)
             if isinstance(interpretation, NotYetDone):
                 return None  # do nothing and do not unregister - the status is still in progress.
 
-            self.future.set_result(interpretation)
+            self.future.set_result(interpretation)  # resolved successfully!
         except Exception as e:
             self.future.set_exception(e)
 
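The rename to interpret makes the protocol easier to see: the user-supplied interpreter receives the latest raw observation (or None when only staleness is known) and either signals NotYetDone or produces the value/exception that resolves the Future. A hypothetical interpreter; everything beyond NotYetDone is illustrative, including the no-argument NotYetDone() construction:

    def job_phase_interpreter(status, last_seen_at: float):
        # hypothetical: 'status' shape is an assumption
        if status is None:
            # no new information - only the last-seen timestamp to go on
            raise TimeoutError(f"no update observed since {last_seen_at}")
        if getattr(status, "phase", None) in ("Pending", "Running"):
            return NotYetDone()  # keeps the Future registered
        return status            # any other value resolves the Future via set_result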
@@ -71,7 +71,7 @@ K = ty.TypeVar("K")  # Key type for the UncertainFuturesTracker
 class _FuturesState(ty.Generic[R_0]):
     """Represents a single 'observable' that may have multiple Futures (and therefore interpretations) associated with it."""
 
-    futshims: list[_FutureInterpretationShim[R_0, ty.Any]]
+    futshims: set[_FutureInterpretationShim[R_0, ty.Any]]
     last_seen_at: float
 
 
@@ -94,10 +94,16 @@ class UncertainFuturesTracker(ty.Generic[K, R_0]):
     never resolve the Future, then a caller may be waiting for it forever. Therefore, we
     ask the original requestor of the Future to specify how long they are willing to wait
     to get a result, after which point we will resolve the Future as an exception.
+
+    Notably, once we have seen an object, we will not ever remove it from our tracking list.
+    This implies a certain amount of memory growth over time, but it avoids race conditions
+    between producers and consumers of the Futures.
     """
 
     def __init__(self, allowed_stale_seconds: float) -> None:
         self._keyed_futures_state = collections.OrderedDict[K, _FuturesState[R_0]]()
+        # ordered from least-recently-seen to most-recently-seen, so that we can easily garbage collect
+        # potentially stale Futures, which will be at the front of the OrderedDict.
         self._lock = threading.Lock()  # i don't trust ordered dict operations to be thread-safe.
         self._check_stale_seconds = allowed_stale_seconds
 
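The ordering invariant described in that comment is the standard OrderedDict move-to-end idiom; a minimal standalone illustration in plain Python (no package code):

    import collections

    seen = collections.OrderedDict(a=1.0, b=2.0, c=3.0)
    seen.move_to_end("a")    # "a" becomes most-recently-seen (back of the dict)
    next(iter(seen))         # "b" - the front holds the least-recently-seen key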
@@ -105,16 +111,11 @@ class UncertainFuturesTracker(ty.Generic[K, R_0]):
         futshim = _FutureInterpretationShim(interpreter)
         with self._lock:
             if key not in self._keyed_futures_state:
-                self._keyed_futures_state[key] = _FuturesState(
-                    [futshim],
-                    last_seen_at=official_timer() + self._check_stale_seconds,
-                    # we provide a double margin for objects that we have never seen before.
-                )
-                self._keyed_futures_state.move_to_end(key, last=False)
-                # never seen and therefore should be at the beginning (most stale)
+                self._keyed_futures_state[key] = _FuturesState({futshim}, last_seen_at=official_timer())
+                # never seen and therefore should be at the end (least stale)
             else:
                 # maintain our ordered dict so we can handle garbage collection of stale Futures.
-                self._keyed_futures_state[key].futshims.append(futshim)
+                self._keyed_futures_state[key].futshims.add(futshim)
 
         return futshim.future
 
@@ -126,10 +127,10 @@ class UncertainFuturesTracker(ty.Generic[K, R_0]):
         If `key` is None, we will update all Futures that have been created so far.
         """
 
-        def check_resolution(fut_state: _FuturesState[R_0], inner_r_0: ty.Optional[R_0]) -> None:
+        def interpret_event(fut_state: _FuturesState[R_0], inner_r_0: ty.Optional[R_0]) -> None:
             for future_shim_that_is_done in core.parallel.yield_results(
                 [
-                    core.thunks.thunking(futshim)(inner_r_0, fut_state.last_seen_at)
+                    core.thunks.thunking(futshim.interpret)(inner_r_0, fut_state.last_seen_at)
                     for futshim in fut_state.futshims
                 ],
                 progress_logger=core.log.getLogger(__name__).debug,
@@ -137,24 +138,33 @@ class UncertainFuturesTracker(ty.Generic[K, R_0]):
         ):
             if future_shim_that_is_done is not None:
                 # the Future is done, so we can remove it from the list of Futures.
-                fut_state.futshims.remove(future_shim_that_is_done)
+                fut_state.futshims.discard(future_shim_that_is_done)
 
         if key is not None:
             with self._lock:
                 if key not in self._keyed_futures_state:
-                    self._keyed_futures_state[key] = _FuturesState(list(), last_seen_at=official_timer())
+                    self._keyed_futures_state[key] = _FuturesState(set(), last_seen_at=official_timer())
                 else:
                     # maintain our ordered dict so we can handle garbage collection of stale Futures.
                     self._keyed_futures_state.move_to_end(key)
                     self._keyed_futures_state[key].last_seen_at = official_timer()
 
-            fut_state = self._keyed_futures_state[key]
-            check_resolution(fut_state, r_0)
+            if fut_state := self._keyed_futures_state.get(key):
+                interpret_event(fut_state, r_0)
 
+        #
         # 'garbage collect' any Futures that haven't been updated in a while.
+        #
+        with self._lock:
+            safe_futures = list(self._keyed_futures_state.values())
+            # this avoids holding the lock, but also avoids RuntimeError: OrderedDict mutated during iteration
+        now = official_timer()
+        for futs_state in safe_futures:
+            if now > futs_state.last_seen_at + self._check_stale_seconds:
+                interpret_event(futs_state, None)
+                # None means we have no new information about the object.
+                # the interpreter must decide what to do with that, plus the last seen time.
+
             else:  # these are ordered, so once we see one that's not stale, we can stop checking.
                 # this prevents us from having to do O(N) checks for every update.
                 break
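The "snapshot then iterate" move in that GC loop is a general pattern worth noting: copying the values under the lock, then iterating the copy, avoids both long lock holds and the RuntimeError from concurrent mutation. A self-contained illustration (names invented):

    import threading

    _lock = threading.Lock()
    _shared: dict[str, int] = {"a": 1, "b": 2}

    def sweep() -> None:
        with _lock:
            snapshot = list(_shared.values())  # O(n) copy under the lock
        for value in snapshot:  # safe even if another thread mutates _shared now
            print(value)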
thds/mops/k8s/watch.py CHANGED
@@ -112,9 +112,14 @@ def callback_events(
 ) -> None:
     """Suitable for use with a daemon thread."""
     for namespace, obj, event in event_yielder:
-        should_exit = on_event(namespace, obj, event)
-        if should_exit:
-            break
+        try:
+            should_exit = on_event(namespace, obj, event)
+            if should_exit:
+                break
+        except Exception:
+            logger.exception(
+                "Exception in k8s watch event callback [probably a bug in mops] - continuing..."
+            )
 
 
 def _default_get_name(obj: ty.Any) -> str:
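The guard keeps a daemon watch thread alive when a callback raises. For reference, the callback contract visible in this hunk: on_event receives (namespace, obj, event) and returns truthy to stop the loop. A hypothetical callback (the event's exact type is an assumption):

    def on_event(namespace: str, obj: object, event: str) -> bool:
        # hypothetical: log every event, stop watching once a deletion is seen
        print(f"{namespace}/{event}: {obj}")
        return event == "DELETED"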
thds/mops/parallel.py CHANGED
@@ -1,7 +1,7 @@
 import concurrent
 import typing as ty
 
-from thds.core import parallel
+from thds.core import inspect, log, parallel
 from thds.core.parallel import (  # noqa: F401; for backward-compatibility, since these came from here originally.
     IterableWithLen,
     IteratorWithLen,
@@ -29,6 +29,7 @@ def parallel_yield_results(
         error_fmt=ERROR,
         success_fmt=DONE,
         named=named,
+        progress_logger=log.getLogger(inspect.caller_module_name(__name__) or __name__).info,
     )
 
 
thds/mops/pure/_magic/api.py CHANGED
@@ -50,6 +50,11 @@ class _MagicApi:
         pipeline_id: str = "",
         calls: ty.Collection[ty.Callable] = tuple(),
     ) -> ty.Callable[[ty.Callable[P, R]], sauce.Magic[P, R]]:
+        """This is the main pure.magic() decorator. It is designed to be applied directly
+        at the site of function definition, i.e. on the `def`. We dynamically capture
+        the fully qualified name of the function being decorated and use that to
+        look up the appropriate 'magic' configuration at the time of each call to the function.
+        """
         return sauce.make_magic(_get_config(), shim_or_builder, blob_root, pipeline_id, calls)
 
     @staticmethod
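Per that docstring, the canonical use is directly on a def. A minimal sketch with the decorator arguments defaulted (the function itself is invented):

    from thds.mops import pure

    @pure.magic()
    def featurize(n: int) -> int:
        return n * 2

    featurize(21)  # config for this call is looked up under the function's qualified name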
@@ -58,10 +63,29 @@ class _MagicApi:
         *,
         blob_root: uris.UriResolvable = "",
         pipeline_id: str = "",
+        config_path: str = "",
     ) -> ty.Callable[[F], F]:  # cleaner type for certain use cases
+        """This alternative API is designed for more dynamic use cases - rather than
+        decorating a function def directly, you can use this to create a more generic
+        decorator that can be applied within other code (not at module-level).
+
+        However, you must never apply pure.magic.deco to the same function from multiple
+        places, as this means that you have multiple different uses sharing the same
+        configuration path, which will lead to subtle bugs.
+
+        We attempt to detect this and raise an error if it happens. If it does, you should
+        provide an explicit unique config_path for each usage.
+        """
         return ty.cast(
             ty.Callable[[F], F],
-            _MagicApi.__call__(shim_or_builder, blob_root=blob_root, pipeline_id=pipeline_id),
+            sauce.make_magic(
+                _get_config(),
+                shim_or_builder,
+                blob_root=blob_root,
+                pipeline_id=pipeline_id,
+                calls=tuple(),
+                config_path=config_path,
+            ),
         )
 
     @staticmethod
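A hypothetical sketch of the deco path with an explicit config_path, following the docstring's guidance (the factory function and path are illustrative; passing None leaves the shim unconfigured, as make_magic permits):

    def build_scorer(threshold: float):
        def scorer(x: int) -> bool:
            return x > threshold
        # applied dynamically, so each wrapped function gets its own unique config_path
        return pure.magic.deco(None, config_path=f"myapp.scorer.t{threshold}")(scorer)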
@@ -122,6 +146,10 @@ class _MagicApi:
         m_config.blob_root.load_config(all_config)
         m_config.pipeline_id.load_config(all_config)
 
+    @staticmethod
+    def config_path(func: ty.Callable) -> str:
+        return sauce.make_magic_config_path(func)
+
 
 magic: ty.Final = _MagicApi()
 # we only instantiate this so we can have a call to magic() that is not __init__.
thds/mops/pure/_magic/sauce.py CHANGED
@@ -6,7 +6,7 @@ import typing as ty
 
 from typing_extensions import ParamSpec
 
-from thds.core import futures, stack_context
+from thds.core import futures, log, stack_context
 from thds.mops._utils import config_tree
 
 from ..core import file_blob_store, pipeline_id, pipeline_id_mask, uris
@@ -38,6 +38,8 @@ class _MagicConfig:
         self.shim_bld[""] = make_builder(samethread_shim)  # default Shim
         self.pipeline_id[""] = "magic"  # default pipeline_id
 
+        self.all_registered_paths: set[str] = set()
+
     def __repr__(self) -> str:
         return f"MagicConfig(shim_bld={self.shim_bld}, blob_root={self.blob_root}, pipeline_id={self.pipeline_id})"
 
@@ -68,15 +70,17 @@ class Magic(ty.Generic[P, R]):
         self,
         func: ty.Callable[P, R],
         config: _MagicConfig,
+        magic_config_path: str,
         calls: ty.Collection[ty.Callable] = frozenset(),
     ):
         functools.update_wrapper(self, func)
-        self._func_config_path = full_name_and_callable(func)[0].replace("--", ".")
+        self._magic_config_path = magic_config_path
 
         self.config = config
+
         if p_id := pipeline_id_mask.extract_from_docstr(func, require=False):
             # this allows the docstring pipeline id to become 'the most specific' config.
-            self.config.pipeline_id.setv(p_id, self._func_config_path)
+            self.config.pipeline_id.setv(p_id, self._magic_config_path)
         self._shim = stack_context.StackContext[ty.Union[None, ShimName, ShimOrBuilder]](
             str(func) + "_SHIM", None  # none means nothing has been set stack-local
         )
@@ -104,7 +108,7 @@ class Magic(ty.Generic[P, R]):
     def _shim_builder_or_off(self) -> ty.Optional[ShimBuilder]:
         if stack_local_shim := self._shim():
             return to_shim_builder(stack_local_shim)
-        return self.config.shim_bld.getv(self._func_config_path)
+        return self.config.shim_bld.getv(self._magic_config_path)
 
     def _is_off(self) -> bool:
         return self._shim_builder_or_off is None
@@ -117,11 +121,11 @@ class Magic(ty.Generic[P, R]):
         return sb(f, args, kwargs)
 
     def _get_blob_root(self) -> str:
-        return self.config.blob_root.getv(self._func_config_path)()
+        return self.config.blob_root.getv(self._magic_config_path)()
 
     @property
     def _pipeline_id(self) -> str:
-        return self.config.pipeline_id.getv(self._func_config_path)
+        return self.config.pipeline_id.getv(self._magic_config_path)
 
     def submit(self, *args: P.args, **kwargs: P.kwargs) -> futures.PFuture[R]:
         """A futures-based interface that doesn't block on the result of the wrapped
@@ -138,26 +142,80 @@ class Magic(ty.Generic[P, R]):
 
     def __repr__(self) -> str:
         return (
-            f"Magic('{self._func_config_path}', shim={self._shim_builder_or_off},"
+            f"Magic('{self._magic_config_path}', shim={self._shim_builder_or_off},"
             f" blob_root='{self._get_blob_root()}', pipeline_id='{self._pipeline_id}')"
         )
 
 
+def make_magic_config_path(func: ty.Callable) -> str:
+    return full_name_and_callable(func)[0].replace("--", ".")
+
+
+class MagicReregistrationError(ValueError):
+    pass
+
+
 def make_magic(
     config: _MagicConfig,
     shim_or_builder: ty.Union[ShimName, ShimOrBuilder, None],
     blob_root: uris.UriResolvable,
     pipeline_id: str,
     calls: ty.Collection[ty.Callable],
+    *,
+    config_path: str = "",
 ) -> ty.Callable[[ty.Callable[P, R]], Magic[P, R]]:
+    """config_path is a dot-separated path that must be unique throughout your application.
+
+    By default it will be set to the thds.other.module.function_name of the decorated function.
+    """
+    error_logger = log.auto(__name__, "thds.mops.pure._magic.api").error
+    err_msg = (
+        "You are probably using pure.magic(.deco) from multiple places on the same function. You will need to specify a unique config_path for each usage."
+        if not config_path
+        else f"You supplied a config_path ({config_path}) but you reused the decorator on different functions with the same config_path."
+    )
+    err_msg += " See the comment in mops.pure._magic.sauce for more details."
+
+    def must_not_remagic_same_func(msg: str) -> None:
+        error_logger(f"{msg}; {err_msg}")
+        # if you see either of the above messages, consider whether you really need the magic
+        # configurability of pure.magic, or whether it might be better to instantiate and use
+        # MemoizingPicklingRunner directly without configurability. The reason overwriting
+        # configs, by applying pure.magic to the same callable from more than one location is
+        # disallowed is that you will get 'spooky action at a distance' between different parts
+        # of your application that are overwriting the base config for the same function.
+        # Another approach would be to use a wrapper `def` with a static @pure.magic decorator
+        # on it that calls the inner function, so that they are completely different functions
+        # as far as pure.magic is concerned.
+        raise MagicReregistrationError(msg)
+
+    magic_config_path_cache: set[str] = set()
+    # the reason for this cache is that there are cases where you may want to apply the _exact
+    # same_ base config to the same function multiple times - just for ease of use. And
+    # since this is the exact same config, we should allow it and treat it as though you
+    # had only applied it once. Of course, if you later try to configure these
+    # applications separately, it won't work - these _are_ the same magic config path, so
+    # they're bound together via that config.
+
     def deco(func: ty.Callable[P, R]) -> Magic[P, R]:
-        fully_qualified_name = full_name_and_callable(func)[0].replace("--", ".")
+        fully_qualified_name = make_magic_config_path(func)
+        magic_config_path = config_path or fully_qualified_name
+
+        def deco_being_reapplied_to_same_func() -> bool:
+            return fully_qualified_name in magic_config_path_cache
+
+        if magic_config_path in config.all_registered_paths and not deco_being_reapplied_to_same_func():
+            must_not_remagic_same_func(f"Cannot re-register {magic_config_path} using pure.magic")
+
         if shim_or_builder is not None:
-            config.shim_bld[fully_qualified_name] = to_shim_builder(shim_or_builder)
+            config.shim_bld[magic_config_path] = to_shim_builder(shim_or_builder)
         if blob_root:  # could be empty string
-            config.blob_root[fully_qualified_name] = uris.to_lazy_uri(blob_root)
+            config.blob_root[magic_config_path] = uris.to_lazy_uri(blob_root)
         if pipeline_id:  # could be empty string
-            config.pipeline_id[fully_qualified_name] = pipeline_id
-        return Magic(func, config, calls)
+            config.pipeline_id[magic_config_path] = pipeline_id
+
+        magic_config_path_cache.add(fully_qualified_name)
+        config.all_registered_paths.add(magic_config_path)
+        return Magic(func, config, magic_config_path, calls)
 
     return deco
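A hypothetical illustration of what the new guard rejects (module and function names invented). Each call to pure.magic() builds a fresh deco with its own cache, so a second, independent application to the same function finds the derived config path already registered and raises:

    @pure.magic()
    def transform(x: int) -> int:
        return x + 1

    # elsewhere: a second pure.magic() applied to the same underlying function derives
    # the same config path -> MagicReregistrationError, unless each usage supplies a
    # unique explicit path, e.g. pure.magic.deco(None, config_path="myapp.transform.b")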
thds/mops/pure/core/source.py CHANGED
@@ -33,6 +33,7 @@ have a Source object returned to it while it performs low-level deserialization.
 """
 
 import io
+import json
 import sys
 import typing as ty
 from functools import partial
@@ -71,19 +72,44 @@ def _hashref_uri(hash: hashing.Hash, type: ty.Literal["local", "remote"]) -> str
     return to_uri(local_hashref)
 
 
-def _read_hashref(hashref_uri: str) -> str:
+class _HashrefMeta(ty.NamedTuple):
+    size: int
+
+    @classmethod
+    def empty(cls) -> "_HashrefMeta":
+        return cls(size=0)
+
+    def serialize(self) -> str:
+        serialized = json.dumps(self._asdict())
+        return serialized
+
+    @classmethod
+    def deserialize(cls, serialized: ty.Union[str, ty.Sequence[str]]) -> "_HashrefMeta":
+        s = serialized if isinstance(serialized, str) else serialized[0]
+        try:
+            return cls(**json.loads(s))
+        except json.JSONDecodeError:
+            logger.warning("Failed to deserialize hashref metadata '%s'", serialized)
+            return cls.empty()
+
+
+def _read_hashref(hashref_uri: str) -> ty.Tuple[str, _HashrefMeta]:
     """Return URI represented by this hashref. Performs IO."""
     uri_bytes = io.BytesIO()
     lookup_blob_store(hashref_uri).readbytesinto(hashref_uri, uri_bytes)
-    uri = uri_bytes.getvalue().decode()
+    content = uri_bytes.getvalue().decode()
+    uri, *rest = content.split("\n")
     assert uri, f"Hashref from {hashref_uri} is empty"
-    return uri
+    if not rest:
+        return uri, _HashrefMeta.empty()
+    return uri, _HashrefMeta.deserialize(rest)
 
 
-def _write_hashref(hashref_uri: str, uri: str) -> None:
+def _write_hashref(hashref_uri: str, uri: str, size: int) -> None:
     """Write URI to this hashref. Performs IO."""
     assert uri, f"Should never encode hashref ({hashref_uri}) pointing to empty URI"
-    lookup_blob_store(hashref_uri).putbytes(hashref_uri, uri.encode(), type_hint="text/plain")
+    content = "\n".join([uri, _HashrefMeta(size=size).serialize()])
+    lookup_blob_store(hashref_uri).putbytes(hashref_uri, content.encode(), type_hint="text/plain")
 
 
 def source_from_hashref(hash: hashing.Hash) -> Source:
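The net change to the hashref payload: line one is still the URI, and a second line now carries JSON metadata (old single-line refs still parse, via the `if not rest` branch). A round-trip sketch using only the code in this hunk (the URI is invented):

    payload = "\n".join(["adls://account/container/blob", _HashrefMeta(size=1234).serialize()])
    uri, *rest = payload.split("\n")
    assert uri == "adls://account/container/blob"
    assert _HashrefMeta.deserialize(rest) == _HashrefMeta(size=1234)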
@@ -92,7 +118,9 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
     local_file_hashref_uri = _hashref_uri(hash, "local")
     remote_hashref_uri = _hashref_uri(hash, "remote")
 
-    def remote_uri(allow_blob_not_found: bool = True) -> str:
+    def remote_uri_and_meta(
+        allow_blob_not_found: bool = True,
+    ) -> ty.Tuple[str, _HashrefMeta]:
         try:
             return _read_hashref(remote_hashref_uri)
         except Exception as e:
@@ -102,7 +130,7 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
                 # 'remote' blob not found is sometimes fine, but anything else is weird
                 # and we should raise.
                 raise
-            return ""
+            return "", _HashrefMeta.empty()
 
     try:
         # we might be on the same machine where this was originally invoked.
@@ -110,7 +138,9 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
         # Then, there's no need to bother grabbing the remote_uri
         # - but for debugging's sake, it's quite nice to actually
         # have the full remote URI as well even if we're ultimately going to use the local copy.
-        return source.from_file(_read_hashref(local_file_hashref_uri), hash=hash, uri=remote_uri())
+        local_uri, _ = _read_hashref(local_file_hashref_uri)
+        remote_uri, _ = remote_uri_and_meta()
+        return source.from_file(local_uri, hash=hash, uri=remote_uri)
     except FileNotFoundError:
         # we are not on the same machine as the local ref. assume we need the remote URI.
         pass
@@ -120,14 +150,17 @@ def source_from_hashref(hash: hashing.Hash) -> Source:
         raise
 
     # no local file, so we assume there must be a remote URI.
-    return source.from_uri(remote_uri(False), hash=hash)
+    remote_uri, meta = remote_uri_and_meta(False)
+    return source.from_uri(remote_uri, hash=hash, size=meta.size)
 
 
-def _upload_and_create_remote_hashref(local_path: Path, remote_uri: str, hash: hashing.Hash) -> None:
+def _upload_and_create_remote_hashref(
+    local_path: Path, remote_uri: str, hash: hashing.Hash, size: int
+) -> None:
     # exists only to provide a local (non-serializable) closure around local_path and remote_uri.
     lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
     # make sure we never overwrite a hashref until it's actually going to be valid.
-    _write_hashref(_hashref_uri(hash, "remote"), remote_uri)
+    _write_hashref(_hashref_uri(hash, "remote"), remote_uri, size)
 
 
 def _auto_remote_uri(hash: hashing.Hash) -> str:
@@ -155,7 +188,7 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__ + "-localhashref",
             source_.hash,
-            partial(_write_hashref, _hashref_uri(source_.hash, "local"), str(local_path)),
+            partial(_write_hashref, _hashref_uri(source_.hash, "local"), str(local_path), source_.size),
         )
         # then also register pending upload - if the URI is a local file, we need to determine a
         # remote URI for this thing automagically; otherwise, use whatever was already
@@ -164,7 +197,9 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__ + "-remotehashref",
             source_.hash,
-            partial(_upload_and_create_remote_hashref, local_path, remote_uri, source_.hash),
+            partial(
+                _upload_and_create_remote_hashref, local_path, remote_uri, source_.hash, source_.size
+            ),
         )
     else:
         # prepare to (later, if necessary) create a remote hashref, because this Source
@@ -172,7 +207,7 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
         deferred_work.add(
             __name__,
             source_.hash,
-            partial(_write_hashref, _hashref_uri(source_.hash, "remote"), source_.uri),
+            partial(_write_hashref, _hashref_uri(source_.hash, "remote"), source_.uri, source_.size),
         )
 
     return hashing.Hash(algo=sys.intern(source_.hash.algo), bytes=source_.hash.bytes)
@@ -192,15 +227,17 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
 # just that mops must detect Sources in the return value and must force an upload on them.
 # In essence, this creates a bifurcated code path for Sources during serialization; if
 # we're "on the way out", we avoid uploading until it is clear that the data will be used
-# in a remote environment. Whereas "on the way back", we must always upload, and nothing
-# can or should be deferred; upload should happen at the time of serialization.
+# in a remote environment. Whereas "on the way back", we must always upload -- there, we
+# defer uploads until everything is serialized, then we perform all deferred uploads in
+# parallel, prior to writing the serialized result.
 #
 # Nevertheless, a local caller should still be able to short-circuit the _download_ by
 # using a locally-created File, if on the same machine where the local file was created.
 
 
 class SourceResult(ty.NamedTuple):
-    """Contains the fully-specified local URI and remote URI, plus (probably) a Hash.
+    """Contains the fully-specified local URI and remote URI, plus (probably) a Hash
+    and a size.
 
     Everything is defined right here. No need for any kind of dynamic lookup, and
     optimization buys us nothing, since memoization only operates on arguments.
@@ -210,6 +247,10 @@ class SourceResult(ty.NamedTuple):
     hash: ty.Optional[hashing.Hash]
     file_uri: str
 
+    size: int = 0
+    # instances of older versions of this namedtuple will be missing this field.
+    # we supply a default for backward-compatibility.
+
 
 class DuplicateSourceBasenameError(ValueError):
     """This is not a catchable error - it will be raised inside the mops result-wrapping
@@ -247,7 +288,7 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
     else:
         file_uri = ""
         logger.debug("Creating a SourceResult for a URI that is presumed to already be uploaded.")
-        return SourceResult(source_.uri, source_.hash, file_uri)
+        return SourceResult(source_.uri, source_.hash, file_uri, source_.size)
 
     # by definition, if this is a file URI, it now needs to be uploaded, because we could
     # be transferring back to an orchestrator on a different machine, but also because a
@@ -277,15 +318,17 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
         partial(_put_file_to_blob_store, local_path, remote_uri),
     )
     # upload must _always_ happen on remotely-returned Sources, as detailed above.
-    return SourceResult(remote_uri, source_.hash, source_.uri)
+    return SourceResult(remote_uri, source_.hash, source_.uri, source_.size)
 
 
-def source_from_source_result(remote_uri: str, hash: ty.Optional[hashing.Hash], file_uri: str) -> Source:
+def source_from_source_result(
+    remote_uri: str, hash: ty.Optional[hashing.Hash], file_uri: str, size: int
+) -> Source:
     """Call when deserializing a remote function return value on the orchestrator side, to
     replace all SourceResults with the intended Source object.
     """
     if not file_uri:
-        return source.from_uri(remote_uri, hash=hash)
+        return source.from_uri(remote_uri, hash=hash, size=size)
 
     local_path = source.path_from_uri(file_uri)
 
@@ -305,7 +348,7 @@ def source_from_source_result(remote_uri: str, hash: ty.Optional[hashing.Hash],
         logger.warning(
             f"Unable to reuse destination local path {local_path} when constructing Source {remote_uri}: {e}"
         )
-        return source.from_uri(remote_uri, hash=hash)
+        return source.from_uri(remote_uri, hash=hash, size=size)
 
 
 def create_source_at_uri(filename: StrOrPath, destination_uri: str) -> Source:
thds/mops/pure/pickling/pickles.py CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
 from thds.core import hashing, log, source
 
 from ..core.script_support import add_main_module_function, get_main_module_function
-from ..core.source import source_from_hashref, source_from_source_result
+from ..core.source import SourceResult, source_from_hashref, source_from_source_result
 from ..core.uris import get_bytes, lookup_blob_store
 
 logger = log.getLogger(__name__)
@@ -131,17 +131,15 @@ class UnpickleSourceHashrefArgument(ty.NamedTuple):
         return source_from_hashref(self.hash)
 
 
-class UnpickleSourceResult(ty.NamedTuple):
+class UnpickleSourceResult(SourceResult):
     """Stability for this is not critical, as it will only ever exist in the result
     payload, which does not participate in memoization.
     """
 
-    remote_uri: str
-    hash: ty.Optional[hashing.Hash]
-    file_uri: str
-
     def __call__(self) -> source.Source:
-        return source_from_source_result(*self)
+        return source_from_source_result(
+            remote_uri=self.remote_uri, hash=self.hash, file_uri=self.file_uri, size=self.size
+        )
 
 
 class UnpickleFunctionWithLogicKey(ty.NamedTuple):
thds/mops/pure/tools/summarize/run_summary.py CHANGED
@@ -5,7 +5,7 @@ import pickle
 import typing as ty
 from pathlib import Path
 
-from thds.core import config, log, pickle_visit, source
+from thds.core import config, files, log, pickle_visit, source
 from thds.mops.pure.core.memo import function_memospace
 from thds.mops.pure.core.metadata import get_invoked_by
 
@@ -152,10 +152,7 @@ def log_function_execution(
     log_entry["uris_in_rvalue"] = sorted(source_uris)
 
     try:
-        assert not log_file.exists(), f"Log file '{log_file}' should not already exist"
-        with log_file.open("w") as f:
-            json.dump(log_entry, f, indent=2)
+        with files.atomic_text_writer(log_file) as file:
+            json.dump(log_entry, file, indent=2)
     except Exception:
-        logger.info(
-            f"Unable to write mops function invocation log file at '{log_file}' - you may have multiple callers for the same invocation"
-        )
+        logger.exception(f"Failed to write mops function invocation log file at '{log_file}'")
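files.atomic_text_writer is internal to thds.core and its exact implementation isn't shown here, but the generic write-to-temp-then-rename pattern it presumably follows looks like this sketch:

    import json
    import os
    import tempfile

    def atomic_json_dump(obj: object, path: str) -> None:
        # write to a temp file in the destination directory, then atomically
        # replace, so readers see either the old file or the complete new one
        fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(obj, f, indent=2)
            os.replace(tmp, path)
        except BaseException:
            os.unlink(tmp)
            raise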
thds_mops-3.9.20250919153256.dist-info/METADATA → thds_mops-3.10.20251104012416.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thds.mops
-Version: 3.9.20250919153256
+Version: 3.10.20251104012416
 Summary: ML Ops tools for Trilliant Health
 Author-email: Trilliant Health <info@trillianthealth.com>
 Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
thds_mops-3.9.20250919153256.dist-info/RECORD → thds_mops-3.10.20251104012416.dist-info/RECORD RENAMED
@@ -2,10 +2,10 @@ thds/mops/__about__.py,sha256=IW_3wy8wEdrVducoBdiVgD7oYOY4J8yO1ezBaPtrc6U,215
 thds/mops/__init__.py,sha256=dbujDxVVfHpWP7OyfjEdNVHLtKx99rsNQPYfjTKn5Lg,127
 thds/mops/_compat.py,sha256=fO1YYEu6LF1re-VXl4P_8RXXLeKt4BgI9NTlHTgNpLk,357
 thds/mops/config.py,sha256=T62YskXvzAfxNgpq2jMatHgoIHfRV_z4cvJ8Rl_TZ6E,2015
-thds/mops/parallel.py,sha256=ynzT7uEtF1sfUi7NS9fHg1I5EhQtSs3p5hNzP3xwAWE,931
+thds/mops/parallel.py,sha256=F6vUhSTO--CY82vyYtWFtspmgd0RxoxQ_EUrCnTm93Q,1039
 thds/mops/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-thds/mops/_utils/config_tree.py,sha256=wMwkw81FDOo2Ld9rEmtobZMW2ZCUmxWSI2Bp_p0pwa8,7151
+thds/mops/_utils/config_tree.py,sha256=Q9mPAAolYPqKe6dkvfetg-wmVARyD7WOx123OLQ8_sU,7326
 thds/mops/_utils/exception.py,sha256=Itj6ceieCdGrKZ2JdW_DIM88Wgvvw104cfbH1RNn6Go,394
 thds/mops/_utils/locked_cache.py,sha256=ROIkwu-_FcXlNyCreWQeE5cyL9XrNW7drWsolTgeajM,2523
 thds/mops/_utils/names.py,sha256=tPPaXCyduUXqmbdvIg3ygevERnKM3YIs868BeaKX5XY,824
@@ -18,7 +18,7 @@ thds/mops/impure/runner.py,sha256=UI1NZWMZ_5TQHfFKLnoiSm2zDR3zCunTKFmJoybkyCo,28
 thds/mops/k8s/__init__.py,sha256=zl4GVcCFRvPscyo6gvv5Lx0OKB7d3QjtVFjYurnxMuE,764
 thds/mops/k8s/_launch.py,sha256=hgPty47CdwryPHKMmEnoxSsSvcSpXhHYSVYnLC2QJb0,10956
 thds/mops/k8s/_shared.py,sha256=MR-s6ijWUHZGjxK_fsOpHuRDB6kuofjo5xiIb7ul2VM,86
-thds/mops/k8s/apply_yaml.py,sha256=hVW6dIVbNdzHdbGlc2VAPGkdByv_rH2oPybyIm7tKIM,820
+thds/mops/k8s/apply_yaml.py,sha256=cGjnMkJ3Ny_D9CgN5FrHAiRZx8VRzA_U5AY4OTD8WxA,1474
 thds/mops/k8s/auth.py,sha256=0zs4TQgkD6VPrhDD43xt7JGwP6uWf3ctySGLcPKN7iw,1691
 thds/mops/k8s/batching.py,sha256=Djt17ffxWyTq4Q7XcAKQdCe9JIIfPahHwm0wqgFqevI,8368
 thds/mops/k8s/config.py,sha256=_znocX5BW8kfG_Cbq6f3apx5FqSihD7Tmic-SBkVjMQ,2992
@@ -31,16 +31,16 @@ thds/mops/k8s/namespace.py,sha256=Z6trVTU9WFashto4PqIhTcxu-foOF93W0TpgqCU7WIA,38
 thds/mops/k8s/node_selection.py,sha256=Gy2Jz8IxZblg2LmtGg8-MtKI4RmXz2AMXqFPP8OQyu0,2065
 thds/mops/k8s/retry.py,sha256=JVfP304kItpLs5nrONHE5UWkVWlrFGlV_oFQqhq3zHg,2846
 thds/mops/k8s/too_old_resource_version.py,sha256=S7ltVA-LrxUpQ8Q__AB0nQmezN8Mmnx5oKK62_baAKI,1500
-thds/mops/k8s/uncertain_future.py,sha256=60v9yVlhnCDN_yUv8l4Z4KafR4TsTGxN7dprkGI8pQQ,7152
+thds/mops/k8s/uncertain_future.py,sha256=_ix-4EqZE_MY5sbLjT-lQ9GIa7woQ3iunPVtDapIhi8,7752
 thds/mops/k8s/wait_job.py,sha256=_X5lSn-3CE4V-_ra0kF1WtxkAiOgqSom8mU1-0hhMio,2445
 thds/mops/k8s/warn_image_backoff.py,sha256=ls_zLSnRbJjO4ICjq1Rk21EXh190l2dT6nKg-PT8Das,1934
-thds/mops/k8s/watch.py,sha256=4LBLZ9s9hp4jSOr3OTBDMWWCEDedgxsUoYt89zCLLsw,14020
+thds/mops/k8s/watch.py,sha256=nH9HBoRCvt8FRhUav9at71QnTCh0plWga3rp7aUYb1E,14212
 thds/mops/k8s/tools/krsync.py,sha256=us7pXX0-bRMwD2oAno7Z6BJcPs6FgaUabHW0STyQJYg,1773
 thds/mops/k8s/tools/krsync.sh,sha256=fWgwkdzWnJeTbzEA_uBiIIi-bNU4nXAYj3dNovyRluU,747
 thds/mops/pure/__init__.py,sha256=3xLimQ2JWdeq1YgPs7bPwlwOspzPRwaR2w2KX7vfJU0,1624
 thds/mops/pure/_magic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-thds/mops/pure/_magic/api.py,sha256=kSlediIZQYsmeHB8plP6osjvUuSEVW4NWdY9ADia12Y,5094
-thds/mops/pure/_magic/sauce.py,sha256=xLaseOhoGgntmz6nZdj2te85sOaOiWExBLMGdeNmKsI,6870
+thds/mops/pure/_magic/api.py,sha256=eG1NEl9FR_FE1yLfN0hiLd0uLWu-gKzw1FCIJR5pMbw,6424
+thds/mops/pure/_magic/sauce.py,sha256=BfzQfEnQarcLuTbKyCKLU8F6BtJ-v6kn1SRIf675cTc,9804
 thds/mops/pure/_magic/shims.py,sha256=CXN8wlHv039oKRzDtp5YFDlwGXmmaheWLCi2I95gSeM,1212
 thds/mops/pure/adls/__init__.py,sha256=fw67xxwnizBurScMa-_zWb94lo5gamEVRt27V4bR0jc,54
 thds/mops/pure/adls/_files.py,sha256=9m35Y4elWF0DjgAXVp4oi5CaY6fXWt8n67PilWxWJns,821
@@ -58,7 +58,7 @@ thds/mops/pure/core/pipeline_id_mask.py,sha256=AVAy06TdNAmivxGec1gahBYvkJCn7yn-g
 thds/mops/pure/core/script_support.py,sha256=4VCBL5AfGSHcZWpOxMw6nnAbQyk1B-979G_OjvUg9B0,953
 thds/mops/pure/core/serialize_big_objs.py,sha256=YcOS1ccs82ZWO7nTbeumErMzYVe4hgXCTsfvMggYmd8,2332
 thds/mops/pure/core/serialize_paths.py,sha256=JoVXFGSA68QbL4oY8tQbp9MoizTCKj_nPRCuA3i03i8,6122
-thds/mops/pure/core/source.py,sha256=R36ajrCU1JdWF-8iD8YqAiP-q39ypZqf2DeBsqC9lYo,15105
+thds/mops/pure/core/source.py,sha256=i6SRgOFfkdaidx6uEq6poGFUU1zQWiqaXPIG-l5zY7Q,16580
 thds/mops/pure/core/types.py,sha256=_3gDwztDKV4Xeyw2jvyMRJAjmR6gRsmfYmsRCcZMUwI,5436
 thds/mops/pure/core/uris.py,sha256=qO9_f-ro7kax6haNOPTPe81-_aUSRFELeeZH4PMTTU4,2694
 thds/mops/pure/core/use_runner.py,sha256=m1Mu1XDr3xRf_u_VSiHfTG4TH6fnSg0IqwmtbLKG_oc,2103
@@ -88,7 +88,7 @@ thds/mops/pure/pickling/__init__.py,sha256=WNdG8PdJCk-kYaXkvvPa--hjYGoUlBXG3w2X8
 thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-jEHlA,8081
 thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
 thds/mops/pure/pickling/mprunner.py,sha256=VWYS_PXLgYJetK69CCZ0-b1109-QBHWssC0MskHww94,8831
-thds/mops/pure/pickling/pickles.py,sha256=CSlnjLssE0Ad8YzqyaKqWCSNyW5LiMFKiXO6hWAZmvU,5097
+thds/mops/pure/pickling/pickles.py,sha256=KYkPexi5mGWjrv9uZxt4iWuBUPyYlME2FQIwRiPlPqc,5134
 thds/mops/pure/pickling/remote.py,sha256=7JXZRGnLI5y5dqElIDrhIlaRv6Q_zQ_78aqNhO7O4KY,8478
 thds/mops/pure/pickling/sha256_b64.py,sha256=HL0cPixHPZYuZDVDBscxsnI-3a2amWEfw-LseOX-PyY,2916
 thds/mops/pure/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -106,11 +106,11 @@ thds/mops/pure/tools/sha256_b64_addressed.py,sha256=SECAiw3xSqpsrBBZix0MgJRTQrbH
 thds/mops/pure/tools/stress.py,sha256=N7C8kLpaGbImeEYlT5jsEl1metvsUu8cnfyQ8vFN0H8,2541
 thds/mops/pure/tools/summarize/__init__.py,sha256=MSmt_5Xg84uHqzTN38JwgseJK8rsJn_11A8WD99VtEo,61
 thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8fmDH8Imc,11524
-thds/mops/pure/tools/summarize/run_summary.py,sha256=w45qiQr7elrHDiK9Hgs85gtU3gwLuXa447ih1Y23BBY,5776
+thds/mops/pure/tools/summarize/run_summary.py,sha256=glEN_YxUGADzp2Ofvr4ZDeHvnZ1znNR7HD7EATn1sPI,5644
 thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
-thds_mops-3.9.20250919153256.dist-info/METADATA,sha256=qFAGHQOOzR874kPcYKsg8bfhifIqP3czu_CtsFF0Jgo,2225
-thds_mops-3.9.20250919153256.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-thds_mops-3.9.20250919153256.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
-thds_mops-3.9.20250919153256.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
-thds_mops-3.9.20250919153256.dist-info/RECORD,,
+thds_mops-3.10.20251104012416.dist-info/METADATA,sha256=3SGjSyqzuAPZqDy7pJ9V90TlWz6VzcVlpL3MQKEmXBk,2226
+thds_mops-3.10.20251104012416.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thds_mops-3.10.20251104012416.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
+thds_mops-3.10.20251104012416.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
+thds_mops-3.10.20251104012416.dist-info/RECORD,,