thds.mops 3.9.20250721225429__py3-none-any.whl → 3.9.20250722150738__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. thds/mops/impure/runner.py +1 -1
  2. thds/mops/k8s/__init__.py +3 -1
  3. thds/mops/k8s/{launch.py → _launch.py} +56 -57
  4. thds/mops/k8s/batching.py +198 -0
  5. thds/mops/k8s/config.py +1 -1
  6. thds/mops/k8s/counts.py +28 -0
  7. thds/mops/k8s/job_future.py +109 -0
  8. thds/mops/k8s/jobs.py +4 -0
  9. thds/mops/k8s/logging.py +37 -5
  10. thds/mops/k8s/uncertain_future.py +160 -0
  11. thds/mops/k8s/watch.py +120 -62
  12. thds/mops/pure/__init__.py +2 -1
  13. thds/mops/pure/_magic/sauce.py +11 -3
  14. thds/mops/pure/_magic/shims.py +2 -2
  15. thds/mops/pure/core/deferred_work.py +0 -8
  16. thds/mops/pure/core/entry/runner_registry.py +1 -10
  17. thds/mops/pure/core/lock/__init__.py +1 -0
  18. thds/mops/pure/core/lock/_acquire.py +2 -2
  19. thds/mops/pure/core/lock/maintain.py +22 -3
  20. thds/mops/pure/core/lock/write.py +19 -19
  21. thds/mops/pure/core/memo/__init__.py +1 -1
  22. thds/mops/pure/core/memo/results.py +5 -4
  23. thds/mops/pure/core/use_runner.py +21 -7
  24. thds/mops/pure/pickling/mprunner.py +21 -14
  25. thds/mops/pure/pickling/pickles.py +19 -8
  26. thds/mops/pure/pickling/remote.py +3 -1
  27. thds/mops/pure/runner/get_results.py +106 -0
  28. thds/mops/pure/runner/local.py +58 -87
  29. thds/mops/pure/runner/shim_builder.py +7 -7
  30. thds/mops/pure/runner/simple_shims.py +7 -0
  31. thds/mops/pure/runner/types.py +15 -4
  32. thds/mops/pure/tools/summarize/run_summary.py +9 -8
  33. {thds_mops-3.9.20250721225429.dist-info → thds_mops-3.9.20250722150738.dist-info}/METADATA +1 -1
  34. {thds_mops-3.9.20250721225429.dist-info → thds_mops-3.9.20250722150738.dist-info}/RECORD +37 -32
  35. {thds_mops-3.9.20250721225429.dist-info → thds_mops-3.9.20250722150738.dist-info}/WHEEL +0 -0
  36. {thds_mops-3.9.20250721225429.dist-info → thds_mops-3.9.20250722150738.dist-info}/entry_points.txt +0 -0
  37. {thds_mops-3.9.20250721225429.dist-info → thds_mops-3.9.20250722150738.dist-info}/top_level.txt +0 -0
thds/mops/k8s/uncertain_future.py ADDED
@@ -0,0 +1,160 @@
+ import collections
+ import threading
+ import time
+ import typing as ty
+
+ # we use concurrent.futures.Future as an implementation detail, but it's communicated
+ # as core.futures.PFuture to give us the flexibility to change the implementation later if needed.
+ from concurrent.futures import Future
+ from dataclasses import dataclass
+ from uuid import uuid4
+
+ from typing_extensions import Self
+
+ from thds import core
+
+ R_0 = ty.TypeVar("R_0", contravariant=True)  # R-naught - the thing that might resolve a Future.
+ # a value for this type may never be None.
+
+ R = ty.TypeVar("R")
+ # the Result type of the Future. These are allowed to be None, since some Futures may
+ # resolve but not return a value.
+
+
+ class NotYetDone:
+     pass
+
+
+ _LastSeenAt = float  # type alias for the last seen time of the Future, in seconds since epoch
+
+
+ FutureInterpreter = ty.Callable[[ty.Optional[R_0], _LastSeenAt], ty.Union[R, NotYetDone]]
+ # a FutureInterpreter is a function that takes an object R_0 and the time.monotonic() at
+ # which it was last seen, and returns either NotYetDone (if the status is still in progress) or
+ # the actual Future result of type R, or, if the status is failure,
+ # _raises_ an appropriate Exception.
+
+
+ class _FutureInterpretationShim(ty.Generic[R_0, R]):
+     def __init__(self, interpreter: FutureInterpreter[R_0, ty.Union[NotYetDone, R]]) -> None:
+         self.future = Future[R]()
+         self._interpreter = interpreter
+         self._id = uuid4().hex  # has an id so it can be hashed and therefore easily found in a set
+
+     def __hash__(self) -> int:
+         return hash(self._id)
+
+     def __call__(self, r_0: ty.Optional[R_0], last_seen_at: float) -> ty.Optional[Self]:
+         """First and foremost - this _must_ be treated as an object that the creator
+         is ultimately responsible for calling on a semi-regular basis. It represents a
+         likely deadlock for the holder of the Future if it is never called.
+
+         Return None if the Future is still in progress and should not be unregistered.
+         Return self if the Future is done and should be unregistered.
+         """
+         try:
+             interpretation = self._interpreter(r_0, last_seen_at)
+             if isinstance(interpretation, NotYetDone):
+                 return None  # do nothing and do not unregister - the status is still in progress.
+
+             self.future.set_result(interpretation)
+         except Exception as e:
+             self.future.set_exception(e)
+
+         return self
+
+
+ K = ty.TypeVar("K")  # Key type for the UncertainFuturesTracker
+
+
+ @dataclass
+ class _FuturesState(ty.Generic[R_0]):
+     """Represents a single 'observable' that may have multiple Futures (and therefore interpretations) associated with it."""
+
+     futshims: list[_FutureInterpretationShim[R_0, ty.Any]]
+     last_seen_at: float
+
+
+ def official_timer() -> float:
+     # we don't need any particular meaning to the time.
+     return time.monotonic()
+
+
+ class UncertainFuturesTracker(ty.Generic[K, R_0]):
+     """This class represents a kind of Future where we cannot be guaranteed that we will ever see
+     any further information about it, because we do not control the source of the data.
+
+     A good example would be a Kubernetes object that we are watching - we may _think_ that a Job will be created,
+     but there are race conditions galore in terms of actually looking for that object.
+
+     However, if we _do_ see it at some point, then we can interpret future 'missingness'
+     as a tentative success.
+
+     The danger with this uncertainty is that Futures represent implicit deadlocks - if we
+     never resolve the Future, then a caller may be waiting for it forever. Therefore, we
+     ask the original requestor of the Future to specify how long they are willing to wait
+     to get a result, after which point we will resolve the Future as an exception.
+     """
+
+     def __init__(self, allowed_stale_seconds: float) -> None:
+         self._keyed_futures_state = collections.OrderedDict[K, _FuturesState[R_0]]()
+         self._lock = threading.Lock()  # i don't trust ordered dict operations to be thread-safe.
+         self._check_stale_seconds = allowed_stale_seconds
+
+     def create(self, key: K, interpreter: FutureInterpreter[R_0, R]) -> core.futures.PFuture[R]:
+         futshim = _FutureInterpretationShim(interpreter)
+         with self._lock:
+             if key not in self._keyed_futures_state:
+                 self._keyed_futures_state[key] = _FuturesState(
+                     [futshim],
+                     last_seen_at=official_timer() + self._check_stale_seconds,
+                     # we provide a double margin for objects that we have never seen before.
+                 )
+                 self._keyed_futures_state.move_to_end(key, last=False)
+                 # never seen and therefore should be at the beginning (most stale)
+             else:
+                 # maintain our ordered dict so we can handle garbage collection of stale Futures.
+                 self._keyed_futures_state[key].futshims.append(futshim)
+
+         return futshim.future
+
+     def update(self, key: ty.Optional[K], r_0: ty.Optional[R_0]) -> None:
+         """Update the keyed Futures based on their interpreters.
+
+         Also check any stale Futures - Futures that have not seen an update (via their key) in a while.
+
+         If `key` is None, we will update all Futures that have been created so far.
+         """
+
+         def check_resolution(fut_state: _FuturesState[R_0], inner_r_0: ty.Optional[R_0]) -> None:
+             for future_shim_that_is_done in core.parallel.yield_results(
+                 [
+                     core.thunks.thunking(futshim)(inner_r_0, fut_state.last_seen_at)
+                     for futshim in fut_state.futshims
+                 ],
+                 progress_logger=core.log.getLogger(__name__).debug,
+                 named="UncertainFuturesTracker.update",
+             ):
+                 if future_shim_that_is_done is not None:
+                     # the Future is done, so we can remove it from the list of Futures.
+                     fut_state.futshims.remove(future_shim_that_is_done)
+
+         if key is not None:
+             with self._lock:
+                 if key not in self._keyed_futures_state:
+                     self._keyed_futures_state[key] = _FuturesState(list(), last_seen_at=official_timer())
+                 else:
+                     # maintain our ordered dict so we can handle garbage collection of stale Futures.
+                     self._keyed_futures_state.move_to_end(key)
+                     self._keyed_futures_state[key].last_seen_at = official_timer()
+
+             fut_state = self._keyed_futures_state[key]
+             check_resolution(fut_state, r_0)
+
+         # 'garbage collect' any Futures that haven't been updated in a while.
+         for futs_state in self._keyed_futures_state.values():
+             if futs_state.last_seen_at + self._check_stale_seconds < official_timer():
+                 check_resolution(futs_state, None)
+             else:  # these are ordered, so once we see one that's not stale, we can stop checking.
+                 # this prevents us from having to do O(N) checks for every update.
+                 break
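
For orientation, here is a minimal, hypothetical sketch of the contract this new module defines: one side registers an interpreter for a key via `create`, and an event source drives resolution via `update`. The `JobStatus` type and the key string below are illustrative stand-ins rather than anything in the package, and `PFuture.result()` is assumed to behave like `concurrent.futures.Future.result()`.

```python
import typing as ty
from dataclasses import dataclass

from thds.mops.k8s.uncertain_future import NotYetDone, UncertainFuturesTracker


@dataclass
class JobStatus:  # hypothetical observable, standing in for a watched k8s object
    succeeded: bool
    message: str = ""


def interpret(status: ty.Optional[JobStatus], last_seen_at: float) -> ty.Union[str, NotYetDone]:
    if status is None:
        # the tracker passes None during its staleness sweep; raising here turns
        # prolonged silence into a failed Future instead of a deadlock.
        raise TimeoutError("no update within the allowed staleness window")
    if status.succeeded:
        return status.message or "done"  # any non-NotYetDone return resolves the Future
    return NotYetDone()  # still in progress; the Future stays pending


tracker: UncertainFuturesTracker[str, JobStatus] = UncertainFuturesTracker(allowed_stale_seconds=300.0)
fut = tracker.create("my-namespace/my-job", interpret)

# an event source (e.g. a watch thread) feeds observations in:
tracker.update("my-namespace/my-job", JobStatus(succeeded=True, message="ok"))
print(fut.result(timeout=5))  # -> "ok"
```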
thds/mops/k8s/watch.py CHANGED
@@ -12,17 +12,20 @@ import urllib3
  from kubernetes import client
  from kubernetes import watch as k8s_watch
 
- from thds.core import scope
+ from thds.core import futures, scope
  from thds.core.log import getLogger, logger_context
  from thds.termtool.colorize import colorized
 
  from . import config
  from .auth import load_config
  from .too_old_resource_version import parse_too_old_resource_version
+ from .uncertain_future import FutureInterpreter, UncertainFuturesTracker
 
  logger = getLogger(__name__)
 
  T = ty.TypeVar("T")
+ K = ty.TypeVar("K")
+ R = ty.TypeVar("R")
 
 
  class V1List(ty.Protocol[T]):
@@ -115,10 +118,6 @@ def callback_events(
          break
 
 
- def _make_name(namespace: str, name: str) -> str:
-     return f"{namespace}/{name}"
-
-
  def _default_get_name(obj: ty.Any) -> str:
      return obj.metadata.name
 
@@ -148,9 +147,15 @@ class OneShotLimiter:
          self._names.add(name)
 
 
+ def _watch_timer() -> float:
+     # in this context, monotonicity (actual timing) is most useful because we don't need sentinels.
+     return time.monotonic()
+
+
  def is_stale(api_last_update_time: float, obj_last_seen_time: float) -> bool:
-     now = time.monotonic()
+     now = _watch_timer()
      allowed_stale_seconds = config.k8s_watch_object_stale_seconds()
+     # about 5 minutes by default as of 2025-07-15.
      if (time_since_api_update := now - api_last_update_time) > allowed_stale_seconds:  # noqa: F841
          # we haven't heard anything from the API in a while; probably
          # the API is down. Ignore object staleness to avoid false positives.
@@ -223,93 +228,146 @@ def watch_forever(
          break
 
 
- class WatchingObjectSource(ty.Generic[T]):
-     """Efficiently 'get' objects by reliably watching for changes to all such objects in a given namespace.
-
-     This is network-efficient for observing many different objects,
-     but not memory efficient if you really only need to fetch details
-     for a few objects.
+ class _SeenObjectContainer(ty.Generic[K, T]):
+     """Splits some of the logic for 'get' out of WatchingObjectSource
+     so that we can have it be a simpler container for both this and the UncertainFuturesTracker.
      """
 
      def __init__(
          self,
-         get_list_method: GetListMethod[T],
-         get_name: ty.Callable[[T], str] = ty.cast(  # noqa: B008
-             ty.Callable[[T], str], _default_get_name
-         ),
-         backup_fetch: ty.Optional[ty.Callable[[str, str], T]] = None,
-         typename: str = "object",
-         starting: ty.Callable[[str], str] = STARTING,
+         backup_fetch: ty.Optional[ty.Callable[[K], ty.Optional[T]]] = None,
      ) -> None:
-         self.get_list_method = get_list_method
-         self.get_name = get_name
-         self.backup_fetch = backup_fetch
-         self.typename = typename
-         self._objs_by_name: ty.Dict[str, T] = dict()
+         self._objs: ty.Dict[K, T] = dict()
          # ^ is a possibly big/expensive local cache of the most recent
         # state for all of the event type in the namespace. Don't use
         # this class if you can't afford the memory overhead of
         # observing everything in your namespace and keeping the last
         # known copy of everything forever.
-         self._last_seen_time_by_name: ty.Dict[str, float] = dict()
+         self._last_seen_times: ty.Dict[K, float] = dict()
          self._last_api_update_time = 0.0
-         self._limiter = OneShotLimiter()
-
-     def _start_thread(self, namespace: str) -> None:
-         create_watch_thread(
-             self.get_list_method, self._add_object, namespace, typename=self.typename
-         ).start()
-
-     def _add_object(self, namespace: str, obj: T, _event_type: EventType) -> None:
-         """This is where we receive updates from the k8s API."""
-         self._last_api_update_time = time.monotonic()
-
-         if not obj:
-             logger.warning(f"Received null/empty {self.typename}")
-             return
+         self.backup_fetch = backup_fetch
 
-         name = _make_name(namespace, self.get_name(obj))
-         logger.debug(f"{self.typename} {name} updated")
-         self._last_seen_time_by_name[name] = time.monotonic()
-         self._objs_by_name[name] = obj
+     def set_object(self, key: K, obj: T) -> None:
+         """Set an object in the cache, updating the last seen time."""
+         now = _watch_timer()
+         self._last_api_update_time = now
+         self._last_seen_times[key] = now
+         self._objs[key] = obj
 
-     def _is_stale(self, name: str) -> bool:
-         return is_stale(self._last_api_update_time, self._last_seen_time_by_name.get(name) or 0)
-
-     @scope.bound
-     def get(self, obj_name: str, namespace: str = "") -> ty.Optional[T]:
-         namespace = namespace or config.k8s_namespace()
-         name = _make_name(namespace, obj_name)
-         scope.enter(logger_context(name=obj_name, namespace=namespace))
+     def _is_stale(self, key: K) -> bool:
+         return is_stale(self._last_api_update_time, self._last_seen_times.get(key) or 0)
 
+     def get(self, key: K) -> ty.Optional[T]:
          # first try is looking in our local cache
-         if (obj := self._objs_by_name.get(name)) and not self._is_stale(name):
+         if (obj := self._objs.get(key)) and not self._is_stale(key):
              return obj
 
          # second try is making sure the namespace watcher is running, sleeping, and then looking in the cache again.
          # This is much more efficient than a manual fetch.
-         self._limiter(namespace, self._start_thread)
          time.sleep(config.k8s_monitor_delay())
-         if (obj := self._objs_by_name.get(name)) and not self._is_stale(name):
+         if (obj := self._objs.get(key)) and not self._is_stale(key):
              return obj
 
          # if that doesn't work, try a manual fetch.
          if self.backup_fetch:
-             logger.warning(f"Manually fetching {self.typename}...")
+             logger.warning(f"Manually fetching {key}...")
              # doing a lot of manual fetches may indicate that the k8s API is having trouble keeping up...
              try:
-                 if obj := self.backup_fetch(namespace, obj_name):
-                     self._add_object(namespace, obj, "FETCH")  # updates last seen, too
+                 if obj := self.backup_fetch(key):
+                     self.set_object(key, obj)  # updates last seen, too
                      return obj
 
              except Exception:
-                 logger.exception(f"Unexpected error during manual fetch of {self.typename}.")
+                 logger.exception(f"Unexpected error during manual fetch of {key}.")
 
-         if self._is_stale(name):
+         if self._is_stale(key):
              logger.warning(
-                 f"Could not refresh {name}, and our record of it is stale - dropping stale object!"
+                 f"Could not refresh {key}, and our record of it is stale - dropping stale object!"
              )
-             self._objs_by_name.pop(name, None)
-             self._last_seen_time_by_name.pop(name, None)
+             self._objs.pop(key, None)
+             self._last_seen_times.pop(key, None)
 
          return None
+
+
+ class WatchingObjectSource(ty.Generic[T]):
+     """Efficiently 'get' objects by launching a single thread to
+     watch for changes to all such objects in a given namespace.
+
+     Also provide a way to create a future that will be resolved according to the logic
+     provided by the caller whenever an object is updated, or if the object has not been
+     updated in a while.
+
+     Importantly, the Futures are only prevented from deadlocking (never awakening their
+     condition variable) by the fact that we very occasionally will go through the list
+     of seen objects and raise Exceptions for objects that have not been updated in a while.
+     This is vaguely akin to garbage collection, in that it will occasionally
+     cause a 'pause' in the watcher thread as it tries to collect stale objects.
+
+     This is network-efficient for observing many different objects,
+     but not memory efficient if you really only need to fetch details
+     for a few objects, because we retain the last known state for every observed object indefinitely.
+     """
+
+     def __init__(
+         self,
+         get_list_method: GetListMethod[T],
+         get_name: ty.Callable[[T], str] = ty.cast(  # noqa: B008
+             ty.Callable[[T], str], _default_get_name
+         ),
+         backup_fetch: ty.Optional[ty.Callable[[str, str], ty.Optional[T]]] = None,
+         typename: str = "object",
+     ) -> None:
+         self.get_list_method = get_list_method
+         self.get_name = get_name
+         self.typename = typename
+         self._limiter = OneShotLimiter()
+         self._uncertain_futures = UncertainFuturesTracker[tuple[str, str], T](
+             config.k8s_watch_object_stale_seconds()
+         )
+         self._seen_objects = _SeenObjectContainer[tuple[str, str], T](
+             lambda namespace_and_name: backup_fetch(*namespace_and_name) if backup_fetch else None
+         )
+
+     def _add_object(self, namespace: str, obj: T, _event_type: EventType) -> None:
+         """This is where we receive updates from the k8s API."""
+         if not obj:
+             logger.warning(f"Received null/empty {self.typename}")
+             return
+
+         key = (namespace, self.get_name(obj))
+         self._seen_objects.set_object(key, obj)
+         self._uncertain_futures.update(key, obj)
+         logger.debug("%s %s updated", self.typename, key)
+
+     def _start_namespace_watcher_thread(self, namespace: str) -> None:
+         create_watch_thread(
+             self.get_list_method, self._add_object, namespace, typename=self.typename
+         ).start()
+
+     @scope.bound
+     def get(self, obj_name: str, namespace: str = "") -> ty.Optional[T]:
+         """May block for a little while if a manual fetch is required."""
+         namespace = namespace or config.k8s_namespace()
+         scope.enter(logger_context(name=obj_name, namespace=namespace))
+         self._limiter(namespace, self._start_namespace_watcher_thread)
+         return self._seen_objects.get((namespace, obj_name))
+
+     def create_future(
+         self,
+         interpreter: FutureInterpreter[T, R],
+         obj_name: str,
+         *,
+         namespace: str = "",
+     ) -> futures.PFuture[R]:
+         """Create a future that will be resolved when the object is available according to
+         the interpreter.
+
+         The FutureInterpreter must:
+         - raise an exception if it wishes the future to raise.
+         - return the result if it wishes the future to resolve successfully.
+         - return NotYetDone if the status is still in progress.
+         """
+         namespace = namespace or config.k8s_namespace()
+         self._limiter(namespace, self._start_namespace_watcher_thread)
+         return self._uncertain_futures.create((namespace, obj_name), interpreter)
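
To make the new `create_future` concrete, here is a hedged sketch of waiting on a Kubernetes Job through a `WatchingObjectSource`. The `job_source` construction is elided, and the particular status fields checked are illustrative; only `create_future` and `NotYetDone` come from this diff.

```python
import typing as ty

from kubernetes import client

from thds.mops.k8s.uncertain_future import NotYetDone


def job_completion(
    job: ty.Optional[client.V1Job], last_seen_at: float
) -> ty.Union[client.V1Job, NotYetDone]:
    if job is None:
        # never observed (or gone stale): fail the Future rather than wait forever.
        raise TimeoutError("Job was not observed before the staleness deadline")
    if job.status and job.status.succeeded:
        return job  # resolves the Future with the completed Job
    if job.status and job.status.failed:
        raise RuntimeError(f"Job failed: {job.status.conditions}")
    return NotYetDone()


# job_source: WatchingObjectSource[client.V1Job], constructed elsewhere around a
# list method such as client.BatchV1Api().list_namespaced_job.
fut = job_source.create_future(job_completion, "my-job", namespace="my-namespace")
completed = fut.result()  # blocks until the watch (or the staleness sweep) resolves it
```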
thds/mops/pure/__init__.py CHANGED
@@ -7,6 +7,7 @@
  from . import adls  # noqa
  from ._magic.api import magic  # noqa
  from .core.entry import register_entry_handler
+ from .core.lock.maintain import no_maintain as no_maintain_locks  # noqa: F401
  from .core.memo import results  # noqa
  from .core.memo.function_memospace import (  # noqa
      add_pipeline_memospace_handlers,
@@ -21,7 +22,7 @@ from .core.use_runner import use_runner  # noqa
  from .pickling.memoize_only import memoize_in  # noqa
  from .pickling.mprunner import MemoizingPicklingRunner  # noqa
  from .runner.simple_shims import samethread_shim, subprocess_shim  # noqa
- from .runner.types import Shim, ShimBuilder  # noqa
+ from .runner.types import FutureShim, Shim, ShimBuilder  # noqa
 
 
  def _register_things() -> None:
thds/mops/pure/_magic/sauce.py CHANGED
@@ -6,7 +6,7 @@ import typing as ty
 
  from typing_extensions import ParamSpec
 
- from thds.core import stack_context
+ from thds.core import futures, stack_context
  from thds.mops._utils import config_tree
 
  from ..core import file_blob_store, pipeline_id, pipeline_id_mask, uris
@@ -109,7 +109,7 @@ class Magic(ty.Generic[P, R]):
      def _is_off(self) -> bool:
          return self._shim_builder_or_off is None
 
-     def _shimbuilder(self, f: ty.Callable[P, R], args: P.args, kwargs: P.kwargs) -> Shim:
+     def _shimbuilder(self, f: ty.Callable[P, R], args: P.args, kwargs: P.kwargs) -> Shim:  # type: ignore[valid-type]
          # this can be set using a stack-local context, or set globally as specifically
          # or generally as the user needs. We prefer stack local over everything else.
          sb = self._shim_builder_or_off
@@ -123,8 +123,16 @@ class Magic(ty.Generic[P, R]):
      def _pipeline_id(self) -> str:
          return self.config.pipeline_id.getv(self._func_config_path)
 
+     def submit(self, *args: P.args, **kwargs: P.kwargs) -> futures.PFuture[R]:
+         """A futures-based interface that doesn't block on the result of the wrapped
+         function call, but returns a PFuture once either a result has been found or a
+         new invocation has been started.
+         """
+         with pipeline_id.set_pipeline_id_for_stack(self._pipeline_id):
+             return self.runner.submit(self.__wrapped__, *args, **kwargs)
+
      def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R:
-         """This is the wrapped function."""
+         """This is the wrapped function - call this as though it were the function itself."""
          with pipeline_id.set_pipeline_id_for_stack(self._pipeline_id):
              return self._func(*args, **kwargs)
 
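
The practical payoff of `Magic.submit` is fan-out without blocking. A sketch under assumptions: the `@pure.magic()` decorator spelling and the toy function are illustrative, and `PFuture.result()` is assumed to block like its `concurrent.futures` counterpart.

```python
from thds.mops import pure


@pure.magic()  # decorator spelling assumed from the package's magic API
def train(seed: int) -> float:
    return 0.1 * seed  # stand-in for expensive, memoized work


def train_many() -> list[float]:
    futs = [train.submit(seed) for seed in range(8)]  # launch (or find memoized results) eagerly
    return [f.result() for f in futs]  # block only at the gather point
```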
thds/mops/pure/_magic/shims.py CHANGED
@@ -4,14 +4,14 @@ from thds import core
 
  from ..runner.shim_builder import make_builder
  from ..runner.simple_shims import samethread_shim, subprocess_shim
- from ..runner.types import Shim, ShimBuilder
+ from ..runner.types import FutureShim, Shim, ShimBuilder
 
  ShimName = ty.Literal[
      "samethread",  # memoization and coordination, but run in the same thread as the caller.
      "subprocess",  # memoization and coordination, but transfer to a subprocess rather than remote.
      "off",  # equivalent to None - disables use of mops.
  ]
- ShimOrBuilder = ty.Union[ShimBuilder, Shim]
+ ShimOrBuilder = ty.Union[ShimBuilder, Shim, FutureShim]
  logger = core.log.getLogger(__name__)
 
thds/mops/pure/core/deferred_work.py CHANGED
@@ -32,15 +32,7 @@ def open_context() -> ty.Iterator[None]:
      The idea is that you'd call perform_all() inside your Shim which transfers
      execution to a remote environment, but _not_ call it if you're transferring execution
      to a local environment, as the upload will not be needed.
-
-     This is not re-entrant. If this is called while the dictionary is non-empty, an
-     exception will be raised. This is only because I can think of no reason why anyone
-     would want it to be re-entrant, so it seems better to raise an error. If for some
-     reason re-entrancy were desired, we could just silently pass if the dictionary already
-     has deferred work.
      """
-     existing_work = _DEFERRED_INVOCATION_WORK()
-     assert existing_work is None, f"deferred work context is not re-entrant! {existing_work}"
      with _DEFERRED_INVOCATION_WORK.set(dict()):
          logger.debug("Opening deferred work context")
          yield
thds/mops/pure/core/entry/runner_registry.py CHANGED
@@ -5,14 +5,6 @@ In practice we only have a single Runner type registered, the MemoizingPicklingR
 
  import typing as ty
 
- from thds.core import stack_context
-
- RUNNER_ENTRY_COUNT = stack_context.StackContext("runner_entry_count", 0)
-
-
- def entry_count() -> int:
-     return RUNNER_ENTRY_COUNT()
-
 
  class EntryHandler(ty.Protocol):
      def __call__(self, *__args: str) -> ty.Any:
@@ -27,5 +19,4 @@ def register_entry_handler(name: str, mh: EntryHandler) -> None:
 
 
  def run_named_entry_handler(name: str, *args: str) -> None:
-     with RUNNER_ENTRY_COUNT.set(RUNNER_ENTRY_COUNT() + 1):
-         ENTRY_HANDLERS[name](*args)
+     ENTRY_HANDLERS[name](*args)
thds/mops/pure/core/lock/__init__.py CHANGED
@@ -2,6 +2,7 @@ from ._acquire import acquire  # noqa: F401
  from .maintain import (  # noqa: F401
      CannotMaintainLock,
      launch_daemon_lock_maintainer,
+     maintain_to_release,
      remote_lock_maintain,
  )
  from .types import LockAcquired  # noqa: F401
thds/mops/pure/core/lock/_acquire.py CHANGED
@@ -32,7 +32,7 @@ from thds.core import log
  from . import _funcs
  from .read import get_writer_id, make_read_lockfile
  from .types import LockAcquired, LockContents
- from .write import LockfileWriter, make_lock_contents
+ from .write import LockEmitter, LockfileWriter
 
  logger = log.getLogger(__name__)
 
@@ -106,7 +106,7 @@ def acquire(  # noqa: C901
      lockfile_writer = LockfileWriter(
          my_writer_id,
          lock_dir_uri,
-         make_lock_contents(my_writer_id, expire),
+         LockEmitter(my_writer_id, expire),
          expire.total_seconds(),
          debug=debug,
      )
thds/mops/pure/core/lock/maintain.py CHANGED
@@ -15,12 +15,14 @@ from datetime import datetime, timedelta
  from functools import partial
  from threading import Thread
 
- from thds.core import log
+ from thds.core import config, log
 
  from ._funcs import make_lock_uri
  from .read import get_writer_id, make_read_lockfile
  from .types import LockAcquired
- from .write import LockfileWriter, make_lock_contents
+ from .write import LockEmitter, LockfileWriter
+
+ MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)
 
  logger = log.getLogger(__name__)
 
@@ -103,7 +105,7 @@ def remote_lock_maintain(lock_dir_uri: str, expected_writer_id: str = "") -> Loc
      lockfile_writer = LockfileWriter(
          current_writer_id,
          lock_dir_uri,
-         make_lock_contents(get_writer_id(lock_contents), timedelta(seconds=expire_s)),
+         LockEmitter(get_writer_id(lock_contents), timedelta(seconds=expire_s)),
          expire_s,
          writer_name="remote",
      )
@@ -148,3 +150,20 @@ def launch_daemon_lock_maintainer(lock_acq: LockAcquired) -> ty.Callable[[], Non
          lock_acq.release()
 
      return stop_maintaining
+
+
+ def maintain_to_release(
+     acquired_lock: LockAcquired,
+ ) -> ty.Callable[[], None]:
+     """Depending on configuration, potentially start maintaining the lock.
+
+     Return a callable that will release the lock when called.
+     """
+     if MAINTAIN_LOCKS():
+         return launch_daemon_lock_maintainer(acquired_lock)
+
+     return acquired_lock.release
+
+
+ def no_maintain() -> None:
+     MAINTAIN_LOCKS.set_global(False)
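
For callers, the new switch means lock maintenance can be disabled globally, e.g. in tests. A brief sketch; only the function call (re-exported as `no_maintain_locks`) is shown in this diff, and the environment-variable spelling is an assumption based on `thds.core.config` conventions.

```python
from thds.mops import pure

# programmatic opt-out: afterwards, maintain_to_release() returns the lock's plain
# release callable instead of launching a daemon maintainer thread.
pure.no_maintain_locks()

# presumably equivalent via configuration (assumption, not shown in this diff):
#   THDS_MOPS_PURE_LOCAL_MAINTAIN_LOCKS=false
```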
thds/mops/pure/core/lock/write.py CHANGED
@@ -1,5 +1,6 @@
  import os
  import typing as ty
+ from dataclasses import dataclass
  from datetime import datetime, timedelta
 
  from thds.core import hostname, log
@@ -10,38 +11,37 @@ from .types import LockContents
  logger = log.getLogger(__name__)
 
 
- def make_lock_contents(
-     writer_id: str, expire: timedelta
- ) -> ty.Callable[[ty.Optional[datetime]], LockContents]:
-     """Impure - Resets written_at to 'right now' to keep the lock 'live'."""
-     write_count = 0
-     first_written_at = ""
+ @dataclass
+ class LockEmitter:
+     writer_id: str
+     expire: timedelta
 
-     assert (
-         "/" not in writer_id
-     ), f"{writer_id} should not contain a slash - maybe you passed a URI instead?"
+     write_count: int = 0
+     first_written_at: str = ""
 
-     def lock_contents(first_acquired_at: ty.Optional[datetime]) -> LockContents:
-         nonlocal write_count, first_written_at
-         write_count += 1
+     def __post_init__(self) -> None:
+         assert (
+             "/" not in self.writer_id
+         ), f"{self.writer_id} should not contain a slash - maybe you passed a URI instead?"
+
+     def __call__(self, first_acquired_at: ty.Optional[datetime]) -> LockContents:
+         self.write_count += 1
          now = _funcs.utc_now().isoformat()
-         first_written_at = first_written_at or now
+         self.first_written_at = self.first_written_at or now
 
          return {
-             "writer_id": writer_id,
+             "writer_id": self.writer_id,
              "written_at": now,
-             "expire_s": expire.total_seconds(),
+             "expire_s": self.expire.total_seconds(),
              # debug stuff:
-             "write_count": write_count,
+             "write_count": self.write_count,
              "hostname": hostname.friendly(),
              "pid": str(os.getpid()),
-             "first_written_at": first_written_at,
+             "first_written_at": self.first_written_at,
              "first_acquired_at": first_acquired_at.isoformat() if first_acquired_at else "",
              "released_at": "",
          }
 
-     return lock_contents
-
 
  class LockfileWriter:
      """The core purpose of this class is to allow setting of first_acquired_at immediately
thds/mops/pure/core/memo/__init__.py CHANGED
@@ -1,4 +1,4 @@
- from . import calls, unique_name_for_function  # noqa: F401
+ from . import calls, results, unique_name_for_function  # noqa: F401
  from .function_memospace import (  # noqa
      args_kwargs_content_address,
      make_function_memospace,