thds.mops 3.8.20250721144551__py3-none-any.whl → 3.9.20250721231027__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. thds/mops/impure/runner.py +1 -1
  2. thds/mops/k8s/__init__.py +3 -1
  3. thds/mops/k8s/{launch.py → _launch.py} +56 -57
  4. thds/mops/k8s/batching.py +198 -0
  5. thds/mops/k8s/config.py +1 -1
  6. thds/mops/k8s/counts.py +28 -0
  7. thds/mops/k8s/job_future.py +109 -0
  8. thds/mops/k8s/jobs.py +4 -0
  9. thds/mops/k8s/logging.py +37 -5
  10. thds/mops/k8s/uncertain_future.py +160 -0
  11. thds/mops/k8s/watch.py +120 -62
  12. thds/mops/pure/__init__.py +2 -1
  13. thds/mops/pure/_magic/sauce.py +11 -3
  14. thds/mops/pure/_magic/shims.py +2 -2
  15. thds/mops/pure/core/deferred_work.py +0 -8
  16. thds/mops/pure/core/entry/runner_registry.py +1 -10
  17. thds/mops/pure/core/lock/__init__.py +1 -0
  18. thds/mops/pure/core/lock/_acquire.py +2 -2
  19. thds/mops/pure/core/lock/maintain.py +22 -3
  20. thds/mops/pure/core/lock/write.py +19 -19
  21. thds/mops/pure/core/memo/__init__.py +1 -1
  22. thds/mops/pure/core/memo/results.py +5 -4
  23. thds/mops/pure/core/use_runner.py +21 -7
  24. thds/mops/pure/pickling/mprunner.py +21 -14
  25. thds/mops/pure/pickling/pickles.py +19 -8
  26. thds/mops/pure/pickling/remote.py +3 -1
  27. thds/mops/pure/runner/get_results.py +106 -0
  28. thds/mops/pure/runner/local.py +58 -87
  29. thds/mops/pure/runner/shim_builder.py +7 -7
  30. thds/mops/pure/runner/simple_shims.py +7 -0
  31. thds/mops/pure/runner/types.py +15 -4
  32. thds/mops/pure/tools/summarize/run_summary.py +9 -8
  33. {thds_mops-3.8.20250721144551.dist-info → thds_mops-3.9.20250721231027.dist-info}/METADATA +1 -1
  34. {thds_mops-3.8.20250721144551.dist-info → thds_mops-3.9.20250721231027.dist-info}/RECORD +37 -32
  35. {thds_mops-3.8.20250721144551.dist-info → thds_mops-3.9.20250721231027.dist-info}/WHEEL +0 -0
  36. {thds_mops-3.8.20250721144551.dist-info → thds_mops-3.9.20250721231027.dist-info}/entry_points.txt +0 -0
  37. {thds_mops-3.8.20250721144551.dist-info → thds_mops-3.9.20250721231027.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,8 @@ class RequiredResultNotFound(Exception):
76
76
 
77
77
  def check_if_result_exists(
78
78
  memo_uri: str,
79
- rerun_excs: bool = False,
80
- before_raise: ty.Callable[[], ty.Any] = lambda: None,
79
+ check_for_exception: bool = False,
80
+ before_raise: ty.Optional[ty.Callable[[], ty.Any]] = None,
81
81
  ) -> ty.Union[None, Success, Error]:
82
82
  fs = lookup_blob_store(memo_uri)
83
83
  value_uri = fs.join(memo_uri, RESULT)
@@ -86,14 +86,15 @@ def check_if_result_exists(
86
86
 
87
87
  required_msg = _should_require_result(memo_uri)
88
88
  if required_msg: # might be custom or the default. either way it indicates a required result.
89
- before_raise()
89
+ if before_raise:
90
+ before_raise()
90
91
  error_msg = f"Required a result for {ORANGE(memo_uri)} but that result was not found"
91
92
  # i'm tired of visually scanning for these memo_uris in logs.
92
93
  if required_msg != _NO_MSG:
93
94
  error_msg += f": {required_msg}"
94
95
  raise RequiredResultNotFound(error_msg, memo_uri)
95
96
 
96
- if rerun_excs:
97
+ if not check_for_exception:
97
98
  return None
98
99
 
99
100
  error_uri = fs.join(memo_uri, EXCEPTION)
@@ -4,21 +4,29 @@ You can transfer control to a Runner without this, but decorators are a Pythonic
4
4
  """
5
5
 
6
6
  import typing as ty
7
+ from contextlib import contextmanager
7
8
  from functools import wraps
8
9
 
9
10
  from thds.core import log, stack_context
11
+ from thds.mops._utils.names import full_name_and_callable
10
12
 
11
- from .entry.runner_registry import entry_count
12
13
  from .types import Runner
13
14
 
15
+ _USE_RUNNER_BYPASS = stack_context.StackContext[set[str]]("use_runner_bypass", set())
16
+ # use this in a Runner remote entry point to allow the remote function call
17
+ # to bypass any use_runner decorator. Also necessary in case somebody is doing advanced
18
+ # things like using a remote runner to run a manifest of _other_ remote functions...
19
+
14
20
  logger = log.getLogger(__name__)
15
21
  F = ty.TypeVar("F", bound=ty.Callable)
16
- FUNCTION_UNWRAP_COUNT = stack_context.StackContext("function_unwrap_count", 0)
17
22
 
18
23
 
19
- def _is_runner_entry() -> bool:
20
- """Function is being called in the context of a Runner."""
21
- return entry_count() > FUNCTION_UNWRAP_COUNT()
24
+ @contextmanager
25
+ def unwrap_use_runner(f: F) -> ty.Iterator[None]:
26
+ full_name, _ = full_name_and_callable(f)
27
+ with _USE_RUNNER_BYPASS.set({full_name}):
28
+ # this is a no-op if the function is not wrapped
29
+ yield
22
30
 
23
31
 
24
32
  def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> ty.Callable[[F], F]:
@@ -34,9 +42,15 @@ def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> t
34
42
  def deco(f: F) -> F:
35
43
  @wraps(f)
36
44
  def __use_runner_wrapper(*args, **kwargs): # type: ignore
37
- if _is_runner_entry() or skip():
45
+ def should_bypass() -> bool:
46
+ if skip():
47
+ return True
48
+ full_name, _ = full_name_and_callable(f)
49
+ return full_name in _USE_RUNNER_BYPASS()
50
+
51
+ if should_bypass():
38
52
  logger.debug("Calling function %s directly...", f)
39
- with FUNCTION_UNWRAP_COUNT.set(FUNCTION_UNWRAP_COUNT() + 1):
53
+ with unwrap_use_runner(f):
40
54
  return f(*args, **kwargs)
41
55
 
42
56
  logger.debug("Forwarding local function %s call to runner...", f)
@@ -11,7 +11,7 @@ from functools import partial
11
11
 
12
12
  from typing_extensions import Self
13
13
 
14
- from thds.core import cache, log
14
+ from thds.core import cache, futures, log
15
15
  from thds.core.stack_context import StackContext
16
16
 
17
17
  from ..._utils.once import Once
@@ -20,7 +20,7 @@ from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
20
20
  from ..core.serialize_paths import CoordinatingPathSerializer
21
21
  from ..core.types import Args, F, Kwargs, Serializer, T
22
22
  from ..runner import local, shim_builder
23
- from ..runner.types import Shim, ShimBuilder
23
+ from ..runner.types import FutureShim, Shim, ShimBuilder
24
24
  from ..tools.summarize import run_summary
25
25
  from . import _pickle, pickles, sha256_b64
26
26
 
@@ -32,7 +32,7 @@ _KWARGS_CONTEXT = StackContext[ty.Mapping]("args_kwargs", dict())
32
32
  logger = log.getLogger(__name__)
33
33
 
34
34
 
35
- def mp_shim(base_shim: Shim, shim_args: ty.Sequence[str]) -> ty.Any:
35
+ def mp_shim(base_shim: ty.Union[Shim, FutureShim], shim_args: ty.Sequence[str]) -> ty.Any:
36
36
  return base_shim((RUNNER_NAME, *shim_args))
37
37
 
38
38
 
@@ -48,7 +48,7 @@ class MemoizingPicklingRunner:
48
48
 
49
49
  def __init__(
50
50
  self,
51
- shim: ty.Union[ShimBuilder, Shim],
51
+ shim: ty.Union[ShimBuilder, Shim, FutureShim],
52
52
  blob_storage_root: uris.UriResolvable,
53
53
  *,
54
54
  rerun_exceptions: bool = True,
@@ -159,20 +159,14 @@ class MemoizingPicklingRunner:
159
159
  ),
160
160
  )
161
161
 
162
- def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> Shim:
162
+ def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> ty.Union[Shim, FutureShim]:
163
163
  base_shim = self._shim_builder(func, args, kwargs)
164
164
  return partial(mp_shim, base_shim)
165
165
 
166
- def __call__(self, func: ty.Callable[..., T], args: Args, kwargs: Kwargs) -> T:
167
- """Return result of running this function remotely via the shim.
168
-
169
- Passes data to shim process via pickles in a Blob Store.
166
+ def submit(self, func: ty.Callable[..., T], *args: ty.Any, **kwargs: ty.Any) -> futures.PFuture[T]:
167
+ """Now that mops supports Futures, we can have an 'inner' API that returns a PFuture.
170
168
 
171
- May return cached (previously-computed) results found via the
172
- derived function memo URI, which contains the deterministic
173
- hashed bytes of all the function arguments, but also
174
- additional namespacing including pipeline_id as documented
175
- in memo.function_memospace.py.
169
+ We are trying to mimic the interface that concurrent.futures.Executors provide.
176
170
  """
177
171
  logger.debug("Preparing to run function via remote shim")
178
172
  with _ARGS_CONTEXT.set(args), _KWARGS_CONTEXT.set(kwargs):
@@ -192,3 +186,16 @@ class MemoizingPicklingRunner:
192
186
  args,
193
187
  kwargs,
194
188
  )
189
+
190
+ def __call__(self, func: ty.Callable[..., T], args: Args, kwargs: Kwargs) -> T:
191
+ """Return result of running this function remotely via the shim.
192
+
193
+ Passes data to shim process via pickles in a Blob Store.
194
+
195
+ May return cached (previously-computed) results found via the
196
+ derived function memo URI, which contains the deterministic
197
+ hashed bytes of all the function arguments, but also
198
+ additional namespacing including pipeline_id as documented
199
+ in memo.function_memospace.py.
200
+ """
201
+ return self.submit(func, *args, **kwargs).result()
@@ -60,19 +60,30 @@ class PicklableFunction:
60
60
  def __repr__(self) -> str:
61
61
  return str(self)
62
62
 
63
+ def _resolve(self) -> ty.Callable:
64
+ """Resolve the function if it hasn't been resolved yet."""
65
+ if self.f is None:
66
+ logger.debug(f"Dynamically importing function {str(self)}")
67
+ if self.fmod == "__main__":
68
+ self.f = get_main_module_function(self.fname) # type: ignore
69
+ else:
70
+ mod = importlib.import_module(self.fmod)
71
+ self.f = getattr(mod, self.fname)
72
+ assert self.f is not None
73
+ return self.f
74
+ return self.f
75
+
76
+ @property
77
+ def func(self) -> ty.Callable:
78
+ """This is a property so we aren't ruining backward pickle compatibility."""
79
+ return self._resolve()
80
+
63
81
  @property
64
82
  def __name__(self) -> str:
65
83
  return self.fname
66
84
 
67
85
  def __call__(self, *args: ty.Any, **kwargs: ty.Any) -> ty.Any:
68
- logger.debug(f"Dynamically importing function {str(self)}")
69
- if self.fmod == "__main__":
70
- self.f = get_main_module_function(self.fname) # type: ignore
71
- else:
72
- mod = importlib.import_module(self.fmod)
73
- self.f = getattr(mod, self.fname)
74
- assert self.f
75
- return self.f(*args, **kwargs)
86
+ return self._resolve()(*args, **kwargs)
76
87
 
77
88
 
78
89
  class UnpickleSimplePickleFromUri:
@@ -12,6 +12,7 @@ from ..core.memo import results
12
12
  from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
13
13
  from ..core.serialize_paths import CoordinatingPathSerializer
14
14
  from ..core.types import Args, BlobStore, Kwargs, T
15
+ from ..core.use_runner import unwrap_use_runner
15
16
  from ..runner import strings
16
17
  from . import _pickle, mprunner, pickles, sha256_b64
17
18
 
@@ -123,7 +124,8 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
123
124
  def do_work_return_result() -> object:
124
125
  # ONLY failures in this code should transmit an EXCEPTION
125
126
  # back to the orchestrator side.
126
- return func(*args, **kwargs)
127
+ with unwrap_use_runner(func):
128
+ return func(*args, **kwargs)
127
129
 
128
130
  route_return_value_or_exception(
129
131
  _ResultExcWithMetadataChannel(
@@ -0,0 +1,106 @@
1
+ import concurrent.futures
2
+ import threading
3
+ import typing as ty
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from thds.core import futures, log
8
+
9
+ from ...config import max_concurrent_network_ops
10
+ from ..core import lock, memo
11
+ from ..core.types import NoResultAfterShimSuccess
12
+ from ..tools.summarize import run_summary
13
+ from . import types
14
+
15
+
16
+ class ResultAndInvocationType(ty.NamedTuple):
17
+ value_or_error: ty.Union[memo.results.Success, memo.results.Error]
18
+ invoc_type: run_summary.InvocationType
19
+
20
+
21
+ def unwrap_value_or_error(
22
+ get_meta_and_result: types.GetMetaAndResult,
23
+ run_directory: ty.Optional[Path],
24
+ runner_prefix: str,
25
+ args_kwargs_uris: ty.Collection[str],
26
+ memo_uri: str,
27
+ result_and_itype: ResultAndInvocationType,
28
+ ) -> ty.Any: # the result value
29
+ result = result_and_itype.value_or_error
30
+ metadata = None
31
+ value_t = None
32
+ try:
33
+ if isinstance(result, memo.results.Success):
34
+ metadata, value_t = get_meta_and_result("value", result.value_uri)
35
+ return value_t
36
+ else:
37
+ assert isinstance(result, memo.results.Error), "Must be Error or Success"
38
+ metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
39
+ raise exc
40
+ finally:
41
+ run_summary.log_function_execution(
42
+ *(run_directory, memo_uri, result_and_itype.invoc_type),
43
+ metadata=metadata,
44
+ runner_prefix=runner_prefix,
45
+ was_error=not isinstance(result, memo.results.Success),
46
+ return_value=value_t,
47
+ args_kwargs_uris=args_kwargs_uris,
48
+ )
49
+
50
+
51
+ _AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()) * 3)
52
+ # _AFTER prioritizes retrieving the result of a Shim that has completed.
53
+ logger = log.getLogger(__name__)
54
+ T = ty.TypeVar("T")
55
+
56
+
57
+ @dataclass
58
+ class PostShimResultGetter(ty.Generic[T]):
59
+ """Must be serializable on its own, so we can pass it across process boundaries
60
+ to serve as a foundation for a cross-process Future.
61
+
62
+ Happily, this should not be terribly difficult, as the 'state' of a mops function
63
+ is predicated entirely on the memo URI, which is a string.
64
+ """
65
+
66
+ memo_uri: str
67
+ partially_applied_unwrap_value_or_error: ty.Callable[[str, ResultAndInvocationType], T]
68
+ release_lock: ty.Optional[ty.Callable[[], None]] = None
69
+
70
+ def __call__(self, _shim_result: ty.Any) -> T:
71
+ """Check if the result exists, and return it if it does.
72
+
73
+ This is the future 'translator' that allows us to chain a shim future to be a result future.
74
+ """
75
+ memo_uri = self.memo_uri
76
+
77
+ try:
78
+ with _AFTER_INVOCATION_SEMAPHORE:
79
+ value_or_error = memo.results.check_if_result_exists(memo_uri, check_for_exception=True)
80
+ if not value_or_error:
81
+ raise NoResultAfterShimSuccess(
82
+ f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
83
+ )
84
+ return self.partially_applied_unwrap_value_or_error(
85
+ memo_uri, ResultAndInvocationType(value_or_error, "invoked")
86
+ )
87
+ finally:
88
+ if self.release_lock is not None:
89
+ try:
90
+ self.release_lock()
91
+ except Exception:
92
+ logger.exception("Failed to release lock after shim result retrieval.")
93
+
94
+
95
+ def lock_maintaining_future(
96
+ lock_acquired: lock.LockAcquired,
97
+ post_shim_result_getter: PostShimResultGetter[futures.R1],
98
+ inner_future: futures.PFuture[futures.R],
99
+ ) -> concurrent.futures.Future[futures.R1]:
100
+ """Create a Future that will be used to retrieve the result of a shim invocation.
101
+
102
+ This Future will be used to retrieve the result of a shim invocation, and will
103
+ maintain the lock while it is being retrieved.
104
+ """
105
+ post_shim_result_getter.release_lock = lock.maintain_to_release(lock_acquired)
106
+ return futures.chain_futures(inner_future, concurrent.futures.Future(), post_shim_result_getter)
@@ -5,28 +5,32 @@ import threading
5
5
  import time
6
6
  import typing as ty
7
7
  from datetime import datetime, timedelta, timezone
8
+ from functools import partial
8
9
  from pathlib import Path
9
10
 
10
- from thds.core import config, log, scope
11
+ from thds.core import futures, log, scope
11
12
  from thds.termtool.colorize import colorized, make_colorized_out
12
13
 
13
14
  from ...config import max_concurrent_network_ops
14
15
  from ..core import deferred_work, lock, memo, metadata, pipeline_id_mask, uris
16
+ from ..core.lock.maintain import MAINTAIN_LOCKS # noqa: F401
15
17
  from ..core.partial import unwrap_partial
16
- from ..core.types import Args, Kwargs, NoResultAfterShimSuccess, T
18
+ from ..core.types import Args, Kwargs, T
17
19
  from ..tools.summarize import run_summary
18
20
  from . import strings, types
19
-
20
- MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)
21
-
22
- # these two semaphores allow us to prioritize getting meaningful units
21
+ from .get_results import (
22
+ PostShimResultGetter,
23
+ ResultAndInvocationType,
24
+ lock_maintaining_future,
25
+ unwrap_value_or_error,
26
+ )
27
+
28
+ # this semaphore (and a similar one in get_results) allow us to prioritize getting a single unit
23
29
  # of progress _complete_, rather than issuing many instructions to the
24
30
  # underlying client and allowing it to randomly order the operations
25
31
  # such that it takes longer to get a full unit of work complete.
26
32
  _BEFORE_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
27
- # _OUT prioritizes uploading a single invocation and its dependencies so the Shim can start running.
28
- _AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
29
- # _IN prioritizes retrieving the result of a Shim that has completed.
33
+ # _BEFORE prioritizes uploading a single invocation and its dependencies so the Shim can start running.
30
34
 
31
35
  _DarkBlue = colorized(fg="white", bg="#00008b")
32
36
  _GreenYellow = colorized(fg="black", bg="#adff2f")
@@ -44,9 +48,9 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
44
48
  get_meta_and_result: types.GetMetaAndResult,
45
49
  run_directory: ty.Optional[Path] = None,
46
50
  calls_registry: ty.Mapping[ty.Callable, ty.Collection[ty.Callable]] = dict(), # noqa: B006
47
- ) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], T]:
51
+ ) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], futures.PFuture[T]]:
48
52
  @scope.bound
49
- def create_invocation__check_result__wait_shim(
53
+ def create_invocation_and_result_future(
50
54
  rerun_exceptions: bool,
51
55
  function_memospace: str,
52
56
  # by allowing the caller to set the function memospace, we allow 'redirects' to look up an old result by name.
@@ -54,7 +58,7 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
54
58
  func: ty.Callable[..., T],
55
59
  args_: Args,
56
60
  kwargs_: Kwargs,
57
- ) -> T:
61
+ ) -> futures.PFuture[T]:
58
62
  """This is the generic local runner. Its core abstractions are:
59
63
 
60
64
  - serializers of some sort (for the function and its arguments)
@@ -89,16 +93,13 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
89
93
  )
90
94
 
91
95
  # Define some important and reusable 'chunks of work'
92
-
93
- class ResultAndInvocationType(ty.NamedTuple):
94
- value_or_error: ty.Union[memo.results.Success, memo.results.Error]
95
- invoc_type: run_summary.InvocationType
96
-
97
- def check_result(
96
+ def check_result_exists(
98
97
  invoc_type: run_summary.InvocationType,
99
98
  ) -> ty.Union[ResultAndInvocationType, None]:
100
99
  result = memo.results.check_if_result_exists(
101
- memo_uri, rerun_excs=rerun_exceptions, before_raise=debug_required_result_failure
100
+ memo_uri,
101
+ check_for_exception=not rerun_exceptions,
102
+ before_raise=debug_required_result_failure,
102
103
  )
103
104
  if not result:
104
105
  return None
@@ -108,28 +109,6 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
108
109
  )
109
110
  return ResultAndInvocationType(result, invoc_type)
110
111
 
111
- def unwrap_value_or_error(result_and_itype: ResultAndInvocationType) -> T:
112
- result = result_and_itype.value_or_error
113
- metadata = None
114
- value_t = None
115
- try:
116
- if isinstance(result, memo.results.Success):
117
- metadata, value_t = get_meta_and_result("value", result.value_uri)
118
- return ty.cast(T, value_t)
119
- else:
120
- assert isinstance(result, memo.results.Error), "Must be Error or Success"
121
- metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
122
- raise exc
123
- finally:
124
- run_summary.log_function_execution(
125
- *(run_directory, memo_uri, result_and_itype.invoc_type),
126
- metadata=metadata,
127
- runner_prefix=function_memospace.split(pipeline_id)[0],
128
- was_error=not isinstance(result, memo.results.Success),
129
- return_value=value_t,
130
- args_kwargs=(args, kwargs),
131
- )
132
-
133
112
  def acquire_lock() -> ty.Optional[lock.LockAcquired]:
134
113
  return lock.acquire(fs.join(memo_uri, "lock"), expire=timedelta(seconds=88))
135
114
 
@@ -154,6 +133,14 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
154
133
 
155
134
  inspect_and_log(memo_uri)
156
135
 
136
+ p_unwrap_value_or_error = partial(
137
+ unwrap_value_or_error,
138
+ get_meta_and_result,
139
+ run_directory,
140
+ function_memospace.split(pipeline_id)[0], # runner_prefix
141
+ run_summary.extract_source_uris((args, kwargs)),
142
+ )
143
+
157
144
  # the network ops being grouped by _BEFORE_INVOCATION include one or more
158
145
  # download attempts (consider possible Paths) plus
159
146
  # one or more uploads (embedded Paths & Sources/refs, and then invocation).
@@ -162,9 +149,9 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
162
149
 
163
150
  # it's possible that our result may already exist from a previous run of this pipeline id.
164
151
  # we can short-circuit the entire process by looking for that result and returning it immediately.
165
- result = check_result("memoized")
152
+ result = check_result_exists("memoized")
166
153
  if result:
167
- return unwrap_value_or_error(result)
154
+ return futures.resolved(p_unwrap_value_or_error(memo_uri, result))
168
155
 
169
156
  lock_owned = acquire_lock()
170
157
  # if no result exists, the vastly most common outcome here will be acquiring
@@ -175,10 +162,6 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
175
162
  # LOCK LOOP: entering this loop (where we attempt to acquire the lock) is the common non-memoized case
176
163
  while not result:
177
164
  if lock_owned:
178
- if MAINTAIN_LOCKS():
179
- release_lock = lock.launch_daemon_lock_maintainer(lock_owned)
180
- else:
181
- release_lock = lock_owned.release
182
165
  break # we own the invocation - invoke the shim ourselves (below)
183
166
 
184
167
  # getting to this point ONLY happens if we failed to acquire the lock, which
@@ -190,57 +173,45 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
190
173
  time.sleep(22)
191
174
 
192
175
  with _BEFORE_INVOCATION_SEMAPHORE:
193
- result = check_result("awaited")
176
+ result = check_result_exists("awaited")
194
177
  if result:
195
178
  _LogAwaitedResult(
196
179
  f"{val_or_res} for {memo_uri} was found after waiting for the lock."
197
180
  )
198
- return unwrap_value_or_error(result)
181
+ return futures.resolved(p_unwrap_value_or_error(memo_uri, result))
199
182
 
200
183
  lock_owned = acquire_lock() # still inside the semaphore, as it's a network op
201
184
 
202
- assert release_lock is not None
203
185
  assert lock_owned is not None
204
186
  # if/when we acquire the lock, we move forever into 'run this ourselves mode'.
205
187
  # If something about our invocation fails,
206
188
  # we fail just as we would have previously, without any attempt to go
207
189
  # 'back' to waiting for someone else to compute the result.
208
190
 
209
- try:
210
- with _BEFORE_INVOCATION_SEMAPHORE:
211
- _LogNewInvocation(f"Invoking {memo_uri}")
212
- upload_invocation_and_deps()
213
-
214
- # can't hold the semaphore while we block on the shim, though.
215
- shim_ex = None
216
- shim = shim_builder(func, args_, kwargs_)
217
- shim( # ACTUAL INVOCATION (handoff to remote shim) HAPPENS HERE
218
- (
219
- memo_uri,
220
- *metadata.format_invocation_cli_args(
221
- metadata.InvocationMetadata.new(pipeline_id, invoked_at, lock_owned.writer_id)
222
- ),
223
- )
191
+ future_result_getter = PostShimResultGetter[T](memo_uri, p_unwrap_value_or_error)
192
+
193
+ with _BEFORE_INVOCATION_SEMAPHORE:
194
+ _LogNewInvocation(f"Invoking {memo_uri}")
195
+ upload_invocation_and_deps()
196
+
197
+ # can't hold the semaphore while we block on the shim, though.
198
+ shim = shim_builder(func, args_, kwargs_)
199
+ future_or_shim_result = shim( # ACTUAL INVOCATION (handoff to remote shim) HAPPENS HERE
200
+ (
201
+ memo_uri,
202
+ *metadata.format_invocation_cli_args(
203
+ metadata.InvocationMetadata.new(pipeline_id, invoked_at, lock_owned.writer_id)
204
+ ),
224
205
  )
225
- except Exception as ex:
226
- # network or similar errors are very common and hard to completely eliminate.
227
- # We know that if a result (or error) exists, then the network failure is
228
- # not important, because results in blob storage are atomically populated (either fully there or not)
229
- logger.exception("Error awaiting shim. Optimistically checking for result.")
230
- shim_ex = ex
231
-
232
- finally:
233
- release_lock()
234
-
235
- # the network ops being grouped by _AFTER_INVOCATION include one or more downloads.
236
- with _AFTER_INVOCATION_SEMAPHORE:
237
- value_or_error = memo.results.check_if_result_exists(memo_uri)
238
- if not value_or_error:
239
- if shim_ex:
240
- raise shim_ex # re-raise the underlying exception rather than making up our own.
241
- raise NoResultAfterShimSuccess(
242
- f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
243
- )
244
- return unwrap_value_or_error(ResultAndInvocationType(value_or_error, "invoked"))
245
-
246
- return create_invocation__check_result__wait_shim
206
+ )
207
+ if hasattr(future_or_shim_result, "add_done_callback"):
208
+ # if the shim returns a Future, we wrap it.
209
+ logger.debug("Shim returned a Future; wrapping it for post-shim result retrieval.")
210
+ return futures.make_lazy(lock_maintaining_future)(
211
+ lock_owned, future_result_getter, future_or_shim_result
212
+ )
213
+ else: # it's a synchronous shim - just process the result directly.
214
+ future_result_getter.release_lock = lock.maintain_to_release(lock_owned)
215
+ return futures.resolved(future_result_getter(future_or_shim_result))
216
+
217
+ return create_invocation_and_result_future
@@ -2,24 +2,24 @@ import inspect
2
2
  import typing as ty
3
3
 
4
4
  from ..core.types import Args, F, Kwargs
5
- from .types import Shim, ShimBuilder
5
+ from .types import FutureShim, Shim, ShimBuilder
6
6
 
7
7
 
8
8
  class _static_shim_builder:
9
- def __init__(self, shim: Shim) -> None:
9
+ def __init__(self, shim: ty.Union[Shim, FutureShim]) -> None:
10
10
  self.shim = shim
11
11
 
12
- def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> Shim:
12
+ def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> ty.Union[Shim, FutureShim]:
13
13
  return self.shim
14
14
 
15
15
  def __repr__(self) -> str:
16
16
  return f"<static_shim_builder for {self.shim}>"
17
17
 
18
18
 
19
- def make_builder(shim: ty.Union[Shim, ShimBuilder]) -> ShimBuilder:
19
+ def make_builder(shim_or_builder: ty.Union[Shim, ShimBuilder, FutureShim]) -> ShimBuilder:
20
20
  """If you have a Shim and you want to make it into the simplest possible ShimBuilder."""
21
21
 
22
- if len(inspect.signature(shim).parameters) == 3:
23
- return ty.cast(ShimBuilder, shim)
22
+ if len(inspect.signature(shim_or_builder).parameters) == 3:
23
+ return ty.cast(ShimBuilder, shim_or_builder)
24
24
 
25
- return _static_shim_builder(ty.cast(Shim, shim))
25
+ return _static_shim_builder(ty.cast(Shim, shim_or_builder))
@@ -1,3 +1,4 @@
1
+ import concurrent.futures
1
2
  import subprocess
2
3
  from typing import Sequence
3
4
 
@@ -19,3 +20,9 @@ def samethread_shim(shim_args: Sequence[str]) -> None:
19
20
  def subprocess_shim(shim_args: Sequence[str]) -> None:
20
21
  logger.debug("Running a mops function locally in a new subprocess.")
21
22
  subprocess.check_call(["python", "-m", "thds.mops.pure.core.entry.main", *shim_args])
23
+
24
+
25
+ def future_subprocess_shim(shim_args: Sequence[str]) -> concurrent.futures.Future:
26
+ """Use this if you really want a Future rather than just running the process"""
27
+ logger.debug("Running a mops function in a new subprocess, returning a Future.")
28
+ return concurrent.futures.ProcessPoolExecutor().submit(samethread_shim, shim_args)
@@ -1,25 +1,36 @@
1
1
  import typing as ty
2
2
 
3
+ from thds.core import futures
4
+
3
5
  from ..core.metadata import ResultMetadata
4
6
  from ..core.types import Args, F, Kwargs
5
7
 
6
- Shim = ty.Callable[[ty.Sequence[str]], ty.Any]
8
+ FutureShim = ty.Callable[[ty.Sequence[str]], futures.PFuture]
9
+ SyncShim = ty.Callable[[ty.Sequence[str]], None]
10
+ Shim = ty.Union[SyncShim, FutureShim]
7
11
  """A runner Shim is a way of getting back into a Python process with enough
8
12
  context to download the uploaded function and its arguments from the
9
13
  location where a runner placed it, and then invoke the function. All
10
14
  arguments are strings because it is assumed that this represents some
11
15
  kind of command line invocation.
12
16
 
13
- The Shim must be a blocking call, and its result(s) must be available
17
+ A SyncShim must be a blocking call, and its result(s) must be available
14
18
  immediately after its return.
19
+ A FutureShim must return a Future (with an 'add_done_callback' method)
20
+ that, when resolved, means that the result(s) are available.
15
21
  """
16
22
 
23
+ S = ty.TypeVar("S", SyncShim, FutureShim, Shim, covariant=True)
24
+
17
25
 
18
- class ShimBuilder(ty.Protocol):
19
- def __call__(self, __f: F, __args: Args, __kwargs: Kwargs) -> Shim:
26
+ class ShimBuilder(ty.Protocol, ty.Generic[S]):
27
+ def __call__(self, __f: ty.Callable, __args: Args, __kwargs: Kwargs) -> S:
20
28
  ... # pragma: no cover
21
29
 
22
30
 
31
+ SyncShimBuilder = ShimBuilder[SyncShim]
32
+ FutureShimBuilder = ShimBuilder[FutureShim]
33
+
23
34
  StorageRootURI = str
24
35
  SerializeArgsKwargs = ty.Callable[[StorageRootURI, F, Args, Kwargs], bytes]
25
36
  SerializeInvocation = ty.Callable[[StorageRootURI, F, bytes], bytes]