thds.mops 3.9.20250722150738__py3-none-any.whl → 3.9.20250722163657__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of thds.mops might be problematic.

Files changed (37)
  1. thds/mops/impure/runner.py +1 -1
  2. thds/mops/k8s/__init__.py +1 -3
  3. thds/mops/k8s/config.py +1 -1
  4. thds/mops/k8s/jobs.py +0 -4
  5. thds/mops/k8s/{_launch.py → launch.py} +57 -56
  6. thds/mops/k8s/logging.py +5 -37
  7. thds/mops/k8s/watch.py +62 -120
  8. thds/mops/pure/__init__.py +1 -2
  9. thds/mops/pure/_magic/sauce.py +3 -11
  10. thds/mops/pure/_magic/shims.py +2 -2
  11. thds/mops/pure/core/deferred_work.py +12 -15
  12. thds/mops/pure/core/entry/runner_registry.py +10 -1
  13. thds/mops/pure/core/lock/__init__.py +0 -1
  14. thds/mops/pure/core/lock/_acquire.py +2 -2
  15. thds/mops/pure/core/lock/maintain.py +3 -22
  16. thds/mops/pure/core/lock/write.py +19 -19
  17. thds/mops/pure/core/memo/__init__.py +1 -1
  18. thds/mops/pure/core/memo/results.py +4 -5
  19. thds/mops/pure/core/use_runner.py +7 -21
  20. thds/mops/pure/pickling/mprunner.py +14 -21
  21. thds/mops/pure/pickling/pickles.py +8 -19
  22. thds/mops/pure/pickling/remote.py +1 -3
  23. thds/mops/pure/runner/local.py +87 -58
  24. thds/mops/pure/runner/shim_builder.py +7 -7
  25. thds/mops/pure/runner/simple_shims.py +0 -7
  26. thds/mops/pure/runner/types.py +4 -15
  27. thds/mops/pure/tools/summarize/run_summary.py +8 -9
  28. {thds_mops-3.9.20250722150738.dist-info → thds_mops-3.9.20250722163657.dist-info}/METADATA +1 -1
  29. {thds_mops-3.9.20250722150738.dist-info → thds_mops-3.9.20250722163657.dist-info}/RECORD +32 -37
  30. thds/mops/k8s/batching.py +0 -198
  31. thds/mops/k8s/counts.py +0 -28
  32. thds/mops/k8s/job_future.py +0 -109
  33. thds/mops/k8s/uncertain_future.py +0 -160
  34. thds/mops/pure/runner/get_results.py +0 -106
  35. {thds_mops-3.9.20250722150738.dist-info → thds_mops-3.9.20250722163657.dist-info}/WHEEL +0 -0
  36. {thds_mops-3.9.20250722150738.dist-info → thds_mops-3.9.20250722163657.dist-info}/entry_points.txt +0 -0
  37. {thds_mops-3.9.20250722150738.dist-info → thds_mops-3.9.20250722163657.dist-info}/top_level.txt +0 -0
thds/mops/k8s/batching.py DELETED
@@ -1,198 +0,0 @@
- """The basic idea of this module is that different threads can submit _parts_ of a job to a batcher,
- and immediately get the job name back, while the batcher itself defers creating the job until the
- batch is full, or when the process exits.
-
- The theory is that this will get used in processes whose only responsibility is to create jobs,
- so waiting on atexit to create the final batch is not an issue.
-
- If you want a batcher that has a more context-manager-like behavior, you can write one of
- those, but it wouldn't work well with a concurrent.futures Executor-style approach, since
- those don't have an explicit shutdown procedure that we can hook to call __exit__.
- """
-
- import atexit
- import concurrent.futures
- import itertools
- import multiprocessing
- import threading
- import typing as ty
-
- from thds.core import cpus, futures, log
-
- from . import _launch, counts
-
- T = ty.TypeVar("T")
- logger = log.getLogger(__name__)
-
-
- class _AtExitBatcher(ty.Generic[T]):
-     def __init__(self, batch_processor: ty.Callable[[ty.Collection[T]], None]) -> None:
-         self.batch: list[T] = []
-         self._registered = False
-         self._lock = threading.RLock()
-         self._batch_processor = batch_processor
-
-     def add(self, item: T) -> None:
-         with self._lock:
-             if not self._registered:
-                 atexit.register(self.process)
-                 # ensure we flush on process exit, since we don't know how many items are coming
-                 self._registered = True
-             self.batch.append(item)
-
-     def process(self) -> None:
-         if self.batch:
-             with self._lock:
-                 if self.batch:
-                     self._batch_processor(self.batch)
-                     self.batch = []
-
-
- class K8sJobBatchingShim(_AtExitBatcher[str]):
-     """Thread-safe for use within a single process by multiple threads."""
-
-     def __init__(
-         self,
-         submit_func: ty.Callable[[ty.Collection[str]], ty.Any],
-         max_batch_size: int,
-         job_counter: counts.MpValue[int],
-         name_prefix: str = "",
-     ) -> None:
-         """submit_func in particular should be a closure around whatever setup you need to
-         do to call back into a function that is locally wrapped with a k8s shim that will
-         ultimately call k8s.launch. Notably, you
-         """
-         super().__init__(self._process_batch)
-         self._max_batch_size = max_batch_size
-         self._job_counter = job_counter
-         self._job_name = ""
-         self._name_prefix = name_prefix
-         self._submit_func = submit_func
-
-     def _get_new_name(self) -> str:
-         # counts.inc takes a multiprocess lock. do not forget this!
-         job_num = counts.inc(self._job_counter)
-         return _launch.construct_job_name(self._name_prefix, counts.to_name(job_num))
-
-     def add_to_named_job(self, mops_invocation: ty.Sequence[str]) -> str:
-         """Returns job name for the invocation."""
-         with self._lock:
-             if not self._job_name:
-                 self._job_name = self._get_new_name()
-             if len(self.batch) >= self._max_batch_size:
-                 self.process()
-                 self._job_name = self._get_new_name()
-             super().add(" ".join(mops_invocation))
-             return self._job_name
-
-     def _process_batch(self, batch: ty.Collection[str]) -> None:
-         with _launch.JOB_NAME.set(self._job_name):
-             log_lvl = logger.warning if len(batch) < self._max_batch_size else logger.info
-             log_lvl(f"Processing batch of len {len(batch)} with job name {self._job_name}")
-             self._submit_func(batch)
-
-
- F = ty.TypeVar("F", bound=ty.Callable)
- FunctionDecorator = ty.Callable[[F], F]
-
-
- _BATCHER: ty.Optional[K8sJobBatchingShim] = None
-
-
- def init_batcher(
-     submit_func: ty.Callable[[ty.Collection[str]], ty.Any],
-     func_max_batch_size: int,
-     job_counter: counts.MpValue[int],
-     name_prefix: str = "",
- ) -> None:
-     # for use with multiprocessing pool initializer
-     global _BATCHER
-     if _BATCHER is not None:
-         logger.warning("Batcher is already initialized; skipping re-initialization.")
-         return
-
-     _BATCHER = K8sJobBatchingShim(submit_func, func_max_batch_size, job_counter, name_prefix)
-
-
- def init_batcher_with_unpicklable_submit_func(
-     make_submit_func: ty.Callable[[T], ty.Callable[[ty.Collection[str]], ty.Any]],
-     submit_func_arg: T,
-     func_max_batch_size: int,
-     job_counter: counts.MpValue[int],
-     name_prefix: str = "",
- ) -> None:
-     """Use this if you want to have an unpicklable submit function - because applying make_submit_func(submit_func_arg)
-     will happen inside the pool worker process after all the pickling/unpickling has happened.
-     """
-     return init_batcher(
-         make_submit_func(submit_func_arg), func_max_batch_size, job_counter, name_prefix=name_prefix
-     )
-
-
- def make_counting_process_pool_executor(
-     make_submit_func: ty.Callable[[T], ty.Callable[[ty.Collection[str]], ty.Any]],
-     submit_func_arg: T,
-     max_batch_size: int,
-     name_prefix: str = "",
-     max_workers: int = 0,
- ) -> concurrent.futures.ProcessPoolExecutor:
-     """Creates a ProcessPoolExecutor that uses the batching shim for job submission.
-
-     We are introducing this because we see segfaults prior to Python 3.12 related to this issue:
-     https://github.com/python/cpython/issues/77377
-
-     And it would seem that this had to do with creating mp.Values using a 'fork' start
-     method, and then passing those to a ProcessPoolExecutor with
-     mp_context=multiprocessing.get_context('spawn'). So we can help you avoid that by creating
-     the mp.Value for you, alongside its ProcessPoolExecutor.
-
-     NOTE!!
-
-     You should only have one of these per process at a time, because we're doing spooky
-     things with the Job Counter. In fact, you should probably only create one of these
-     _ever_ within a single logical 'application'.
-
-     If you fail to heed this advice, you will get weird launched/finished counts at a
-     minimum. Although these job counts are not mission-critical, you _will_ be confused.
-     """
-     start_method: str = "spawn"
-     # 'spawn' prevents weird batch processing deadlocks that seem to only happen on Linux with 'fork'.
-     # it is strongly recommended to use 'spawn' for this reason.
-
-     mp_context = multiprocessing.get_context(start_method)
-     launch_count = mp_context.Value("i", 0)
-     # even though i want to assign this to a global, I also want to prevent
-     # any possible race condition where i somehow use a different thread's LAUNCH_COUNT
-     # when i create the ProcessPoolExecutor a few lines below.
-     counts.LAUNCH_COUNT = launch_count
-     counts.FINISH_COUNT = mp_context.Value("i", 0)  # we don't use this here; we just reset it to zero.
-     # SPOOKY - reset the global finish counter and make it be the same 'type'
-     return concurrent.futures.ProcessPoolExecutor(
-         max_workers=max_workers or cpus.available_cpu_count(),
-         initializer=init_batcher_with_unpicklable_submit_func,
-         initargs=(make_submit_func, submit_func_arg, max_batch_size, launch_count, name_prefix),
-         mp_context=mp_context,
-     )
-
-
- def shim(args: ty.Sequence[str]) -> futures.PFuture[bool]:
-     # This thing needs to return a lazy Uncertain Future that contains a job name, so that the Job can be polled
-     # ... but the job does not exist yet! So the batcher is in charge of creating the job name
-     # upfront, and then ensuring that it gets used when the job is created.
-     assert _BATCHER is not None, "Batcher must be initialized before using the batching shim."
-     job_name = _BATCHER.add_to_named_job(args)
-     return _launch.create_lazy_job_logging_future(job_name)
-
-
- def batched(iterable: ty.Iterable[T], n: int, *, strict: bool = False) -> ty.Iterator[tuple[T, ...]]:
-     """Just a utility for pre-batching if you're using multiprocessing to create batches."""
-     # TODO get rid of this when we go to Python 3.12+ which has itertools.batched
-     #
-     # batched('ABCDEFG', 3) → ABC DEF G
-     if n < 1:
-         raise ValueError("n must be at least one")
-     iterator = iter(iterable)
-     while batch := tuple(itertools.islice(iterator, n)):
-         if strict and len(batch) != n:
-             raise ValueError("batched(): incomplete batch")
-         yield batch
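
The core mechanism removed here is the atexit-flushed batch: callers add items and immediately get a name back, while the flush happens when the batch fills or the process exits. A minimal self-contained sketch of that pattern, with illustrative names rather than the mops API:

import atexit
import threading
import typing as ty

T = ty.TypeVar("T")


class AtExitBatcher(ty.Generic[T]):
    # collects items and flushes them in one call, either when the batch
    # is full or at interpreter exit (mirroring _AtExitBatcher above)
    def __init__(self, process: ty.Callable[[list[T]], None], max_size: int) -> None:
        self._process = process
        self._max_size = max_size
        self._batch: list[T] = []
        self._lock = threading.Lock()
        atexit.register(self.flush)  # guarantee the final partial batch is flushed

    def add(self, item: T) -> None:
        with self._lock:
            self._batch.append(item)
            if len(self._batch) >= self._max_size:
                self._flush_locked()

    def flush(self) -> None:
        with self._lock:
            self._flush_locked()

    def _flush_locked(self) -> None:
        # must be called with the lock held
        if self._batch:
            self._process(self._batch)
            self._batch = []


batcher = AtExitBatcher(lambda items: print("submitting batch:", items), max_size=3)
for i in range(7):
    batcher.add(f"invocation-{i}")  # the trailing partial batch flushes at exit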
thds/mops/k8s/counts.py DELETED
@@ -1,28 +0,0 @@
- import multiprocessing as mp
- import typing as ty
-
- T = ty.TypeVar("T")
-
-
- class MpValue(ty.Protocol[T]):
-     def get_lock(self) -> ty.Any:
-         ...
-
-     value: T
-
-
- def inc(mp_val: MpValue[int]) -> int:
-     with mp_val.get_lock():
-         mp_val.value += 1
-         return mp_val.value
-
-
- LAUNCH_COUNT = mp.Value("i", 0)
- FINISH_COUNT = mp.Value("i", 0)
- # these are spooky - they're global and mutable, and may in fact get overwritten by code
- # using specific multiprocessing contexts.
-
-
- def to_name(count: int) -> str:
-     """Convert a count to a name."""
-     return f"{count:0>4}"
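
The removed counters boil down to a locked multiprocessing integer plus four-digit zero-padding. A quick standard-library-only illustration of those two behaviors:

import multiprocessing as mp

launch_count = mp.Value("i", 0)  # shared integer counter, starts at 0


def inc(mp_val) -> int:
    # serialize increments across processes, as counts.inc did
    with mp_val.get_lock():
        mp_val.value += 1
        return mp_val.value


assert inc(launch_count) == 1
assert inc(launch_count) == 2
assert f"{7:0>4}" == "0007"  # the zero-padding used by to_name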
thds/mops/k8s/job_future.py DELETED
@@ -1,109 +0,0 @@
- import threading
- import typing as ty
-
- from kubernetes import client
-
- from thds.core import futures, log
- from thds.termtool.colorize import colorized
-
- from . import config, counts, uncertain_future
- from .jobs import is_job_failed, is_job_succeeded, job_source
-
- logger = log.getLogger(__name__)
-
- UNUSUAL = colorized(fg="white", bg="yellow")
- SUCCEEDED = colorized(fg="white", bg="blue")
- FAILED = colorized(fg="white", bg="red")
-
-
- _FINISHED_JOBS = set[str]()
- _FINISHED_JOBS_LOCK = threading.Lock()
-
-
- def _check_newly_finished(job_name: str, namespace: str = "") -> str:
-     # I don't believe it's possible to ever have a Job that both succeeds and fails.
-     namespace = namespace or config.k8s_namespace()
-     job_full = f"{namespace}/{job_name}"
-     if job_full in _FINISHED_JOBS:
-         return ""
-
-     with _FINISHED_JOBS_LOCK:
-         if job_full in _FINISHED_JOBS:
-             return ""
-
-         _FINISHED_JOBS.add(job_full)
-
-     launched = counts.LAUNCH_COUNT.value
-     return f"- ({launched - counts.inc(counts.FINISH_COUNT)} unfinished of {launched})"
-
-
- class K8sJobFailedError(Exception):
-     """Raised by `launch` when a Job is seen to terminate in a Failed state."""
-
-
- def make_job_completion_future(job_name: str, *, namespace: str = "") -> futures.PFuture[bool]:
-     """This is a natural boundary for a serializable lazy future - something that represents
-     work being done across process boundaries (since Kubernetes jobs will be listed via an API).
-
-     If True is returned, the Job has definitely succeeded.
-
-     If False is returned, the Job may have succeeded but we saw no evidence of it.
-
-     If the Job definitely failed, an Exception will be raised.
-     """
-
-     JOB_SEEN = False
-
-     def job_completion_interpreter(
-         job: ty.Optional[client.models.V1Job], last_seen_at: float
-     ) -> ty.Union[uncertain_future.NotYetDone, bool]:
-         nonlocal JOB_SEEN
-         if not job:
-             if JOB_SEEN:
-                 logger.warning(
-                     UNUSUAL(f"Previously-seen job {job_name} no longer exists - assuming success!")
-                 )
-                 # we hereby indicate an unusual success to the Future waiter.
-                 return False
-
-             time_since_last_seen = uncertain_future.official_timer() - last_seen_at
-             if time_since_last_seen > config.k8s_watch_object_stale_seconds():
-                 # this is 5 minutes by default as of 2025-07-15.
-                 raise TimeoutError(
-                     f"Job {job_name} has not been seen for {time_since_last_seen:.1f} seconds - assuming failure!"
-                 )
-
-             # we don't know what's going on but things aren't truly stale yet.
-             return uncertain_future.NotYetDone()
-
-         JOB_SEEN = True
-
-         if is_job_succeeded(job):
-             newly_succeeded = _check_newly_finished(job_name, namespace)
-             if newly_succeeded:
-                 logger.info(SUCCEEDED(f"Job {job_name} Succeeded! {newly_succeeded}"))
-             return True
-
-         if is_job_failed(job):
-             newly_failed = _check_newly_finished(job_name, namespace)
-             if newly_failed:
-                 logger.error(FAILED(f"Job {job_name} Failed! {newly_failed}"))
-             raise K8sJobFailedError(f"Job {job_name} has failed with status: {job.status}")
-
-         return uncertain_future.NotYetDone()  # job is still in progress
-
-     return job_source().create_future(
-         job_completion_interpreter,
-         job_name,
-         namespace=namespace or config.k8s_namespace(),
-     )
-
-
- def make_lazy_completion_future(job_name: str, *, namespace: str = "") -> futures.LazyFuture[bool]:
-     """This is a convenience function that will create a job completion future and then
-     immediately process it, returning the result. See docs on function above.
-     """
-     return futures.make_lazy(make_job_completion_future)(
-         job_name,
-         namespace=namespace or config.k8s_namespace(),
-     )
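
The removed completion future had a tri-state contract: True means confirmed success, False means presumed success (a previously-seen Job disappeared), and a raised exception means confirmed failure or staleness. A toy interpreter showing that contract, with simplified string statuses and a hypothetical 300-second staleness window standing in for the real config:

import time
import typing as ty


class NotYetDone:  # mirrors the sentinel from the removed uncertain_future module
    pass


class K8sJobFailedError(Exception):
    pass


def interpret(
    job_status: ty.Optional[str], seen_before: bool, last_seen_at: float
) -> ty.Union[bool, NotYetDone]:
    if job_status is None:
        if seen_before:
            return False  # previously-seen job vanished: presume success
        if time.monotonic() - last_seen_at > 300.0:
            raise TimeoutError("job never observed within the staleness window")
        return NotYetDone()  # not stale yet: keep polling
    if job_status == "Succeeded":
        return True  # confirmed success
    if job_status == "Failed":
        raise K8sJobFailedError("job terminated in a Failed state")
    return NotYetDone()  # still running


assert interpret("Succeeded", seen_before=True, last_seen_at=time.monotonic()) is True
assert interpret(None, seen_before=True, last_seen_at=time.monotonic()) is False
assert isinstance(interpret("Running", seen_before=False, last_seen_at=time.monotonic()), NotYetDone)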
thds/mops/k8s/uncertain_future.py DELETED
@@ -1,160 +0,0 @@
- import collections
- import threading
- import time
- import typing as ty
-
- # we use concurrent.futures.Future as an implementation detail, but it's communicated
- # as core.futures.PFuture to give us the flexibility to change the implementation later if needed.
- from concurrent.futures import Future
- from dataclasses import dataclass
- from uuid import uuid4
-
- from typing_extensions import Self
-
- from thds import core
-
- R_0 = ty.TypeVar("R_0", contravariant=True)  # R-naught - the thing that might resolve a Future.
- # a value for this type may never be None.
-
- R = ty.TypeVar("R")
- # the Result type of the Future. These are allowed to be None, since some Futures may
- # resolve but not return a value.
-
-
- class NotYetDone:
-     pass
-
-
- _LastSeenAt = float  # type alias for the last seen time of the Future, in seconds since epoch
-
-
- FutureInterpreter = ty.Callable[[ty.Optional[R_0], _LastSeenAt], ty.Union[R, NotYetDone]]
- # a FutureInterpreter is a function that takes an object R_0 and the time.monotonic() at
- # which it was last seen, and returns either NotYetDone (if the status is still in progress) or
- # the actual Future result of type R, or, if the status is failure,
- # _raises_ an appropriate Exception.
-
-
- class _FutureInterpretationShim(ty.Generic[R_0, R]):
-     def __init__(self, interpreter: FutureInterpreter[R_0, ty.Union[NotYetDone, R]]) -> None:
-         self.future = Future[R]()
-         self._interpreter = interpreter
-         self._id = uuid4().hex  # has an id so it can be hashed and therefore easily found in a set
-
-     def __hash__(self) -> int:
-         return hash(self._id)
-
-     def __call__(self, r_0: ty.Optional[R_0], last_seen_at: float) -> ty.Optional[Self]:
-         """First and foremost - this _must_ be treated as an object that the creator
-         is ultimately responsible for calling on a semi-regular basis. It represents a
-         likely deadlock for the holder of the Future if it is never called.
-
-         Return None if the Future is still in progress and should not be unregistered.
-         Return self if the Future is done and should be unregistered.
-         """
-         try:
-             interpretation = self._interpreter(r_0, last_seen_at)
-             if isinstance(interpretation, NotYetDone):
-                 return None  # do nothing and do not unregister - the status is still in progress.
-
-             self.future.set_result(interpretation)
-         except Exception as e:
-             self.future.set_exception(e)
-
-         return self
-
-
- K = ty.TypeVar("K")  # Key type for the UncertainFuturesTracker
-
-
- @dataclass
- class _FuturesState(ty.Generic[R_0]):
-     """Represents a single 'observable' that may have multiple Futures (and therefore interpretations) associated with it."""
-
-     futshims: list[_FutureInterpretationShim[R_0, ty.Any]]
-     last_seen_at: float
-
-
- def official_timer() -> float:
-     # we don't need any particular meaning to the time.
-     return time.monotonic()
-
-
- class UncertainFuturesTracker(ty.Generic[K, R_0]):
-     """This class represents a kind of Future where we cannot be guaranteed that we will ever see
-     any further information about it, because we do not control the source of the data.
-
-     A good example would be a Kubernetes object that we are watching - we may _think_ that a Job will be created,
-     but there are race conditions galore in terms of actually looking for that object.
-
-     However, if we _do_ see it at some point, then we can interpret future 'missingness'
-     as a tentative success.
-
-     The danger with this uncertainty is that Futures represent implicit deadlocks - if we
-     never resolve the Future, then a caller may be waiting for it forever. Therefore, we
-     ask the original requestor of the Future to specify how long they are willing to wait
-     to get a result, after which point we will resolve the Future as an exception.
-     """
-
-     def __init__(self, allowed_stale_seconds: float) -> None:
-         self._keyed_futures_state = collections.OrderedDict[K, _FuturesState[R_0]]()
-         self._lock = threading.Lock()  # i don't trust ordered dict operations to be thread-safe.
-         self._check_stale_seconds = allowed_stale_seconds
-
-     def create(self, key: K, interpreter: FutureInterpreter[R_0, R]) -> core.futures.PFuture[R]:
-         futshim = _FutureInterpretationShim(interpreter)
-         with self._lock:
-             if key not in self._keyed_futures_state:
-                 self._keyed_futures_state[key] = _FuturesState(
-                     [futshim],
-                     last_seen_at=official_timer() + self._check_stale_seconds,
-                     # we provide a double margin for objects that we have never seen before.
-                 )
-                 self._keyed_futures_state.move_to_end(key, last=False)
-                 # never seen and therefore should be at the beginning (most stale)
-             else:
-                 # maintain our ordered dict so we can handle garbage collection of stale Futures.
-                 self._keyed_futures_state[key].futshims.append(futshim)
-
-         return futshim.future
-
-     def update(self, key: ty.Optional[K], r_0: ty.Optional[R_0]) -> None:
-         """Update the keyed Futures based on their interpreters.
-
-         Also check any stale Futures - Futures that have not seen an update (via their key) in a while.
-
-         If `key` is None, we will update all Futures that have been created so far.
-         """
-
-         def check_resolution(fut_state: _FuturesState[R_0], inner_r_0: ty.Optional[R_0]) -> None:
-             for future_shim_that_is_done in core.parallel.yield_results(
-                 [
-                     core.thunks.thunking(futshim)(inner_r_0, fut_state.last_seen_at)
-                     for futshim in fut_state.futshims
-                 ],
-                 progress_logger=core.log.getLogger(__name__).debug,
-                 named="UncertainFuturesTracker.update",
-             ):
-                 if future_shim_that_is_done is not None:
-                     # the Future is done, so we can remove it from the list of Futures.
-                     fut_state.futshims.remove(future_shim_that_is_done)
-
-         if key is not None:
-             with self._lock:
-                 if key not in self._keyed_futures_state:
-                     self._keyed_futures_state[key] = _FuturesState(list(), last_seen_at=official_timer())
-                 else:
-                     # maintain our ordered dict so we can handle garbage collection of stale Futures.
-                     self._keyed_futures_state.move_to_end(key)
-                     self._keyed_futures_state[key].last_seen_at = official_timer()
-
-             fut_state = self._keyed_futures_state[key]
-             check_resolution(fut_state, r_0)
-
-         # 'garbage collect' any Futures that haven't been updated in a while.
-         for futs_state in self._keyed_futures_state.values():
-             if futs_state.last_seen_at + self._check_stale_seconds < official_timer():
-                 check_resolution(futs_state, None)
-             else:  # these are ordered, so once we see one that's not stale, we can stop checking.
-                 # this prevents us from having to do O(N) checks for every update.
-                 break
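
At the core of the removed tracker is the interpretation shim: each observation is fed to an interpreter that either defers (NotYetDone), resolves the Future with a value, or resolves it with an exception. A reduced sketch of that resolution flow using only standard-library futures (toy code, not the thds.core API):

import typing as ty
from concurrent.futures import Future


class NotYetDone:
    pass


def make_shim(
    interpreter: ty.Callable[[ty.Any, float], ty.Any]
) -> ty.Tuple[Future, ty.Callable[[ty.Any, float], bool]]:
    future: Future = Future()

    def observe(obj: ty.Any, last_seen_at: float) -> bool:
        try:
            outcome = interpreter(obj, last_seen_at)
            if isinstance(outcome, NotYetDone):
                return False  # still pending; keep the shim registered
            future.set_result(outcome)
        except Exception as exc:
            future.set_exception(exc)  # an interpreter failure also resolves the Future
        return True  # resolved one way or the other; unregister the shim

    return future, observe


def job_interpreter(job_status: ty.Optional[str], last_seen_at: float) -> ty.Any:
    if job_status == "Succeeded":
        return True
    return NotYetDone()


fut, observe = make_shim(job_interpreter)
assert observe("Running", 0.0) is False    # interpreter defers; Future unresolved
assert observe("Succeeded", 0.0) is True   # interpreter resolves the Future
assert fut.result() is True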
thds/mops/pure/runner/get_results.py DELETED
@@ -1,106 +0,0 @@
- import concurrent.futures
- import threading
- import typing as ty
- from dataclasses import dataclass
- from pathlib import Path
-
- from thds.core import futures, log
-
- from ...config import max_concurrent_network_ops
- from ..core import lock, memo
- from ..core.types import NoResultAfterShimSuccess
- from ..tools.summarize import run_summary
- from . import types
-
-
- class ResultAndInvocationType(ty.NamedTuple):
-     value_or_error: ty.Union[memo.results.Success, memo.results.Error]
-     invoc_type: run_summary.InvocationType
-
-
- def unwrap_value_or_error(
-     get_meta_and_result: types.GetMetaAndResult,
-     run_directory: ty.Optional[Path],
-     runner_prefix: str,
-     args_kwargs_uris: ty.Collection[str],
-     memo_uri: str,
-     result_and_itype: ResultAndInvocationType,
- ) -> ty.Any:  # the result value
-     result = result_and_itype.value_or_error
-     metadata = None
-     value_t = None
-     try:
-         if isinstance(result, memo.results.Success):
-             metadata, value_t = get_meta_and_result("value", result.value_uri)
-             return value_t
-         else:
-             assert isinstance(result, memo.results.Error), "Must be Error or Success"
-             metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
-             raise exc
-     finally:
-         run_summary.log_function_execution(
-             *(run_directory, memo_uri, result_and_itype.invoc_type),
-             metadata=metadata,
-             runner_prefix=runner_prefix,
-             was_error=not isinstance(result, memo.results.Success),
-             return_value=value_t,
-             args_kwargs_uris=args_kwargs_uris,
-         )
-
-
- _AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()) * 3)
- # _IN prioritizes retrieving the result of a Shim that has completed.
- logger = log.getLogger(__name__)
- T = ty.TypeVar("T")
-
-
- @dataclass
- class PostShimResultGetter(ty.Generic[T]):
-     """Must be serializable on its own, so we can pass it across process boundaries
-     to serve as a foundation for a cross-process Future.
-
-     Happily, this should not be terribly difficult, as the 'state' of a mops function
-     is predicated entirely on the memo URI, which is a string.
-     """
-
-     memo_uri: str
-     partially_applied_unwrap_value_or_error: ty.Callable[[str, ResultAndInvocationType], T]
-     release_lock: ty.Optional[ty.Callable[[], None]] = None
-
-     def __call__(self, _shim_result: ty.Any) -> T:
-         """Check if the result exists, and return it if it does.
-
-         This is the future 'translator' that allows us to chain a shim future to be a result future.
-         """
-         memo_uri = self.memo_uri
-
-         try:
-             with _AFTER_INVOCATION_SEMAPHORE:
-                 value_or_error = memo.results.check_if_result_exists(memo_uri, check_for_exception=True)
-                 if not value_or_error:
-                     raise NoResultAfterShimSuccess(
-                         f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
-                     )
-                 return self.partially_applied_unwrap_value_or_error(
-                     memo_uri, ResultAndInvocationType(value_or_error, "invoked")
-                 )
-         finally:
-             if self.release_lock is not None:
-                 try:
-                     self.release_lock()
-                 except Exception:
-                     logger.exception("Failed to release lock after shim result retrieval.")
-
-
- def lock_maintaining_future(
-     lock_acquired: lock.LockAcquired,
-     post_shim_result_getter: PostShimResultGetter[futures.R1],
-     inner_future: futures.PFuture[futures.R],
- ) -> concurrent.futures.Future[futures.R1]:
-     """Create a Future that retrieves the result of a shim invocation,
-     maintaining the lock while the result is being retrieved.
-     """
-     post_shim_result_getter.release_lock = lock.maintain_to_release(lock_acquired)
-     return futures.chain_futures(inner_future, concurrent.futures.Future(), post_shim_result_getter)
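
PostShimResultGetter is the 'translator' between a shim future and a result future. The thds.core.futures.chain_futures helper it relies on is not shown in this diff, but the chaining idea can be sketched with standard-library futures (assumed semantics: resolve the outer future by translating the inner one's result, propagating any exception):

import typing as ty
from concurrent.futures import Future


def chain_futures(inner: Future, outer: Future, translate: ty.Callable[[ty.Any], ty.Any]) -> Future:
    def _on_done(done: Future) -> None:
        try:
            outer.set_result(translate(done.result()))  # translate the inner result
        except Exception as exc:
            outer.set_exception(exc)  # propagate inner or translation failure

    inner.add_done_callback(_on_done)
    return outer


shim_future: Future = Future()  # resolves when the remote shim exits cleanly
result_future = chain_futures(
    shim_future, Future(), lambda shim_ok: f"result fetched (shim ok={shim_ok})"
)

shim_future.set_result(True)
print(result_future.result())  # -> result fetched (shim ok=True)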