thds.mops 3.9.20250722163657__py3-none-any.whl → 3.9.20250722164625__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.mops might be problematic. Click here for more details.
- thds/mops/impure/runner.py +1 -1
- thds/mops/k8s/__init__.py +3 -1
- thds/mops/k8s/{launch.py → _launch.py} +56 -57
- thds/mops/k8s/batching.py +198 -0
- thds/mops/k8s/config.py +1 -1
- thds/mops/k8s/counts.py +28 -0
- thds/mops/k8s/job_future.py +109 -0
- thds/mops/k8s/jobs.py +4 -0
- thds/mops/k8s/logging.py +37 -5
- thds/mops/k8s/uncertain_future.py +160 -0
- thds/mops/k8s/watch.py +120 -62
- thds/mops/pure/__init__.py +2 -1
- thds/mops/pure/_magic/sauce.py +11 -3
- thds/mops/pure/_magic/shims.py +2 -2
- thds/mops/pure/core/deferred_work.py +15 -12
- thds/mops/pure/core/entry/runner_registry.py +1 -10
- thds/mops/pure/core/lock/__init__.py +1 -0
- thds/mops/pure/core/lock/_acquire.py +2 -2
- thds/mops/pure/core/lock/maintain.py +22 -3
- thds/mops/pure/core/lock/write.py +19 -19
- thds/mops/pure/core/memo/__init__.py +1 -1
- thds/mops/pure/core/memo/results.py +5 -4
- thds/mops/pure/core/use_runner.py +21 -7
- thds/mops/pure/pickling/mprunner.py +21 -14
- thds/mops/pure/pickling/pickles.py +19 -8
- thds/mops/pure/pickling/remote.py +3 -1
- thds/mops/pure/runner/get_results.py +106 -0
- thds/mops/pure/runner/local.py +58 -87
- thds/mops/pure/runner/shim_builder.py +7 -7
- thds/mops/pure/runner/simple_shims.py +7 -0
- thds/mops/pure/runner/types.py +15 -4
- thds/mops/pure/tools/summarize/run_summary.py +9 -8
- {thds_mops-3.9.20250722163657.dist-info → thds_mops-3.9.20250722164625.dist-info}/METADATA +1 -1
- {thds_mops-3.9.20250722163657.dist-info → thds_mops-3.9.20250722164625.dist-info}/RECORD +37 -32
- {thds_mops-3.9.20250722163657.dist-info → thds_mops-3.9.20250722164625.dist-info}/WHEEL +0 -0
- {thds_mops-3.9.20250722163657.dist-info → thds_mops-3.9.20250722164625.dist-info}/entry_points.txt +0 -0
- {thds_mops-3.9.20250722163657.dist-info → thds_mops-3.9.20250722164625.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import typing as ty
|
|
3
|
+
from dataclasses import dataclass
|
|
3
4
|
from datetime import datetime, timedelta
|
|
4
5
|
|
|
5
6
|
from thds.core import hostname, log
|
|
@@ -10,38 +11,37 @@ from .types import LockContents
|
|
|
10
11
|
logger = log.getLogger(__name__)
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
write_count = 0
|
|
18
|
-
first_written_at = ""
|
|
14
|
+
@dataclass
|
|
15
|
+
class LockEmitter:
|
|
16
|
+
writer_id: str
|
|
17
|
+
expire: timedelta
|
|
19
18
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
), f"{writer_id} should not contain a slash - maybe you passed a URI instead?"
|
|
19
|
+
write_count: int = 0
|
|
20
|
+
first_written_at: str = ""
|
|
23
21
|
|
|
24
|
-
def
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
def __post_init__(self) -> None:
|
|
23
|
+
assert (
|
|
24
|
+
"/" not in self.writer_id
|
|
25
|
+
), f"{self.writer_id} should not contain a slash - maybe you passed a URI instead?"
|
|
26
|
+
|
|
27
|
+
def __call__(self, first_acquired_at: ty.Optional[datetime]) -> LockContents:
|
|
28
|
+
self.write_count += 1
|
|
27
29
|
now = _funcs.utc_now().isoformat()
|
|
28
|
-
first_written_at = first_written_at or now
|
|
30
|
+
self.first_written_at = self.first_written_at or now
|
|
29
31
|
|
|
30
32
|
return {
|
|
31
|
-
"writer_id": writer_id,
|
|
33
|
+
"writer_id": self.writer_id,
|
|
32
34
|
"written_at": now,
|
|
33
|
-
"expire_s": expire.total_seconds(),
|
|
35
|
+
"expire_s": self.expire.total_seconds(),
|
|
34
36
|
# debug stuff:
|
|
35
|
-
"write_count": write_count,
|
|
37
|
+
"write_count": self.write_count,
|
|
36
38
|
"hostname": hostname.friendly(),
|
|
37
39
|
"pid": str(os.getpid()),
|
|
38
|
-
"first_written_at": first_written_at,
|
|
40
|
+
"first_written_at": self.first_written_at,
|
|
39
41
|
"first_acquired_at": first_acquired_at.isoformat() if first_acquired_at else "",
|
|
40
42
|
"released_at": "",
|
|
41
43
|
}
|
|
42
44
|
|
|
43
|
-
return lock_contents
|
|
44
|
-
|
|
45
45
|
|
|
46
46
|
class LockfileWriter:
|
|
47
47
|
"""The core purpose of this class is to allow setting of first_acquired_at immediately
|
|
@@ -76,8 +76,8 @@ class RequiredResultNotFound(Exception):
|
|
|
76
76
|
|
|
77
77
|
def check_if_result_exists(
|
|
78
78
|
memo_uri: str,
|
|
79
|
-
|
|
80
|
-
before_raise: ty.Callable[[], ty.Any] =
|
|
79
|
+
check_for_exception: bool = False,
|
|
80
|
+
before_raise: ty.Optional[ty.Callable[[], ty.Any]] = None,
|
|
81
81
|
) -> ty.Union[None, Success, Error]:
|
|
82
82
|
fs = lookup_blob_store(memo_uri)
|
|
83
83
|
value_uri = fs.join(memo_uri, RESULT)
|
|
@@ -86,14 +86,15 @@ def check_if_result_exists(
|
|
|
86
86
|
|
|
87
87
|
required_msg = _should_require_result(memo_uri)
|
|
88
88
|
if required_msg: # might be custom or the default. either way it indicates a required result.
|
|
89
|
-
before_raise
|
|
89
|
+
if before_raise:
|
|
90
|
+
before_raise()
|
|
90
91
|
error_msg = f"Required a result for {ORANGE(memo_uri)} but that result was not found"
|
|
91
92
|
# i'm tired of visually scanning for these memo_uris in logs.
|
|
92
93
|
if required_msg != _NO_MSG:
|
|
93
94
|
error_msg += f": {required_msg}"
|
|
94
95
|
raise RequiredResultNotFound(error_msg, memo_uri)
|
|
95
96
|
|
|
96
|
-
if
|
|
97
|
+
if not check_for_exception:
|
|
97
98
|
return None
|
|
98
99
|
|
|
99
100
|
error_uri = fs.join(memo_uri, EXCEPTION)
|
|
@@ -4,21 +4,29 @@ You can transfer control to a Runner without this, but decorators are a Pythonic
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import typing as ty
|
|
7
|
+
from contextlib import contextmanager
|
|
7
8
|
from functools import wraps
|
|
8
9
|
|
|
9
10
|
from thds.core import log, stack_context
|
|
11
|
+
from thds.mops._utils.names import full_name_and_callable
|
|
10
12
|
|
|
11
|
-
from .entry.runner_registry import entry_count
|
|
12
13
|
from .types import Runner
|
|
13
14
|
|
|
15
|
+
_USE_RUNNER_BYPASS = stack_context.StackContext[set[str]]("use_runner_bypass", set())
|
|
16
|
+
# use this in a Runner remote entry point to allow the remote function call
|
|
17
|
+
# to bypass any use_runner decorator. Also necessary in case somebody is doing advanced
|
|
18
|
+
# things like using a remote runner to run a manifest of _other_ remote functions...
|
|
19
|
+
|
|
14
20
|
logger = log.getLogger(__name__)
|
|
15
21
|
F = ty.TypeVar("F", bound=ty.Callable)
|
|
16
|
-
FUNCTION_UNWRAP_COUNT = stack_context.StackContext("function_unwrap_count", 0)
|
|
17
22
|
|
|
18
23
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
@contextmanager
|
|
25
|
+
def unwrap_use_runner(f: F) -> ty.Iterator[None]:
|
|
26
|
+
full_name, _ = full_name_and_callable(f)
|
|
27
|
+
with _USE_RUNNER_BYPASS.set({full_name}):
|
|
28
|
+
# this is a no-op if the function is not wrapped
|
|
29
|
+
yield
|
|
22
30
|
|
|
23
31
|
|
|
24
32
|
def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> ty.Callable[[F], F]:
|
|
@@ -34,9 +42,15 @@ def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> t
|
|
|
34
42
|
def deco(f: F) -> F:
|
|
35
43
|
@wraps(f)
|
|
36
44
|
def __use_runner_wrapper(*args, **kwargs): # type: ignore
|
|
37
|
-
|
|
45
|
+
def should_bypass() -> bool:
|
|
46
|
+
if skip():
|
|
47
|
+
return True
|
|
48
|
+
full_name, _ = full_name_and_callable(f)
|
|
49
|
+
return full_name in _USE_RUNNER_BYPASS()
|
|
50
|
+
|
|
51
|
+
if should_bypass():
|
|
38
52
|
logger.debug("Calling function %s directly...", f)
|
|
39
|
-
with
|
|
53
|
+
with unwrap_use_runner(f):
|
|
40
54
|
return f(*args, **kwargs)
|
|
41
55
|
|
|
42
56
|
logger.debug("Forwarding local function %s call to runner...", f)
|
|
@@ -11,7 +11,7 @@ from functools import partial
|
|
|
11
11
|
|
|
12
12
|
from typing_extensions import Self
|
|
13
13
|
|
|
14
|
-
from thds.core import cache, log
|
|
14
|
+
from thds.core import cache, futures, log
|
|
15
15
|
from thds.core.stack_context import StackContext
|
|
16
16
|
|
|
17
17
|
from ..._utils.once import Once
|
|
@@ -20,7 +20,7 @@ from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
|
|
|
20
20
|
from ..core.serialize_paths import CoordinatingPathSerializer
|
|
21
21
|
from ..core.types import Args, F, Kwargs, Serializer, T
|
|
22
22
|
from ..runner import local, shim_builder
|
|
23
|
-
from ..runner.types import Shim, ShimBuilder
|
|
23
|
+
from ..runner.types import FutureShim, Shim, ShimBuilder
|
|
24
24
|
from ..tools.summarize import run_summary
|
|
25
25
|
from . import _pickle, pickles, sha256_b64
|
|
26
26
|
|
|
@@ -32,7 +32,7 @@ _KWARGS_CONTEXT = StackContext[ty.Mapping]("args_kwargs", dict())
|
|
|
32
32
|
logger = log.getLogger(__name__)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def mp_shim(base_shim: Shim, shim_args: ty.Sequence[str]) -> ty.Any:
|
|
35
|
+
def mp_shim(base_shim: ty.Union[Shim, FutureShim], shim_args: ty.Sequence[str]) -> ty.Any:
|
|
36
36
|
return base_shim((RUNNER_NAME, *shim_args))
|
|
37
37
|
|
|
38
38
|
|
|
@@ -48,7 +48,7 @@ class MemoizingPicklingRunner:
|
|
|
48
48
|
|
|
49
49
|
def __init__(
|
|
50
50
|
self,
|
|
51
|
-
shim: ty.Union[ShimBuilder, Shim],
|
|
51
|
+
shim: ty.Union[ShimBuilder, Shim, FutureShim],
|
|
52
52
|
blob_storage_root: uris.UriResolvable,
|
|
53
53
|
*,
|
|
54
54
|
rerun_exceptions: bool = True,
|
|
@@ -159,20 +159,14 @@ class MemoizingPicklingRunner:
|
|
|
159
159
|
),
|
|
160
160
|
)
|
|
161
161
|
|
|
162
|
-
def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> Shim:
|
|
162
|
+
def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> ty.Union[Shim, FutureShim]:
|
|
163
163
|
base_shim = self._shim_builder(func, args, kwargs)
|
|
164
164
|
return partial(mp_shim, base_shim)
|
|
165
165
|
|
|
166
|
-
def
|
|
167
|
-
"""
|
|
168
|
-
|
|
169
|
-
Passes data to shim process via pickles in a Blob Store.
|
|
166
|
+
def submit(self, func: ty.Callable[..., T], *args: ty.Any, **kwargs: ty.Any) -> futures.PFuture[T]:
|
|
167
|
+
"""Now that mops supports Futures, we can have an 'inner' API that returns a PFuture.
|
|
170
168
|
|
|
171
|
-
|
|
172
|
-
derived function memo URI, which contains the deterministic
|
|
173
|
-
hashed bytes of all the function arguments, but also
|
|
174
|
-
additional namespacing including pipeline_id as documented
|
|
175
|
-
in memo.function_memospace.py.
|
|
169
|
+
We are trying to mimic the interface that concurrent.futures.Executors provide.
|
|
176
170
|
"""
|
|
177
171
|
logger.debug("Preparing to run function via remote shim")
|
|
178
172
|
with _ARGS_CONTEXT.set(args), _KWARGS_CONTEXT.set(kwargs):
|
|
@@ -192,3 +186,16 @@ class MemoizingPicklingRunner:
|
|
|
192
186
|
args,
|
|
193
187
|
kwargs,
|
|
194
188
|
)
|
|
189
|
+
|
|
190
|
+
def __call__(self, func: ty.Callable[..., T], args: Args, kwargs: Kwargs) -> T:
|
|
191
|
+
"""Return result of running this function remotely via the shim.
|
|
192
|
+
|
|
193
|
+
Passes data to shim process via pickles in a Blob Store.
|
|
194
|
+
|
|
195
|
+
May return cached (previously-computed) results found via the
|
|
196
|
+
derived function memo URI, which contains the deterministic
|
|
197
|
+
hashed bytes of all the function arguments, but also
|
|
198
|
+
additional namespacing including pipeline_id as documented
|
|
199
|
+
in memo.function_memospace.py.
|
|
200
|
+
"""
|
|
201
|
+
return self.submit(func, *args, **kwargs).result()
|
|
@@ -60,19 +60,30 @@ class PicklableFunction:
|
|
|
60
60
|
def __repr__(self) -> str:
|
|
61
61
|
return str(self)
|
|
62
62
|
|
|
63
|
+
def _resolve(self) -> ty.Callable:
|
|
64
|
+
"""Resolve the function if it hasn't been resolved yet."""
|
|
65
|
+
if self.f is None:
|
|
66
|
+
logger.debug(f"Dynamically importing function {str(self)}")
|
|
67
|
+
if self.fmod == "__main__":
|
|
68
|
+
self.f = get_main_module_function(self.fname) # type: ignore
|
|
69
|
+
else:
|
|
70
|
+
mod = importlib.import_module(self.fmod)
|
|
71
|
+
self.f = getattr(mod, self.fname)
|
|
72
|
+
assert self.f is not None
|
|
73
|
+
return self.f
|
|
74
|
+
return self.f
|
|
75
|
+
|
|
76
|
+
@property
|
|
77
|
+
def func(self) -> ty.Callable:
|
|
78
|
+
"""This is a property so we aren't ruining backward pickle compatibility."""
|
|
79
|
+
return self._resolve()
|
|
80
|
+
|
|
63
81
|
@property
|
|
64
82
|
def __name__(self) -> str:
|
|
65
83
|
return self.fname
|
|
66
84
|
|
|
67
85
|
def __call__(self, *args: ty.Any, **kwargs: ty.Any) -> ty.Any:
|
|
68
|
-
|
|
69
|
-
if self.fmod == "__main__":
|
|
70
|
-
self.f = get_main_module_function(self.fname) # type: ignore
|
|
71
|
-
else:
|
|
72
|
-
mod = importlib.import_module(self.fmod)
|
|
73
|
-
self.f = getattr(mod, self.fname)
|
|
74
|
-
assert self.f
|
|
75
|
-
return self.f(*args, **kwargs)
|
|
86
|
+
return self._resolve()(*args, **kwargs)
|
|
76
87
|
|
|
77
88
|
|
|
78
89
|
class UnpickleSimplePickleFromUri:
|
|
@@ -12,6 +12,7 @@ from ..core.memo import results
|
|
|
12
12
|
from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
|
|
13
13
|
from ..core.serialize_paths import CoordinatingPathSerializer
|
|
14
14
|
from ..core.types import Args, BlobStore, Kwargs, T
|
|
15
|
+
from ..core.use_runner import unwrap_use_runner
|
|
15
16
|
from ..runner import strings
|
|
16
17
|
from . import _pickle, mprunner, pickles, sha256_b64
|
|
17
18
|
|
|
@@ -123,7 +124,8 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
|
|
|
123
124
|
def do_work_return_result() -> object:
|
|
124
125
|
# ONLY failures in this code should transmit an EXCEPTION
|
|
125
126
|
# back to the orchestrator side.
|
|
126
|
-
|
|
127
|
+
with unwrap_use_runner(func):
|
|
128
|
+
return func(*args, **kwargs)
|
|
127
129
|
|
|
128
130
|
route_return_value_or_exception(
|
|
129
131
|
_ResultExcWithMetadataChannel(
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
import threading
|
|
3
|
+
import typing as ty
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from thds.core import futures, log
|
|
8
|
+
|
|
9
|
+
from ...config import max_concurrent_network_ops
|
|
10
|
+
from ..core import lock, memo
|
|
11
|
+
from ..core.types import NoResultAfterShimSuccess
|
|
12
|
+
from ..tools.summarize import run_summary
|
|
13
|
+
from . import types
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ResultAndInvocationType(ty.NamedTuple):
|
|
17
|
+
value_or_error: ty.Union[memo.results.Success, memo.results.Error]
|
|
18
|
+
invoc_type: run_summary.InvocationType
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def unwrap_value_or_error(
|
|
22
|
+
get_meta_and_result: types.GetMetaAndResult,
|
|
23
|
+
run_directory: ty.Optional[Path],
|
|
24
|
+
runner_prefix: str,
|
|
25
|
+
args_kwargs_uris: ty.Collection[str],
|
|
26
|
+
memo_uri: str,
|
|
27
|
+
result_and_itype: ResultAndInvocationType,
|
|
28
|
+
) -> ty.Any: # the result value
|
|
29
|
+
result = result_and_itype.value_or_error
|
|
30
|
+
metadata = None
|
|
31
|
+
value_t = None
|
|
32
|
+
try:
|
|
33
|
+
if isinstance(result, memo.results.Success):
|
|
34
|
+
metadata, value_t = get_meta_and_result("value", result.value_uri)
|
|
35
|
+
return value_t
|
|
36
|
+
else:
|
|
37
|
+
assert isinstance(result, memo.results.Error), "Must be Error or Success"
|
|
38
|
+
metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
|
|
39
|
+
raise exc
|
|
40
|
+
finally:
|
|
41
|
+
run_summary.log_function_execution(
|
|
42
|
+
*(run_directory, memo_uri, result_and_itype.invoc_type),
|
|
43
|
+
metadata=metadata,
|
|
44
|
+
runner_prefix=runner_prefix,
|
|
45
|
+
was_error=not isinstance(result, memo.results.Success),
|
|
46
|
+
return_value=value_t,
|
|
47
|
+
args_kwargs_uris=args_kwargs_uris,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
_AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()) * 3)
|
|
52
|
+
# _AFTER prioritizes retrieving the result of a Shim that has completed.
|
|
53
|
+
logger = log.getLogger(__name__)
|
|
54
|
+
T = ty.TypeVar("T")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class PostShimResultGetter(ty.Generic[T]):
|
|
59
|
+
"""Must be serializable on its own, so we can pass it across process boundaries
|
|
60
|
+
to serve as a foundation for a cross-process Future.
|
|
61
|
+
|
|
62
|
+
Happily, this should not be terribly difficult, as the 'state' of a mops function
|
|
63
|
+
is predicated entirely on the memo URI, which is a string.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
memo_uri: str
|
|
67
|
+
partially_applied_unwrap_value_or_error: ty.Callable[[str, ResultAndInvocationType], T]
|
|
68
|
+
release_lock: ty.Optional[ty.Callable[[], None]] = None
|
|
69
|
+
|
|
70
|
+
def __call__(self, _shim_result: ty.Any) -> T:
|
|
71
|
+
"""Check if the result exists, and return it if it does.
|
|
72
|
+
|
|
73
|
+
This is the future 'translator' that allows us to chain a shim future to be a result future.
|
|
74
|
+
"""
|
|
75
|
+
memo_uri = self.memo_uri
|
|
76
|
+
|
|
77
|
+
try:
|
|
78
|
+
with _AFTER_INVOCATION_SEMAPHORE:
|
|
79
|
+
value_or_error = memo.results.check_if_result_exists(memo_uri, check_for_exception=True)
|
|
80
|
+
if not value_or_error:
|
|
81
|
+
raise NoResultAfterShimSuccess(
|
|
82
|
+
f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
|
|
83
|
+
)
|
|
84
|
+
return self.partially_applied_unwrap_value_or_error(
|
|
85
|
+
memo_uri, ResultAndInvocationType(value_or_error, "invoked")
|
|
86
|
+
)
|
|
87
|
+
finally:
|
|
88
|
+
if self.release_lock is not None:
|
|
89
|
+
try:
|
|
90
|
+
self.release_lock()
|
|
91
|
+
except Exception:
|
|
92
|
+
logger.exception("Failed to release lock after shim result retrieval.")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def lock_maintaining_future(
|
|
96
|
+
lock_acquired: lock.LockAcquired,
|
|
97
|
+
post_shim_result_getter: PostShimResultGetter[futures.R1],
|
|
98
|
+
inner_future: futures.PFuture[futures.R],
|
|
99
|
+
) -> concurrent.futures.Future[futures.R1]:
|
|
100
|
+
"""Create a Future that will be used to retrieve the result of a shim invocation.
|
|
101
|
+
|
|
102
|
+
This Future will be used to retrieve the result of a shim invocation, and will
|
|
103
|
+
maintain the lock while it is being retrieved.
|
|
104
|
+
"""
|
|
105
|
+
post_shim_result_getter.release_lock = lock.maintain_to_release(lock_acquired)
|
|
106
|
+
return futures.chain_futures(inner_future, concurrent.futures.Future(), post_shim_result_getter)
|
thds/mops/pure/runner/local.py
CHANGED
|
@@ -5,28 +5,32 @@ import threading
|
|
|
5
5
|
import time
|
|
6
6
|
import typing as ty
|
|
7
7
|
from datetime import datetime, timedelta, timezone
|
|
8
|
+
from functools import partial
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
|
-
from thds.core import
|
|
11
|
+
from thds.core import futures, log, scope
|
|
11
12
|
from thds.termtool.colorize import colorized, make_colorized_out
|
|
12
13
|
|
|
13
14
|
from ...config import max_concurrent_network_ops
|
|
14
15
|
from ..core import deferred_work, lock, memo, metadata, pipeline_id_mask, uris
|
|
16
|
+
from ..core.lock.maintain import MAINTAIN_LOCKS # noqa: F401
|
|
15
17
|
from ..core.partial import unwrap_partial
|
|
16
|
-
from ..core.types import Args, Kwargs,
|
|
18
|
+
from ..core.types import Args, Kwargs, T
|
|
17
19
|
from ..tools.summarize import run_summary
|
|
18
20
|
from . import strings, types
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
from .get_results import (
|
|
22
|
+
PostShimResultGetter,
|
|
23
|
+
ResultAndInvocationType,
|
|
24
|
+
lock_maintaining_future,
|
|
25
|
+
unwrap_value_or_error,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# this semaphore (and a similar one in get_results) allows us to prioritize getting a single unit
|
|
23
29
|
# of progress _complete_, rather than issuing many instructions to the
|
|
24
30
|
# underlying client and allowing it to randomly order the operations
|
|
25
31
|
# such that it takes longer to get a full unit of work complete.
|
|
26
32
|
_BEFORE_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
|
|
27
|
-
#
|
|
28
|
-
_AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
|
|
29
|
-
# _IN prioritizes retrieving the result of a Shim that has completed.
|
|
33
|
+
# _BEFORE prioritizes uploading a single invocation and its dependencies so the Shim can start running.
|
|
30
34
|
|
|
31
35
|
_DarkBlue = colorized(fg="white", bg="#00008b")
|
|
32
36
|
_GreenYellow = colorized(fg="black", bg="#adff2f")
|
|
@@ -44,9 +48,9 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
44
48
|
get_meta_and_result: types.GetMetaAndResult,
|
|
45
49
|
run_directory: ty.Optional[Path] = None,
|
|
46
50
|
calls_registry: ty.Mapping[ty.Callable, ty.Collection[ty.Callable]] = dict(), # noqa: B006
|
|
47
|
-
) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], T]:
|
|
51
|
+
) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], futures.PFuture[T]]:
|
|
48
52
|
@scope.bound
|
|
49
|
-
def
|
|
53
|
+
def create_invocation_and_result_future(
|
|
50
54
|
rerun_exceptions: bool,
|
|
51
55
|
function_memospace: str,
|
|
52
56
|
# by allowing the caller to set the function memospace, we allow 'redirects' to look up an old result by name.
|
|
@@ -54,7 +58,7 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
54
58
|
func: ty.Callable[..., T],
|
|
55
59
|
args_: Args,
|
|
56
60
|
kwargs_: Kwargs,
|
|
57
|
-
) -> T:
|
|
61
|
+
) -> futures.PFuture[T]:
|
|
58
62
|
"""This is the generic local runner. Its core abstractions are:
|
|
59
63
|
|
|
60
64
|
- serializers of some sort (for the function and its arguments)
|
|
@@ -89,16 +93,13 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
89
93
|
)
|
|
90
94
|
|
|
91
95
|
# Define some important and reusable 'chunks of work'
|
|
92
|
-
|
|
93
|
-
class ResultAndInvocationType(ty.NamedTuple):
|
|
94
|
-
value_or_error: ty.Union[memo.results.Success, memo.results.Error]
|
|
95
|
-
invoc_type: run_summary.InvocationType
|
|
96
|
-
|
|
97
|
-
def check_result(
|
|
96
|
+
def check_result_exists(
|
|
98
97
|
invoc_type: run_summary.InvocationType,
|
|
99
98
|
) -> ty.Union[ResultAndInvocationType, None]:
|
|
100
99
|
result = memo.results.check_if_result_exists(
|
|
101
|
-
memo_uri,
|
|
100
|
+
memo_uri,
|
|
101
|
+
check_for_exception=not rerun_exceptions,
|
|
102
|
+
before_raise=debug_required_result_failure,
|
|
102
103
|
)
|
|
103
104
|
if not result:
|
|
104
105
|
return None
|
|
@@ -108,28 +109,6 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
108
109
|
)
|
|
109
110
|
return ResultAndInvocationType(result, invoc_type)
|
|
110
111
|
|
|
111
|
-
def unwrap_value_or_error(result_and_itype: ResultAndInvocationType) -> T:
|
|
112
|
-
result = result_and_itype.value_or_error
|
|
113
|
-
metadata = None
|
|
114
|
-
value_t = None
|
|
115
|
-
try:
|
|
116
|
-
if isinstance(result, memo.results.Success):
|
|
117
|
-
metadata, value_t = get_meta_and_result("value", result.value_uri)
|
|
118
|
-
return ty.cast(T, value_t)
|
|
119
|
-
else:
|
|
120
|
-
assert isinstance(result, memo.results.Error), "Must be Error or Success"
|
|
121
|
-
metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
|
|
122
|
-
raise exc
|
|
123
|
-
finally:
|
|
124
|
-
run_summary.log_function_execution(
|
|
125
|
-
*(run_directory, memo_uri, result_and_itype.invoc_type),
|
|
126
|
-
metadata=metadata,
|
|
127
|
-
runner_prefix=function_memospace.split(pipeline_id)[0],
|
|
128
|
-
was_error=not isinstance(result, memo.results.Success),
|
|
129
|
-
return_value=value_t,
|
|
130
|
-
args_kwargs=(args, kwargs),
|
|
131
|
-
)
|
|
132
|
-
|
|
133
112
|
def acquire_lock() -> ty.Optional[lock.LockAcquired]:
|
|
134
113
|
return lock.acquire(fs.join(memo_uri, "lock"), expire=timedelta(seconds=88))
|
|
135
114
|
|
|
@@ -154,6 +133,14 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
154
133
|
|
|
155
134
|
inspect_and_log(memo_uri)
|
|
156
135
|
|
|
136
|
+
p_unwrap_value_or_error = partial(
|
|
137
|
+
unwrap_value_or_error,
|
|
138
|
+
get_meta_and_result,
|
|
139
|
+
run_directory,
|
|
140
|
+
function_memospace.split(pipeline_id)[0], # runner_prefix
|
|
141
|
+
run_summary.extract_source_uris((args, kwargs)),
|
|
142
|
+
)
|
|
143
|
+
|
|
157
144
|
# the network ops being grouped by _BEFORE_INVOCATION include one or more
|
|
158
145
|
# download attempts (consider possible Paths) plus
|
|
159
146
|
# one or more uploads (embedded Paths & Sources/refs, and then invocation).
|
|
@@ -162,9 +149,9 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
162
149
|
|
|
163
150
|
# it's possible that our result may already exist from a previous run of this pipeline id.
|
|
164
151
|
# we can short-circuit the entire process by looking for that result and returning it immediately.
|
|
165
|
-
result =
|
|
152
|
+
result = check_result_exists("memoized")
|
|
166
153
|
if result:
|
|
167
|
-
return
|
|
154
|
+
return futures.resolved(p_unwrap_value_or_error(memo_uri, result))
|
|
168
155
|
|
|
169
156
|
lock_owned = acquire_lock()
|
|
170
157
|
# if no result exists, the vastly most common outcome here will be acquiring
|
|
@@ -175,10 +162,6 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
175
162
|
# LOCK LOOP: entering this loop (where we attempt to acquire the lock) is the common non-memoized case
|
|
176
163
|
while not result:
|
|
177
164
|
if lock_owned:
|
|
178
|
-
if MAINTAIN_LOCKS():
|
|
179
|
-
release_lock = lock.launch_daemon_lock_maintainer(lock_owned)
|
|
180
|
-
else:
|
|
181
|
-
release_lock = lock_owned.release
|
|
182
165
|
break # we own the invocation - invoke the shim ourselves (below)
|
|
183
166
|
|
|
184
167
|
# getting to this point ONLY happens if we failed to acquire the lock, which
|
|
@@ -190,57 +173,45 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
|
|
|
190
173
|
time.sleep(22)
|
|
191
174
|
|
|
192
175
|
with _BEFORE_INVOCATION_SEMAPHORE:
|
|
193
|
-
result =
|
|
176
|
+
result = check_result_exists("awaited")
|
|
194
177
|
if result:
|
|
195
178
|
_LogAwaitedResult(
|
|
196
179
|
f"{val_or_res} for {memo_uri} was found after waiting for the lock."
|
|
197
180
|
)
|
|
198
|
-
return
|
|
181
|
+
return futures.resolved(p_unwrap_value_or_error(memo_uri, result))
|
|
199
182
|
|
|
200
183
|
lock_owned = acquire_lock() # still inside the semaphore, as it's a network op
|
|
201
184
|
|
|
202
|
-
assert release_lock is not None
|
|
203
185
|
assert lock_owned is not None
|
|
204
186
|
# if/when we acquire the lock, we move forever into 'run this ourselves mode'.
|
|
205
187
|
# If something about our invocation fails,
|
|
206
188
|
# we fail just as we would have previously, without any attempt to go
|
|
207
189
|
# 'back' to waiting for someone else to compute the result.
|
|
208
190
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
)
|
|
191
|
+
future_result_getter = PostShimResultGetter[T](memo_uri, p_unwrap_value_or_error)
|
|
192
|
+
|
|
193
|
+
with _BEFORE_INVOCATION_SEMAPHORE:
|
|
194
|
+
_LogNewInvocation(f"Invoking {memo_uri}")
|
|
195
|
+
upload_invocation_and_deps()
|
|
196
|
+
|
|
197
|
+
# can't hold the semaphore while we block on the shim, though.
|
|
198
|
+
shim = shim_builder(func, args_, kwargs_)
|
|
199
|
+
future_or_shim_result = shim( # ACTUAL INVOCATION (handoff to remote shim) HAPPENS HERE
|
|
200
|
+
(
|
|
201
|
+
memo_uri,
|
|
202
|
+
*metadata.format_invocation_cli_args(
|
|
203
|
+
metadata.InvocationMetadata.new(pipeline_id, invoked_at, lock_owned.writer_id)
|
|
204
|
+
),
|
|
224
205
|
)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
release_lock()
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
value_or_error = memo.results.check_if_result_exists(memo_uri)
|
|
238
|
-
if not value_or_error:
|
|
239
|
-
if shim_ex:
|
|
240
|
-
raise shim_ex # re-raise the underlying exception rather than making up our own.
|
|
241
|
-
raise NoResultAfterShimSuccess(
|
|
242
|
-
f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
|
|
243
|
-
)
|
|
244
|
-
return unwrap_value_or_error(ResultAndInvocationType(value_or_error, "invoked"))
|
|
245
|
-
|
|
246
|
-
return create_invocation__check_result__wait_shim
|
|
206
|
+
)
|
|
207
|
+
if hasattr(future_or_shim_result, "add_done_callback"):
|
|
208
|
+
# if the shim returns a Future, we wrap it.
|
|
209
|
+
logger.debug("Shim returned a Future; wrapping it for post-shim result retrieval.")
|
|
210
|
+
return futures.make_lazy(lock_maintaining_future)(
|
|
211
|
+
lock_owned, future_result_getter, future_or_shim_result
|
|
212
|
+
)
|
|
213
|
+
else: # it's a synchronous shim - just process the result directly.
|
|
214
|
+
future_result_getter.release_lock = lock.maintain_to_release(lock_owned)
|
|
215
|
+
return futures.resolved(future_result_getter(future_or_shim_result))
|
|
216
|
+
|
|
217
|
+
return create_invocation_and_result_future
|
|
@@ -2,24 +2,24 @@ import inspect
|
|
|
2
2
|
import typing as ty
|
|
3
3
|
|
|
4
4
|
from ..core.types import Args, F, Kwargs
|
|
5
|
-
from .types import Shim, ShimBuilder
|
|
5
|
+
from .types import FutureShim, Shim, ShimBuilder
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class _static_shim_builder:
|
|
9
|
-
def __init__(self, shim: Shim) -> None:
|
|
9
|
+
def __init__(self, shim: ty.Union[Shim, FutureShim]) -> None:
|
|
10
10
|
self.shim = shim
|
|
11
11
|
|
|
12
|
-
def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> Shim:
|
|
12
|
+
def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> ty.Union[Shim, FutureShim]:
|
|
13
13
|
return self.shim
|
|
14
14
|
|
|
15
15
|
def __repr__(self) -> str:
|
|
16
16
|
return f"<static_shim_builder for {self.shim}>"
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def make_builder(
|
|
19
|
+
def make_builder(shim_or_builder: ty.Union[Shim, ShimBuilder, FutureShim]) -> ShimBuilder:
|
|
20
20
|
"""If you have a Shim and you want to make it into the simplest possible ShimBuilder."""
|
|
21
21
|
|
|
22
|
-
if len(inspect.signature(
|
|
23
|
-
return ty.cast(ShimBuilder,
|
|
22
|
+
if len(inspect.signature(shim_or_builder).parameters) == 3:
|
|
23
|
+
return ty.cast(ShimBuilder, shim_or_builder)
|
|
24
24
|
|
|
25
|
-
return _static_shim_builder(ty.cast(Shim,
|
|
25
|
+
return _static_shim_builder(ty.cast(Shim, shim_or_builder))
|