thds.mops 3.6.20250219172032 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thds/mops/__about__.py +8 -0
- thds/mops/__init__.py +3 -0
- thds/mops/_compat.py +6 -0
- thds/mops/_utils/__init__.py +0 -0
- thds/mops/_utils/colorize.py +110 -0
- thds/mops/_utils/config_tree.py +167 -0
- thds/mops/_utils/exception.py +16 -0
- thds/mops/_utils/locked_cache.py +78 -0
- thds/mops/_utils/names.py +23 -0
- thds/mops/_utils/on_slow.py +28 -0
- thds/mops/_utils/once.py +30 -0
- thds/mops/_utils/temp.py +32 -0
- thds/mops/config.py +60 -0
- thds/mops/impure/__init__.py +2 -0
- thds/mops/impure/keyfunc.py +14 -0
- thds/mops/impure/runner.py +73 -0
- thds/mops/k8s/__init__.py +27 -0
- thds/mops/k8s/_shared.py +3 -0
- thds/mops/k8s/apply_yaml.py +22 -0
- thds/mops/k8s/auth.py +49 -0
- thds/mops/k8s/config.py +37 -0
- thds/mops/k8s/container_registry.py +14 -0
- thds/mops/k8s/jobs.py +57 -0
- thds/mops/k8s/launch.py +234 -0
- thds/mops/k8s/logging.py +239 -0
- thds/mops/k8s/namespace.py +17 -0
- thds/mops/k8s/node_selection.py +58 -0
- thds/mops/k8s/retry.py +75 -0
- thds/mops/k8s/too_old_resource_version.py +42 -0
- thds/mops/k8s/tools/krsync.py +50 -0
- thds/mops/k8s/tools/krsync.sh +22 -0
- thds/mops/k8s/wait_job.py +72 -0
- thds/mops/k8s/warn_image_backoff.py +63 -0
- thds/mops/k8s/watch.py +266 -0
- thds/mops/meta.json +8 -0
- thds/mops/parallel.py +36 -0
- thds/mops/pure/__init__.py +43 -0
- thds/mops/pure/_magic/__init__.py +0 -0
- thds/mops/pure/_magic/api.py +114 -0
- thds/mops/pure/_magic/sauce.py +152 -0
- thds/mops/pure/_magic/shims.py +34 -0
- thds/mops/pure/adls/__init__.py +1 -0
- thds/mops/pure/adls/_files.py +22 -0
- thds/mops/pure/adls/blob_store.py +185 -0
- thds/mops/pure/adls/output_fqn.py +17 -0
- thds/mops/pure/core/__init__.py +0 -0
- thds/mops/pure/core/content_addressed.py +31 -0
- thds/mops/pure/core/deferred_work.py +83 -0
- thds/mops/pure/core/entry/__init__.py +2 -0
- thds/mops/pure/core/entry/main.py +47 -0
- thds/mops/pure/core/entry/route_result.py +66 -0
- thds/mops/pure/core/entry/runner_registry.py +31 -0
- thds/mops/pure/core/file_blob_store.py +120 -0
- thds/mops/pure/core/lock/__init__.py +7 -0
- thds/mops/pure/core/lock/_acquire.py +192 -0
- thds/mops/pure/core/lock/_funcs.py +37 -0
- thds/mops/pure/core/lock/cli.py +73 -0
- thds/mops/pure/core/lock/maintain.py +150 -0
- thds/mops/pure/core/lock/read.py +39 -0
- thds/mops/pure/core/lock/types.py +37 -0
- thds/mops/pure/core/lock/write.py +136 -0
- thds/mops/pure/core/memo/__init__.py +6 -0
- thds/mops/pure/core/memo/function_memospace.py +267 -0
- thds/mops/pure/core/memo/keyfunc.py +53 -0
- thds/mops/pure/core/memo/overwrite_params.py +61 -0
- thds/mops/pure/core/memo/results.py +103 -0
- thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
- thds/mops/pure/core/metadata.py +230 -0
- thds/mops/pure/core/output_naming.py +52 -0
- thds/mops/pure/core/partial.py +15 -0
- thds/mops/pure/core/pipeline_id.py +62 -0
- thds/mops/pure/core/pipeline_id_mask.py +79 -0
- thds/mops/pure/core/script_support.py +25 -0
- thds/mops/pure/core/serialize_big_objs.py +73 -0
- thds/mops/pure/core/serialize_paths.py +149 -0
- thds/mops/pure/core/source.py +291 -0
- thds/mops/pure/core/types.py +142 -0
- thds/mops/pure/core/uris.py +81 -0
- thds/mops/pure/core/use_runner.py +47 -0
- thds/mops/pure/joblib/__init__.py +1 -0
- thds/mops/pure/joblib/backend.py +81 -0
- thds/mops/pure/joblib/batching.py +67 -0
- thds/mops/pure/pickling/__init__.py +3 -0
- thds/mops/pure/pickling/_pickle.py +193 -0
- thds/mops/pure/pickling/memoize_only.py +22 -0
- thds/mops/pure/pickling/mprunner.py +173 -0
- thds/mops/pure/pickling/pickles.py +149 -0
- thds/mops/pure/pickling/remote.py +145 -0
- thds/mops/pure/pickling/sha256_b64.py +71 -0
- thds/mops/pure/runner/__init__.py +0 -0
- thds/mops/pure/runner/local.py +239 -0
- thds/mops/pure/runner/shim_builder.py +25 -0
- thds/mops/pure/runner/simple_shims.py +21 -0
- thds/mops/pure/runner/strings.py +1 -0
- thds/mops/pure/runner/types.py +28 -0
- thds/mops/pure/tools/__init__.py +0 -0
- thds/mops/pure/tools/history.py +35 -0
- thds/mops/pure/tools/inspect.py +372 -0
- thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
- thds/mops/pure/tools/stress.py +63 -0
- thds/mops/pure/tools/summarize/__init__.py +4 -0
- thds/mops/pure/tools/summarize/cli.py +293 -0
- thds/mops/pure/tools/summarize/run_summary.py +143 -0
- thds/mops/py.typed +0 -0
- thds/mops/testing/__init__.py +0 -0
- thds/mops/testing/deferred_imports.py +81 -0
- thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
- thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
- thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
- thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
- thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
thds/mops/pure/pickling/pickles.py
@@ -0,0 +1,149 @@
"""This module is a good place to define actual objects that need to be
pickled in a backward-compatible way - i.e., we want to remember not
to refactor their names or the name of the module they live in so as
to maintain backward-compatibility more easily.
"""

import importlib
import io
import pickle
import typing as ty
from dataclasses import dataclass
from pathlib import Path

from thds.core import hashing, log, source

from ..core.script_support import add_main_module_function, get_main_module_function
from ..core.source import source_from_hashref, source_from_source_result
from ..core.uris import get_bytes, lookup_blob_store

logger = log.getLogger(__name__)


@dataclass
class Invocation:
    """Basically, NestedFunctionPickle was the v2. This is v3. By switching to dataclass,
    we can more easily add new optional attributes later on.
    """

    func: ty.Callable
    args_kwargs_pickle: bytes
    # this is pickled separately so that we can hash it separately.
    # the identity of the function is represented by the name part of the blob path.


class NestedFunctionPickle(ty.NamedTuple):
    """Not in use - retained for mops-inspect backward-compatibility"""

    f: ty.Callable
    args_kwargs_pickle: bytes


class PicklableFunction:
    """The main 'issue' this is working around is that decorated
    functions aren't picklable because of something having to do with
    the way the function gets 'replaced' at decoration time.

    There may be other solutions to this, but this seems to work fine.
    """

    def __init__(self, f: ty.Callable) -> None:
        if f.__module__ == "__main__":
            add_main_module_function(f.__name__, f)
        self.fmod = f.__module__
        self.fname = f.__name__
        self.f = None

    def __str__(self) -> str:
        return f"{self.fmod}.{self.fname}"

    def __repr__(self) -> str:
        return str(self)

    @property
    def __name__(self) -> str:
        return self.fname

    def __call__(self, *args: ty.Any, **kwargs: ty.Any) -> ty.Any:
        logger.debug(f"Dynamically importing function {str(self)}")
        if self.fmod == "__main__":
            self.f = get_main_module_function(self.fname)  # type: ignore
        else:
            mod = importlib.import_module(self.fmod)
            self.f = getattr(mod, self.fname)
        assert self.f
        return self.f(*args, **kwargs)


class UnpickleSimplePickleFromUri:
    def __init__(self, uri: str):
        self.uri = uri  # serializable as a pure string for simplicity
        self._cached = None

    def __call__(self) -> object:
        # i don't believe there's any need for thread safety here, since pickle won't use threads.
        if self._cached is None:
            self._cached = pickle.load(io.BytesIO(get_bytes(self.uri, type_hint="simple-uri-pickle")))
        return self._cached


class UnpicklePathFromUri(ty.NamedTuple):
    uri: str

    def __call__(self) -> Path:
        return lookup_blob_store(self.uri).getfile(self.uri)


class UnpickleSourceUriArgument(ty.NamedTuple):
    """The URI fully specifies this type of source. Nothing fancy happens here. We just
    return a new Source object that represents the URI.
    """

    uri: str

    def __call__(self) -> source.Source:
        return source.from_uri(self.uri)


class UnpickleSourceHashrefArgument(ty.NamedTuple):
    """Represents the root for a single file hashref. May be either local or remote.

    For stability, the module name and the class name must not change.

    This only applies to arguments _into_ a function. Results _from_ a function should
    have a different form.
    """

    hash: hashing.Hash

    def __call__(self) -> source.Source:
        return source_from_hashref(self.hash)


class UnpickleSourceResult(ty.NamedTuple):
    """Stability for this is not critical, as it will only ever exist in the result
    payload, which does not participate in memoization.
    """

    remote_uri: str
    hash: ty.Optional[hashing.Hash]
    file_uri: str

    def __call__(self) -> source.Source:
        return source_from_source_result(*self)


class UnpickleFunctionWithLogicKey(ty.NamedTuple):
    """When a mops-memoized function receives, in standard "functional programming" style,
    a function as an argument (whether partially-applied or not), we need to make
    sure to represent any function-logic-key on that callable as part of what gets serialized,
    so that memoization does not happen when unexpected/undesired.

    The function itself must be picklable in the natural way.
    """

    func_bytes: bytes
    function_logic_key: str

    def __call__(self) -> ty.Callable:
        return pickle.loads(self.func_bytes)
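Worth noting how PicklableFunction behaves across a pickle round trip: only the module and function name are serialized, and the callable is re-imported lazily on first call. A small sketch (`scale` is a hypothetical stand-in for a decorated user function; this assumes an in-process round trip):

import pickle

from thds.mops.pure.pickling.pickles import PicklableFunction

# hypothetical module-level function standing in for a decorated user function
def scale(x: float, factor: float = 2.0) -> float:
    return x * factor

pf = PicklableFunction(scale)
# only the module and name travel through the pickle, so a decorator that
# replaced `scale` with a wrapper at decoration time would not break serialization.
restored = pickle.loads(pickle.dumps(pf))
assert restored(3.0) == 6.0  # re-imported (or re-registered) and invoked on the other side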
thds/mops/pure/pickling/remote.py
@@ -0,0 +1,145 @@
import typing as ty
from dataclasses import dataclass
from datetime import datetime, timezone
from functools import cached_property

from thds.core import log, scope

from ..._utils.once import Once
from ..core import lock, metadata, pipeline_id, uris
from ..core.entry import route_return_value_or_exception
from ..core.memo import results
from ..core.pipeline_id_mask import pipeline_id_mask
from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
from ..core.serialize_paths import CoordinatingPathSerializer
from ..core.types import Args, BlobStore, Kwargs, T
from ..runner import strings
from . import _pickle, mprunner, pickles, sha256_b64

logger = log.getLogger(__name__)


@dataclass  # needed for cached_property
class _ResultExcWithMetadataChannel:
    fs: BlobStore
    dumper: _pickle.Dumper
    call_id: str
    invocation_metadata: metadata.InvocationMetadata
    started_at: datetime

    @cached_property
    def _metadata_header(self) -> bytes:
        """This is always embedded _alongside_ the actual return value or exception.
        This is to make sure that whatever metadata is in the result is atomically
        part of the result, such that in the rare case of racing invocations,
        the metadata can be trusted to be accurate.
        """
        result_metadata = metadata.ResultMetadata.from_invocation(
            self.invocation_metadata, self.started_at, datetime.now(tz=timezone.utc)
        )
        logger.info(f"Remote code version: {result_metadata.remote_code_version}")
        return metadata.format_result_header(result_metadata).encode("utf-8")

    def _write_metadata_only(self, prefix: str) -> None:
        """This is a mops v3 thing that is unnecessary but adds clarity when debugging.
        If you see more than one of these files in a directory, that usually means either
        the success was preceded by a failure, _or_ it means that there was an (unusual) race condition.
        """
        self.fs.putbytes(
            self.fs.join(self.call_id, f"{prefix}-metadata-{self.invocation_metadata.invoker_uuid}.txt"),
            self._metadata_header,
            type_hint="text/plain",
        )

    def return_value(self, r: T) -> None:
        return_value_bytes = _pickle.gimme_bytes(self.dumper, r)
        self.fs.putbytes(
            self.fs.join(self.call_id, results.RESULT),
            self._metadata_header + return_value_bytes,
            type_hint="application/mops-return-value",
        )
        self._write_metadata_only("result")

    def exception(self, exc: Exception) -> None:
        exc_bytes = _pickle.gimme_bytes(self.dumper, exc)
        self.fs.putbytes(
            self.fs.join(self.call_id, results.EXCEPTION),
            self._metadata_header + exc_bytes,
            type_hint="application/mops-exception",
        )
        self._write_metadata_only("exception")


def _unpickle_invocation(memo_uri: str) -> ty.Tuple[ty.Callable, Args, Kwargs]:
    _, invocation_raw = _pickle.make_read_header_and_object(strings.INVOCATION)(
        uris.lookup_blob_store(memo_uri).join(memo_uri, strings.INVOCATION)
    )
    invocation = ty.cast(pickles.Invocation, invocation_raw)
    args, kwargs = _pickle.unfreeze_args_kwargs(invocation.args_kwargs_pickle)
    return invocation.func, args, kwargs


def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
    """The arguments are those supplied by MemoizingPicklingRunner.

    As of v3, we now expect a number of (required) metadata arguments with every invocation.
    """
    started_at = datetime.now(tz=timezone.utc)  # capture this timestamp right at the outset.
    invocation_metadata = metadata.parse_invocation_metadata_args(metadata_args)
    metadata.INVOKED_BY.set_global(invocation_metadata.invoked_by)
    pipeline_id.set_pipeline_id(invocation_metadata.pipeline_id)
    fs = uris.lookup_blob_store(memo_uri)

    # any recursively-called functions that use metadata will retain the original invoker.

    try:
        stop_lock = lock.launch_daemon_lock_maintainer(
            lock.remote_lock_maintain(
                fs.join(memo_uri, "lock"), expected_writer_id=invocation_metadata.invoker_uuid
            )
        )
    except lock.CannotMaintainLock as e:
        logger.info(f"Cannot maintain lock: {e}. Continuing without the lock.")
        stop_lock = lambda: None  # noqa: E731

    def _extract_invocation_unique_key(memo_uri: str) -> ty.Tuple[str, str]:
        parts = fs.split(memo_uri)
        try:
            runner_idx = parts.index(mprunner.RUNNER_NAME)
        except ValueError as ve:
            raise ValueError(
                f"Unable to find the runner name {mprunner.RUNNER_NAME} in parts {parts}"
            ) from ve
        invocation_parts = parts[runner_idx + 1 :]
        return fs.join(*invocation_parts[:-1]), invocation_parts[-1]

    scope.enter(uris.ACTIVE_STORAGE_ROOT.set(uris.get_root(memo_uri)))

    try:
        func, args, kwargs = _unpickle_invocation(memo_uri)
    except Exception:
        logger.error(f"Failed to unpickle invocation from {memo_uri} - this is a bug in mops!")
        raise

    def do_work_return_result() -> object:
        # ONLY failures in this code should transmit an EXCEPTION
        # back to the orchestrator side.
        return pipeline_id_mask(invocation_metadata.pipeline_id)(func)(*args, **kwargs)

    route_return_value_or_exception(
        _ResultExcWithMetadataChannel(
            fs,
            _pickle.Dumper(
                ByIdSerializer(ByIdRegistry()),
                CoordinatingPathSerializer(sha256_b64.Sha256B64PathStream(), Once()),
                _pickle.SourceResultPickler(),
            ),
            memo_uri,
            invocation_metadata,
            started_at,
        ),
        ty.cast(ty.Callable[[], T], do_work_return_result),
        invocation_metadata.pipeline_id,
        _extract_invocation_unique_key(memo_uri),
    )
    stop_lock()  # not critical since we don't _own_ the lock, but keeps things cleaner
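The _ResultExcWithMetadataChannel above prepends a metadata header to the pickled payload so that both land in a single atomic blob write; a reader can never observe the result without its metadata. A self-contained toy of that framing idea (not the real mops header format):

import pickle

HEADER_END = b"\n---end-header---\n"  # toy delimiter, not the real mops format

def frame(metadata_text: str, payload_obj: object) -> bytes:
    # one blob = header + payload, written together, so they stay consistent
    # even if two racing invocations both try to publish a result.
    return metadata_text.encode("utf-8") + HEADER_END + pickle.dumps(payload_obj)

def unframe(blob: bytes) -> "tuple[str, object]":
    header, _, payload = blob.partition(HEADER_END)
    return header.decode("utf-8"), pickle.loads(payload)

blob = frame("invoker: abc-123\nstarted: 2025-02-19T17:20:32Z", {"ok": True})
meta, value = unframe(blob)
assert value == {"ok": True}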
thds/mops/pure/pickling/sha256_b64.py
@@ -0,0 +1,71 @@
"""Context-local, content-aware ser/de from/to a known URI prefix.

Basically, we take some generic pickle utilities and stitch them together into something
that efficiently serializes object graphs to a combination of locations at some URI prefix,
such that they are self-deserializing (via CallableUnpickler) on the other side.
"""

import hashlib
import io
import pickle
import typing as ty
from pathlib import Path

from thds.core import hashing, log

from ..core.content_addressed import storage_content_addressed, wordybin_content_addressed
from ..core.serialize_paths import Downloader
from ..core.uris import active_storage_root, lookup_blob_store
from .pickles import UnpicklePathFromUri, UnpickleSimplePickleFromUri

logger = log.getLogger(__name__)
T = ty.TypeVar("T")


class Sha256B64PathStream:
    def local_to_remote(self, path: Path, sha256: str) -> None:
        """Return fully qualified remote information after put."""
        # lazily fetches the active storage root.
        full_remote_sha256 = storage_content_addressed(sha256, "sha256")
        lookup_blob_store(full_remote_sha256).putfile(path, full_remote_sha256)

    def get_downloader(self, remote_sha256: str) -> Downloader:
        return UnpicklePathFromUri(storage_content_addressed(remote_sha256, "sha256"))  # type: ignore # NamedTuple silliness


def _pickle_obj_and_upload_to_content_addressed_path(
    obj: object, debug_name: str = ""
) -> UnpickleSimplePickleFromUri:
    # active_storage_root is lazily fetched because we may want to register the pickler
    # somewhere before settling on the final destination of objects pickled.
    storage_root = active_storage_root()
    with io.BytesIO() as bio:
        pickle.dump(obj, bio)
        bio.seek(0)
        fs = lookup_blob_store(storage_root)
        bytes_uri, debug_uri = wordybin_content_addressed(
            hashing.Hash("sha256", hashing.hash_using(bio, hashlib.sha256()).digest()),
            storage_root,
            debug_name=f"objname_{debug_name}" if debug_name else "",
        )
        fs.putbytes(bytes_uri, bio, type_hint="application/octet-stream")
        if debug_uri:
            # this name is purely for debugging and affects no part of the runtime.
            fs.putbytes(debug_uri, "goodbeef".encode(), type_hint="text/plain")

    return UnpickleSimplePickleFromUri(bytes_uri)


class Sha256B64Pickler:
    """A type of CallbackPickler, intended for picklable objects that should be serialized
    as pure bytes and stored at a content-addressed URI. Only used (currently) by the
    ById/shared object serializer, most likely for something like a large dataframe.

    Name exists solely for debugging purposes.
    """

    def __init__(self, name: str = ""):
        self.name = name

    def __call__(self, obj: ty.Any) -> UnpickleSimplePickleFromUri:
        return _pickle_obj_and_upload_to_content_addressed_path(obj, self.name)
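The core move in this module - pickle, hash, then store under a name derived from the hash - can be sketched without the mops blob store. A toy, filesystem-only analogue (the naming is loosely modeled on the Sha256B64 scheme; `cas_root` is a hypothetical local directory):

import base64
import hashlib
import pickle
from pathlib import Path

def toy_content_addressed_put(obj: object, cas_root: Path) -> Path:
    payload = pickle.dumps(obj)
    # URL-safe base64 of the sha256 digest, loosely like the Sha256B64 naming scheme
    digest_b64 = base64.urlsafe_b64encode(hashlib.sha256(payload).digest()).decode().rstrip("=")
    dest = cas_root / "sha256" / digest_b64
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_bytes(payload)  # idempotent: the same object always lands at the same path
    return dest

path = toy_content_addressed_put({"rows": list(range(5))}, Path("/tmp/toy-cas"))
assert pickle.loads(path.read_bytes()) == {"rows": list(range(5))}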
thds/mops/pure/runner/__init__.py
File without changes
thds/mops/pure/runner/local.py
@@ -0,0 +1,239 @@
"""Joins pickle functionality and Blob Store functionality to run functions remotely.
"""

import threading
import time
import typing as ty
from datetime import datetime, timedelta, timezone
from pathlib import Path

from thds.core import config, log, scope

from ..._utils.colorize import colorized, make_colorized_out
from ...config import max_concurrent_network_ops
from ..core import deferred_work, lock, memo, metadata, pipeline_id_mask, uris
from ..core.partial import unwrap_partial
from ..core.types import Args, Kwargs, NoResultAfterShimSuccess, T
from ..tools.summarize import run_summary
from . import strings, types

MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)

# these two semaphores allow us to prioritize getting meaningful units
# of progress _complete_, rather than issuing many instructions to the
# underlying client and allowing it to randomly order the operations
# such that it takes longer to get a full unit of work complete.
_BEFORE_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
# _OUT prioritizes uploading a single invocation and its dependencies so the Shim can start running.
_AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
# _IN prioritizes retrieving the result of a Shim that has completed.

_DarkBlue = colorized(fg="white", bg="#00008b")
_GreenYellow = colorized(fg="black", bg="#adff2f")
_Purple = colorized(fg="white", bg="#800080")
logger = log.getLogger(__name__)
_LogKnownResult = make_colorized_out(_DarkBlue, out=logger.info, fmt_str=" {} ")
_LogNewInvocation = make_colorized_out(_GreenYellow, out=logger.info, fmt_str=" {} ")
_LogAwaitedResult = make_colorized_out(_Purple, out=logger.info, fmt_str=" {} ")


def invoke_via_shim_or_return_memoized(  # noqa: C901
    serialize_args_kwargs: types.SerializeArgsKwargs,
    serialize_invocation: types.SerializeInvocation,
    shim_builder: types.ShimBuilder,
    get_meta_and_result: types.GetMetaAndResult,
    run_directory: ty.Optional[Path] = None,
) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], T]:
    @scope.bound
    def create_invocation__check_result__wait_shim(
        rerun_exceptions: bool,
        function_memospace: str,
        # by allowing the caller to set the function memospace, we allow 'redirects' to look up
        # an old result by name, while still guaranteeing that the function arguments were the same.
        func: ty.Callable[..., T],
        args_: Args,
        kwargs_: Kwargs,
    ) -> T:
        """This is the generic local runner. Its core abstractions are:

        - serializers of some sort (for the function and its arguments)
        - a runtime shim of some sort (can start a Python process somewhere else)
        - a result and metadata deserializer
        - URIs that are supported by a registered BlobStore implementation.

        It uses a mops-internal locking mechanism to prevent concurrent invocations for the same function+args.
        """
        invoked_at = datetime.now(tz=timezone.utc)
        # capture immediately, because many things may delay actual start.
        storage_root = uris.get_root(function_memospace)
        scope.enter(uris.ACTIVE_STORAGE_ROOT.set(storage_root))
        fs = uris.lookup_blob_store(function_memospace)
        val_or_res = "value" if rerun_exceptions else "result"

        # we need to unwrap any partial object and combine its wrapped
        # args, kwargs with the provided args, kwargs, otherwise the
        # args and kwargs will not get properly considered in the memoization key.
        func, args, kwargs = unwrap_partial(func, args_, kwargs_)
        pipeline_id = scope.enter(pipeline_id_mask.including_function_docstr(func))
        # TODO pipeline_id should probably be passed in explicitly

        scope.enter(deferred_work.open_context())  # optimize Source objects during serialization

        args_kwargs_bytes = serialize_args_kwargs(storage_root, func, args, kwargs)
        memo_uri = fs.join(function_memospace, memo.args_kwargs_content_address(args_kwargs_bytes))

        # Define some important and reusable 'chunks of work'

        class ResultAndInvocationType(ty.NamedTuple):
            value_or_error: ty.Union[memo.results.Success, memo.results.Error]
            invoc_type: run_summary.InvocationType

        def check_result(
            invoc_type: run_summary.InvocationType,
        ) -> ty.Union[ResultAndInvocationType, None]:
            result = memo.results.check_if_result_exists(
                memo_uri, rerun_excs=rerun_exceptions, before_raise=debug_required_result_failure
            )
            if not result:
                return None

            _LogKnownResult(
                f"{invoc_type} {val_or_res} for {memo_uri} already exists and is being returned without invocation!"
            )
            return ResultAndInvocationType(result, invoc_type)

        def unwrap_value_or_error(result_and_itype: ResultAndInvocationType) -> T:
            result = result_and_itype.value_or_error
            metadata = None
            value_t = None
            try:
                if isinstance(result, memo.results.Success):
                    metadata, value_t = get_meta_and_result("value", result.value_uri)
                    return ty.cast(T, value_t)
                else:
                    assert isinstance(result, memo.results.Error), "Must be Error or Success"
                    metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
                    raise exc
            finally:
                run_summary.log_function_execution(
                    *(run_directory, func, memo_uri, result_and_itype.invoc_type),
                    metadata=metadata,
                    runner_prefix=function_memospace.split(pipeline_id)[0],
                    was_error=not isinstance(result, memo.results.Success),
                    return_value=value_t,
                )

        def acquire_lock() -> ty.Optional[lock.LockAcquired]:
            return lock.acquire(fs.join(memo_uri, "lock"), expire=timedelta(seconds=88))

        def upload_invocation_and_deps() -> None:
            # we're just about to transfer to a remote context,
            # so it's time to perform any deferred work
            deferred_work.perform_all()

            fs.putbytes(
                fs.join(memo_uri, strings.INVOCATION),
                serialize_invocation(storage_root, func, args_kwargs_bytes),
                type_hint="application/mops-invocation",
            )

        def debug_required_result_failure() -> None:
            # This is entirely for the purpose of making debugging easier. It serves no internal functional purpose.
            #
            # first, upload the invocation as an accessible marker of what was expected to exist.
            upload_invocation_and_deps()
            # then use mops-inspect programmatically to print the IRE in the same format as usual.
            from thds.mops.pure.tools.inspect import inspect_and_log

            inspect_and_log(memo_uri)

        # the network ops being grouped by _BEFORE_INVOCATION include one or more
        # download attempts (consider possible Paths) plus
        # one or more uploads (embedded Paths & Sources/refs, and then invocation).
        with _BEFORE_INVOCATION_SEMAPHORE:
            # now actually execute the chunks of work that are required...

            # it's possible that our result may already exist from a previous run of this pipeline id.
            # we can short-circuit the entire process by looking for that result and returning it immediately.
            result = check_result("memoized")
            if result:
                return unwrap_value_or_error(result)

            lock_owned = acquire_lock()
            # if no result exists, the vastly most common outcome here will be acquiring
            # the lock on the first try. this will lead to breaking out of
            # the LOCK LOOP directly below and going on to the shim invocation.
            # still, we release the semaphore b/c we can't sleep while holding a lock.

        # LOCK LOOP: entering this loop (where we attempt to acquire the lock) is the common non-memoized case
        while not result:
            if lock_owned:
                if MAINTAIN_LOCKS():
                    release_lock = lock.launch_daemon_lock_maintainer(lock_owned)
                else:
                    release_lock = lock_owned.release
                break  # we own the invocation - invoke the shim ourselves (below)

            # getting to this point ONLY happens if we failed to acquire the lock, which
            # is not expected to be the usual situation. We log a differently-colored
            # message here to make that clear to users.
            _LogAwaitedResult(
                f"{val_or_res} for {memo_uri} does not exist, but the lock is owned by another process."
            )
            time.sleep(22)

            with _BEFORE_INVOCATION_SEMAPHORE:
                result = check_result("awaited")
                if result:
                    _LogAwaitedResult(
                        f"{val_or_res} for {memo_uri} was found after waiting for the lock."
                    )
                    return unwrap_value_or_error(result)

                lock_owned = acquire_lock()  # still inside the semaphore, as it's a network op

        assert release_lock is not None
        assert lock_owned is not None
        # if/when we acquire the lock, we move forever into 'run this ourselves mode'.
        # If something about our invocation fails,
        # we fail just as we would have previously, without any attempt to go
        # 'back' to waiting for someone else to compute the result.

        try:
            with _BEFORE_INVOCATION_SEMAPHORE:
                _LogNewInvocation(f"Invoking {memo_uri}")
                upload_invocation_and_deps()

            # can't hold the semaphore while we block on the shim, though.
            shim_ex = None
            shim = shim_builder(func, args_, kwargs_)
            shim(  # ACTUAL INVOCATION (handoff to remote shim) HAPPENS HERE
                (
                    memo_uri,
                    *metadata.format_invocation_cli_args(
                        metadata.InvocationMetadata.new(pipeline_id, invoked_at, lock_owned.writer_id)
                    ),
                )
            )
        except Exception as ex:
            # network or similar errors are very common and hard to completely eliminate.
            # We know that if a result (or error) exists, then the network failure is
            # not important, because results in blob storage are atomically populated (either fully there or not)
            logger.exception("Error awaiting shim. Optimistically checking for result.")
            shim_ex = ex

        finally:
            release_lock()

        # the network ops being grouped by _AFTER_INVOCATION include one or more downloads.
        with _AFTER_INVOCATION_SEMAPHORE:
            value_or_error = memo.results.check_if_result_exists(memo_uri)
            if not value_or_error:
                if shim_ex:
                    raise shim_ex  # re-raise the underlying exception rather than making up our own.
                raise NoResultAfterShimSuccess(
                    f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
                )
            return unwrap_value_or_error(ResultAndInvocationType(value_or_error, "invoked"))

    return create_invocation__check_result__wait_shim
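The control flow above - check for a memoized result, try to take the lock, otherwise sleep and re-check - is a classic memoize-or-compute-or-wait loop. A stripped-down, single-process sketch of the same shape (toy stand-ins, not the mops API):

import threading
import time
from typing import Callable, Dict

_results: Dict[str, object] = {}
_locks: Dict[str, threading.Lock] = {}
_registry_guard = threading.Lock()

def memoize_or_compute(key: str, compute: Callable[[], object]) -> object:
    while True:
        if key in _results:                      # the "memoized" / "awaited" result check
            return _results[key]
        with _registry_guard:
            lk = _locks.setdefault(key, threading.Lock())
        if lk.acquire(blocking=False):           # acquire_lock()
            try:
                _results[key] = compute()        # we own the invocation: run the work
                return _results[key]
            finally:
                lk.release()                     # release_lock()
        time.sleep(0.05)                         # lock owned elsewhere: wait, then re-check

print(memoize_or_compute("job-1", lambda: 42))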
thds/mops/pure/runner/shim_builder.py
@@ -0,0 +1,25 @@
import inspect
import typing as ty

from ..core.types import Args, F, Kwargs
from .types import Shim, ShimBuilder


class _static_shim_builder:
    def __init__(self, shim: Shim) -> None:
        self.shim = shim

    def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> Shim:
        return self.shim

    def __repr__(self) -> str:
        return f"<static_shim_builder for {self.shim}>"


def make_builder(shim: ty.Union[Shim, ShimBuilder]) -> ShimBuilder:
    """If you have a Shim and you want to make it into the simplest possible ShimBuilder."""

    if len(inspect.signature(shim).parameters) == 3:
        return ty.cast(ShimBuilder, shim)

    return _static_shim_builder(ty.cast(Shim, shim))
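make_builder accepts either a bare Shim (one parameter) or a full ShimBuilder (three parameters), distinguished by signature arity. A sketch of both spellings (`noop_shim` and `per_call_builder` are hypothetical):

from thds.mops.pure.runner.shim_builder import make_builder

def noop_shim(shim_args):  # a bare Shim: takes one sequence of strings
    print("would hand off:", list(shim_args))

builder = make_builder(noop_shim)   # one parameter, so it gets wrapped in a _static_shim_builder
shim = builder(print, (), {})       # the builder arguments are ignored for a static shim
shim(["memo://example", "--meta", "x"])

def per_call_builder(f, args, kwargs):  # already a ShimBuilder: three parameters
    return noop_shim

assert make_builder(per_call_builder) is per_call_builder  # passed through unchanged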
thds/mops/pure/runner/simple_shims.py
@@ -0,0 +1,21 @@
import subprocess
from typing import Sequence

from thds.core import log

from ..core.entry.runner_registry import run_named_entry_handler

logger = log.getLogger(__name__)


def samethread_shim(shim_args: Sequence[str]) -> None:
    """Use this inside a memoizing Runner to get the memoization
    without needing to transfer control to an external process.
    """
    logger.debug("Running a mops function locally in the current thread.")
    run_named_entry_handler(*shim_args)


def subprocess_shim(shim_args: Sequence[str]) -> None:
    logger.debug("Running a mops function locally in a new subprocess.")
    subprocess.check_call(["python", "-m", "thds.mops.pure.core.entry.main", *shim_args])
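As a hedged sketch of where these shims plug in: samethread_shim keeps everything in the current interpreter (cheap, handy for tests), while subprocess_shim pays process startup for a fresh interpreter. The runner constructor signature below is an assumption based on the shim_builder plumbing above, not verified against this release:

from thds.mops.pure import MemoizingPicklingRunner  # assumed to be exported by pure/__init__.py
from thds.mops.pure.runner.simple_shims import samethread_shim

# assumed signature: a Shim (or ShimBuilder) plus a storage-root URI for the blob store
runner = MemoizingPicklingRunner(samethread_shim, "file:///tmp/mops-root")  # hypothetical root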
thds/mops/pure/runner/strings.py
@@ -0,0 +1 @@
INVOCATION = "invocation"
thds/mops/pure/runner/types.py
@@ -0,0 +1,28 @@
import typing as ty

from ..core.metadata import ResultMetadata
from ..core.types import Args, F, Kwargs

Shim = ty.Callable[[ty.Sequence[str]], ty.Any]
"""A runner Shim is a way of getting back into a Python process with enough
context to download the uploaded function and its arguments from the
location where a runner placed it, and then invoke the function. All
arguments are strings because it is assumed that this represents some
kind of command line invocation.

The Shim must be a blocking call, and its result(s) must be available
immediately after its return.
"""


class ShimBuilder(ty.Protocol):
    def __call__(self, __f: F, __args: Args, __kwargs: Kwargs) -> Shim:
        ...  # pragma: no cover


StorageRootURI = str
SerializeArgsKwargs = ty.Callable[[StorageRootURI, F, Args, Kwargs], bytes]
SerializeInvocation = ty.Callable[[StorageRootURI, F, bytes], bytes]
# the bytes parameter is the previously-serialized args,kwargs
GetMetaAndResult = ty.Callable[[str, str], ty.Tuple[ty.Optional[ResultMetadata], ty.Any]]
# the above should probably not 'hide' the fetch of the bytes, but it is what it is for now.
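A minimal sketch of a custom ShimBuilder satisfying this Protocol, choosing a shim per invocation (hypothetical; assumes Kwargs is a mapping type):

from thds.mops.pure.core.types import Args, F, Kwargs
from thds.mops.pure.runner.simple_shims import samethread_shim, subprocess_shim
from thds.mops.pure.runner.types import Shim, ShimBuilder

class SizeAwareShimBuilder:
    """Hypothetical: pick a shim per invocation, e.g. isolating heavyweight calls."""

    def __call__(self, f: F, args: Args, kwargs: Kwargs) -> Shim:
        if kwargs.get("big", False):   # assumes Kwargs behaves like a mapping
            return subprocess_shim     # fresh interpreter for heavyweight calls
        return samethread_shim         # stay in-process for everything else

builder: ShimBuilder = SizeAwareShimBuilder()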