thds.mops-3.6.20250219172032-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of thds.mops might be problematic.
Files changed (111)
  1. thds/mops/__about__.py +8 -0
  2. thds/mops/__init__.py +3 -0
  3. thds/mops/_compat.py +6 -0
  4. thds/mops/_utils/__init__.py +0 -0
  5. thds/mops/_utils/colorize.py +110 -0
  6. thds/mops/_utils/config_tree.py +167 -0
  7. thds/mops/_utils/exception.py +16 -0
  8. thds/mops/_utils/locked_cache.py +78 -0
  9. thds/mops/_utils/names.py +23 -0
  10. thds/mops/_utils/on_slow.py +28 -0
  11. thds/mops/_utils/once.py +30 -0
  12. thds/mops/_utils/temp.py +32 -0
  13. thds/mops/config.py +60 -0
  14. thds/mops/impure/__init__.py +2 -0
  15. thds/mops/impure/keyfunc.py +14 -0
  16. thds/mops/impure/runner.py +73 -0
  17. thds/mops/k8s/__init__.py +27 -0
  18. thds/mops/k8s/_shared.py +3 -0
  19. thds/mops/k8s/apply_yaml.py +22 -0
  20. thds/mops/k8s/auth.py +49 -0
  21. thds/mops/k8s/config.py +37 -0
  22. thds/mops/k8s/container_registry.py +14 -0
  23. thds/mops/k8s/jobs.py +57 -0
  24. thds/mops/k8s/launch.py +234 -0
  25. thds/mops/k8s/logging.py +239 -0
  26. thds/mops/k8s/namespace.py +17 -0
  27. thds/mops/k8s/node_selection.py +58 -0
  28. thds/mops/k8s/retry.py +75 -0
  29. thds/mops/k8s/too_old_resource_version.py +42 -0
  30. thds/mops/k8s/tools/krsync.py +50 -0
  31. thds/mops/k8s/tools/krsync.sh +22 -0
  32. thds/mops/k8s/wait_job.py +72 -0
  33. thds/mops/k8s/warn_image_backoff.py +63 -0
  34. thds/mops/k8s/watch.py +266 -0
  35. thds/mops/meta.json +8 -0
  36. thds/mops/parallel.py +36 -0
  37. thds/mops/pure/__init__.py +43 -0
  38. thds/mops/pure/_magic/__init__.py +0 -0
  39. thds/mops/pure/_magic/api.py +114 -0
  40. thds/mops/pure/_magic/sauce.py +152 -0
  41. thds/mops/pure/_magic/shims.py +34 -0
  42. thds/mops/pure/adls/__init__.py +1 -0
  43. thds/mops/pure/adls/_files.py +22 -0
  44. thds/mops/pure/adls/blob_store.py +185 -0
  45. thds/mops/pure/adls/output_fqn.py +17 -0
  46. thds/mops/pure/core/__init__.py +0 -0
  47. thds/mops/pure/core/content_addressed.py +31 -0
  48. thds/mops/pure/core/deferred_work.py +83 -0
  49. thds/mops/pure/core/entry/__init__.py +2 -0
  50. thds/mops/pure/core/entry/main.py +47 -0
  51. thds/mops/pure/core/entry/route_result.py +66 -0
  52. thds/mops/pure/core/entry/runner_registry.py +31 -0
  53. thds/mops/pure/core/file_blob_store.py +120 -0
  54. thds/mops/pure/core/lock/__init__.py +7 -0
  55. thds/mops/pure/core/lock/_acquire.py +192 -0
  56. thds/mops/pure/core/lock/_funcs.py +37 -0
  57. thds/mops/pure/core/lock/cli.py +73 -0
  58. thds/mops/pure/core/lock/maintain.py +150 -0
  59. thds/mops/pure/core/lock/read.py +39 -0
  60. thds/mops/pure/core/lock/types.py +37 -0
  61. thds/mops/pure/core/lock/write.py +136 -0
  62. thds/mops/pure/core/memo/__init__.py +6 -0
  63. thds/mops/pure/core/memo/function_memospace.py +267 -0
  64. thds/mops/pure/core/memo/keyfunc.py +53 -0
  65. thds/mops/pure/core/memo/overwrite_params.py +61 -0
  66. thds/mops/pure/core/memo/results.py +103 -0
  67. thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
  68. thds/mops/pure/core/metadata.py +230 -0
  69. thds/mops/pure/core/output_naming.py +52 -0
  70. thds/mops/pure/core/partial.py +15 -0
  71. thds/mops/pure/core/pipeline_id.py +62 -0
  72. thds/mops/pure/core/pipeline_id_mask.py +79 -0
  73. thds/mops/pure/core/script_support.py +25 -0
  74. thds/mops/pure/core/serialize_big_objs.py +73 -0
  75. thds/mops/pure/core/serialize_paths.py +149 -0
  76. thds/mops/pure/core/source.py +291 -0
  77. thds/mops/pure/core/types.py +142 -0
  78. thds/mops/pure/core/uris.py +81 -0
  79. thds/mops/pure/core/use_runner.py +47 -0
  80. thds/mops/pure/joblib/__init__.py +1 -0
  81. thds/mops/pure/joblib/backend.py +81 -0
  82. thds/mops/pure/joblib/batching.py +67 -0
  83. thds/mops/pure/pickling/__init__.py +3 -0
  84. thds/mops/pure/pickling/_pickle.py +193 -0
  85. thds/mops/pure/pickling/memoize_only.py +22 -0
  86. thds/mops/pure/pickling/mprunner.py +173 -0
  87. thds/mops/pure/pickling/pickles.py +149 -0
  88. thds/mops/pure/pickling/remote.py +145 -0
  89. thds/mops/pure/pickling/sha256_b64.py +71 -0
  90. thds/mops/pure/runner/__init__.py +0 -0
  91. thds/mops/pure/runner/local.py +239 -0
  92. thds/mops/pure/runner/shim_builder.py +25 -0
  93. thds/mops/pure/runner/simple_shims.py +21 -0
  94. thds/mops/pure/runner/strings.py +1 -0
  95. thds/mops/pure/runner/types.py +28 -0
  96. thds/mops/pure/tools/__init__.py +0 -0
  97. thds/mops/pure/tools/history.py +35 -0
  98. thds/mops/pure/tools/inspect.py +372 -0
  99. thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
  100. thds/mops/pure/tools/stress.py +63 -0
  101. thds/mops/pure/tools/summarize/__init__.py +4 -0
  102. thds/mops/pure/tools/summarize/cli.py +293 -0
  103. thds/mops/pure/tools/summarize/run_summary.py +143 -0
  104. thds/mops/py.typed +0 -0
  105. thds/mops/testing/__init__.py +0 -0
  106. thds/mops/testing/deferred_imports.py +81 -0
  107. thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
  108. thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
  109. thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
  110. thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
  111. thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
thds/mops/pure/pickling/pickles.py
@@ -0,0 +1,149 @@
+ """This module is a good place to define actual objects that need to
+ be pickled in a backward-compatible way - i.e., we want to remember not
+ to refactor their names or the name of the module they live in, so as
+ to maintain backward-compatibility more easily.
+ """
+
+ import importlib
+ import io
+ import pickle
+ import typing as ty
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from thds.core import hashing, log, source
+
+ from ..core.script_support import add_main_module_function, get_main_module_function
+ from ..core.source import source_from_hashref, source_from_source_result
+ from ..core.uris import get_bytes, lookup_blob_store
+
+ logger = log.getLogger(__name__)
+
+
+ @dataclass
+ class Invocation:
+     """Basically, NestedFunctionPickle was the v2. This is v3. By switching to a dataclass,
+     we can more easily add new optional attributes later on.
+     """
+
+     func: ty.Callable
+     args_kwargs_pickle: bytes
+     # this is pickled separately so that we can hash it separately.
+     # the identity of the function is represented by the name part of the blob path.
+
+
+ class NestedFunctionPickle(ty.NamedTuple):
+     """Not in use - retained for mops-inspect backward-compatibility."""
+
+     f: ty.Callable
+     args_kwargs_pickle: bytes
+
+
+ class PicklableFunction:
+     """The main 'issue' this works around is that decorated functions
+     aren't picklable, because of the way the function gets 'replaced'
+     at decoration time.
+
+     There may be other solutions to this, but this one seems to work fine.
+     """
+
+     def __init__(self, f: ty.Callable) -> None:
+         if f.__module__ == "__main__":
+             add_main_module_function(f.__name__, f)
+         self.fmod = f.__module__
+         self.fname = f.__name__
+         self.f = None
+
+     def __str__(self) -> str:
+         return f"{self.fmod}.{self.fname}"
+
+     def __repr__(self) -> str:
+         return str(self)
+
+     @property
+     def __name__(self) -> str:
+         return self.fname
+
+     def __call__(self, *args: ty.Any, **kwargs: ty.Any) -> ty.Any:
+         logger.debug(f"Dynamically importing function {str(self)}")
+         if self.fmod == "__main__":
+             self.f = get_main_module_function(self.fname)  # type: ignore
+         else:
+             mod = importlib.import_module(self.fmod)
+             self.f = getattr(mod, self.fname)
+         assert self.f
+         return self.f(*args, **kwargs)
+
+
+ class UnpickleSimplePickleFromUri:
+     def __init__(self, uri: str):
+         self.uri = uri  # serializable as a pure string for simplicity
+         self._cached = None
+
+     def __call__(self) -> object:
+         # i don't believe there's any need for thread safety here, since pickle won't use threads.
+         if self._cached is None:
+             self._cached = pickle.load(io.BytesIO(get_bytes(self.uri, type_hint="simple-uri-pickle")))
+         return self._cached
+
+
+ class UnpicklePathFromUri(ty.NamedTuple):
+     uri: str
+
+     def __call__(self) -> Path:
+         return lookup_blob_store(self.uri).getfile(self.uri)
+
+
+ class UnpickleSourceUriArgument(ty.NamedTuple):
+     """The URI fully specifies this type of source. Nothing fancy happens here. We just
+     return a new Source object that represents the URI.
+     """
+
+     uri: str
+
+     def __call__(self) -> source.Source:
+         return source.from_uri(self.uri)
+
+
+ class UnpickleSourceHashrefArgument(ty.NamedTuple):
+     """Represents the root for a single-file hashref. May be either local or remote.
+
+     For stability, the module name and the class name must not change.
+
+     This only applies to arguments _into_ a function. Results _from_ a function should
+     have a different form.
+     """
+
+     hash: hashing.Hash
+
+     def __call__(self) -> source.Source:
+         return source_from_hashref(self.hash)
+
+
+ class UnpickleSourceResult(ty.NamedTuple):
+     """Stability for this is not critical, as it will only ever exist in the result
+     payload, which does not participate in memoization.
+     """
+
+     remote_uri: str
+     hash: ty.Optional[hashing.Hash]
+     file_uri: str
+
+     def __call__(self) -> source.Source:
+         return source_from_source_result(*self)
+
+
+ class UnpickleFunctionWithLogicKey(ty.NamedTuple):
+     """When a mops-memoized function receives, in standard "functional programming" style,
+     a function as an argument (whether partially applied or not), we need to make
+     sure to represent any function-logic-key on that callable as part of what gets serialized,
+     so that memoization does not happen when unexpected/undesired.
+
+     The function itself must be picklable in the natural way.
+     """
+
+     func_bytes: bytes
+     function_logic_key: str
+
+     def __call__(self) -> ty.Callable:
+         return pickle.loads(self.func_bytes)
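PicklableFunction is easy to exercise directly. A minimal sketch of the round trip (the `add` function here is a hypothetical example, not part of the package):

    import pickle

    from thds.mops.pure.pickling.pickles import PicklableFunction


    def add(a: int, b: int) -> int:  # hypothetical example function
        return a + b


    wrapped = PicklableFunction(add)  # stores only module + name; self.f stays None
    payload = pickle.dumps(wrapped)  # pickles fine even when `add` itself would not

    restored = pickle.loads(payload)
    assert restored(2, 3) == 5  # re-imports `add` from its module on first call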
thds/mops/pure/pickling/remote.py
@@ -0,0 +1,145 @@
+ import typing as ty
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from functools import cached_property
+
+ from thds.core import log, scope
+
+ from ..._utils.once import Once
+ from ..core import lock, metadata, pipeline_id, uris
+ from ..core.entry import route_return_value_or_exception
+ from ..core.memo import results
+ from ..core.pipeline_id_mask import pipeline_id_mask
+ from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
+ from ..core.serialize_paths import CoordinatingPathSerializer
+ from ..core.types import Args, BlobStore, Kwargs, T
+ from ..runner import strings
+ from . import _pickle, mprunner, pickles, sha256_b64
+
+ logger = log.getLogger(__name__)
+
+
+ @dataclass  # needed for cached_property
+ class _ResultExcWithMetadataChannel:
+     fs: BlobStore
+     dumper: _pickle.Dumper
+     call_id: str
+     invocation_metadata: metadata.InvocationMetadata
+     started_at: datetime
+
+     @cached_property
+     def _metadata_header(self) -> bytes:
+         """This is always embedded _alongside_ the actual return value or exception.
+         This is to make sure that whatever metadata is in the result is atomically
+         part of the result, such that in the rare case of racing invocations,
+         the metadata can be trusted to be accurate.
+         """
+         result_metadata = metadata.ResultMetadata.from_invocation(
+             self.invocation_metadata, self.started_at, datetime.now(tz=timezone.utc)
+         )
+         logger.info(f"Remote code version: {result_metadata.remote_code_version}")
+         return metadata.format_result_header(result_metadata).encode("utf-8")
+
+     def _write_metadata_only(self, prefix: str) -> None:
+         """This is a mops v3 thing that is unnecessary but adds clarity when debugging.
+         If you see more than one of these files in a directory, that usually means either
+         the success was preceded by a failure, _or_ it means that there was an (unusual) race condition.
+         """
+         self.fs.putbytes(
+             self.fs.join(self.call_id, f"{prefix}-metadata-{self.invocation_metadata.invoker_uuid}.txt"),
+             self._metadata_header,
+             type_hint="text/plain",
+         )
+
+     def return_value(self, r: T) -> None:
+         return_value_bytes = _pickle.gimme_bytes(self.dumper, r)
+         self.fs.putbytes(
+             self.fs.join(self.call_id, results.RESULT),
+             self._metadata_header + return_value_bytes,
+             type_hint="application/mops-return-value",
+         )
+         self._write_metadata_only("result")
+
+     def exception(self, exc: Exception) -> None:
+         exc_bytes = _pickle.gimme_bytes(self.dumper, exc)
+         self.fs.putbytes(
+             self.fs.join(self.call_id, results.EXCEPTION),
+             self._metadata_header + exc_bytes,
+             type_hint="application/mops-exception",
+         )
+         self._write_metadata_only("exception")
+
+
+ def _unpickle_invocation(memo_uri: str) -> ty.Tuple[ty.Callable, Args, Kwargs]:
+     _, invocation_raw = _pickle.make_read_header_and_object(strings.INVOCATION)(
+         uris.lookup_blob_store(memo_uri).join(memo_uri, strings.INVOCATION)
+     )
+     invocation = ty.cast(pickles.Invocation, invocation_raw)
+     args, kwargs = _pickle.unfreeze_args_kwargs(invocation.args_kwargs_pickle)
+     return invocation.func, args, kwargs
+
+
+ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
+     """The arguments are those supplied by MemoizingPicklingRunner.
+
+     As of v3, we now expect a number of (required) metadata arguments with every invocation.
+     """
+     started_at = datetime.now(tz=timezone.utc)  # capture this timestamp right at the outset.
+     invocation_metadata = metadata.parse_invocation_metadata_args(metadata_args)
+     metadata.INVOKED_BY.set_global(invocation_metadata.invoked_by)
+     pipeline_id.set_pipeline_id(invocation_metadata.pipeline_id)
+     fs = uris.lookup_blob_store(memo_uri)
+
+     # any recursively-called functions that use metadata will retain the original invoker.
+
+     try:
+         stop_lock = lock.launch_daemon_lock_maintainer(
+             lock.remote_lock_maintain(
+                 fs.join(memo_uri, "lock"), expected_writer_id=invocation_metadata.invoker_uuid
+             )
+         )
+     except lock.CannotMaintainLock as e:
+         logger.info(f"Cannot maintain lock: {e}. Continuing without the lock.")
+         stop_lock = lambda: None  # noqa: E731
+
+     def _extract_invocation_unique_key(memo_uri: str) -> ty.Tuple[str, str]:
+         parts = fs.split(memo_uri)
+         try:
+             runner_idx = parts.index(mprunner.RUNNER_NAME)
+         except ValueError as ve:
+             raise ValueError(
+                 f"Unable to find the runner name {mprunner.RUNNER_NAME} in parts {parts}"
+             ) from ve
+         invocation_parts = parts[runner_idx + 1 :]
+         return fs.join(*invocation_parts[:-1]), invocation_parts[-1]
+
+     scope.enter(uris.ACTIVE_STORAGE_ROOT.set(uris.get_root(memo_uri)))
+
+     try:
+         func, args, kwargs = _unpickle_invocation(memo_uri)
+     except Exception:
+         logger.error(f"Failed to unpickle invocation from {memo_uri} - this is a bug in mops!")
+         raise
+
+     def do_work_return_result() -> object:
+         # ONLY failures in this code should transmit an EXCEPTION
+         # back to the orchestrator side.
+         return pipeline_id_mask(invocation_metadata.pipeline_id)(func)(*args, **kwargs)
+
+     route_return_value_or_exception(
+         _ResultExcWithMetadataChannel(
+             fs,
+             _pickle.Dumper(
+                 ByIdSerializer(ByIdRegistry()),
+                 CoordinatingPathSerializer(sha256_b64.Sha256B64PathStream(), Once()),
+                 _pickle.SourceResultPickler(),
+             ),
+             memo_uri,
+             invocation_metadata,
+             started_at,
+         ),
+         ty.cast(ty.Callable[[], T], do_work_return_result),
+         invocation_metadata.pipeline_id,
+         _extract_invocation_unique_key(memo_uri),
+     )
+     stop_lock()  # not critical since we don't _own_ the lock, but keeps things cleaner
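The channel above always writes the metadata header and the pickled payload as one blob, which is what makes the metadata trustworthy under racing invocations. A toy illustration of that framing pattern (the delimiter and header field are made up; mops' real header format lives in its `metadata` module):

    import pickle
    import typing as ty

    HEADER_END = b"\n---end-header---\n"  # hypothetical delimiter, not mops' real format


    def write_result_blob(header: bytes, payload: object) -> bytes:
        # one blob = metadata header + pickled value, so a reader can never
        # observe a result without its metadata (single atomic blob write).
        return header + HEADER_END + pickle.dumps(payload)


    def read_result_blob(blob: bytes) -> ty.Tuple[bytes, object]:
        header, _, body = blob.partition(HEADER_END)
        return header, pickle.loads(body)


    blob = write_result_blob(b"remote_code_version: 1.2.3", {"answer": 42})
    assert read_result_blob(blob) == (b"remote_code_version: 1.2.3", {"answer": 42})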
thds/mops/pure/pickling/sha256_b64.py
@@ -0,0 +1,71 @@
+ """Context-local, content-aware ser/de from/to a known URI prefix.
+
+ Basically, we take some generic pickle utilities and stitch them together into something
+ that efficiently serializes object graphs to a combination of locations at some URI prefix,
+ such that they are self-deserializing (via CallableUnpickler) on the other side.
+ """
+
+ import hashlib
+ import io
+ import pickle
+ import typing as ty
+ from pathlib import Path
+
+ from thds.core import hashing, log
+
+ from ..core.content_addressed import storage_content_addressed, wordybin_content_addressed
+ from ..core.serialize_paths import Downloader
+ from ..core.uris import active_storage_root, lookup_blob_store
+ from .pickles import UnpicklePathFromUri, UnpickleSimplePickleFromUri
+
+ logger = log.getLogger(__name__)
+ T = ty.TypeVar("T")
+
+
+ class Sha256B64PathStream:
+     def local_to_remote(self, path: Path, sha256: str) -> None:
+         """Put the local file at its fully qualified, content-addressed remote location."""
+         # lazily fetches the active storage root.
+         full_remote_sha256 = storage_content_addressed(sha256, "sha256")
+         lookup_blob_store(full_remote_sha256).putfile(path, full_remote_sha256)
+
+     def get_downloader(self, remote_sha256: str) -> Downloader:
+         return UnpicklePathFromUri(storage_content_addressed(remote_sha256, "sha256"))  # type: ignore # NamedTuple silliness
+
+
+ def _pickle_obj_and_upload_to_content_addressed_path(
+     obj: object, debug_name: str = ""
+ ) -> UnpickleSimplePickleFromUri:
+     # active_storage_root is lazily fetched because we may want to register the pickler
+     # somewhere before settling on the final destination of pickled objects.
+     storage_root = active_storage_root()
+     with io.BytesIO() as bio:
+         pickle.dump(obj, bio)
+         bio.seek(0)
+         fs = lookup_blob_store(storage_root)
+         bytes_uri, debug_uri = wordybin_content_addressed(
+             hashing.Hash("sha256", hashing.hash_using(bio, hashlib.sha256()).digest()),
+             storage_root,
+             debug_name=f"objname_{debug_name}" if debug_name else "",
+         )
+         fs.putbytes(bytes_uri, bio, type_hint="application/octet-stream")
+         if debug_uri:
+             # this name is purely for debugging and affects no part of the runtime.
+             fs.putbytes(debug_uri, "goodbeef".encode(), type_hint="text/plain")
+
+     return UnpickleSimplePickleFromUri(bytes_uri)
+
+
+ class Sha256B64Pickler:
+     """A type of CallbackPickler, intended for picklable objects that should be serialized
+     as pure bytes and stored at a content-addressed URI. Only used (currently) by the
+     ById/shared object serializer, most likely for something like a large dataframe.
+
+     The name exists solely for debugging purposes.
+     """
+
+     def __init__(self, name: str = ""):
+         self.name = name
+
+     def __call__(self, obj: ty.Any) -> UnpickleSimplePickleFromUri:
+         return _pickle_obj_and_upload_to_content_addressed_path(obj, self.name)
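Putting the pieces together, the pickler hands back a zero-argument deserializer that can itself travel inside another pickle. A minimal sketch, assuming an active storage root and a blob store registered for its URI scheme:

    from thds.mops.pure.pickling.sha256_b64 import Sha256B64Pickler

    big_object = list(range(1_000_000))  # stand-in for e.g. a large dataframe

    pickler = Sha256B64Pickler("big_object")  # the name is only a debug label
    unpickle = pickler(big_object)  # pickles and uploads to a content-addressed URI
    # ... later, possibly in a different process ...
    restored = unpickle()  # downloads and unpickles; cached after the first call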
thds/mops/pure/runner/__init__.py (file without changes)
thds/mops/pure/runner/local.py
@@ -0,0 +1,239 @@
+ """Joins pickle functionality and Blob Store functionality to run functions remotely."""
+
+ import threading
+ import time
+ import typing as ty
+ from datetime import datetime, timedelta, timezone
+ from pathlib import Path
+
+ from thds.core import config, log, scope
+
+ from ..._utils.colorize import colorized, make_colorized_out
+ from ...config import max_concurrent_network_ops
+ from ..core import deferred_work, lock, memo, metadata, pipeline_id_mask, uris
+ from ..core.partial import unwrap_partial
+ from ..core.types import Args, Kwargs, NoResultAfterShimSuccess, T
+ from ..tools.summarize import run_summary
+ from . import strings, types
+
+ MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)
+
+ # these two semaphores allow us to prioritize getting meaningful units
+ # of progress _complete_, rather than issuing many instructions to the
+ # underlying client and allowing it to randomly order the operations
+ # such that it takes longer to get a full unit of work complete.
+ _BEFORE_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
+ # _BEFORE prioritizes uploading a single invocation and its dependencies so the Shim can start running.
+ _AFTER_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_network_ops()))
+ # _AFTER prioritizes retrieving the result of a Shim that has completed.
+
+ _DarkBlue = colorized(fg="white", bg="#00008b")
+ _GreenYellow = colorized(fg="black", bg="#adff2f")
+ _Purple = colorized(fg="white", bg="#800080")
+ logger = log.getLogger(__name__)
+ _LogKnownResult = make_colorized_out(_DarkBlue, out=logger.info, fmt_str=" {} ")
+ _LogNewInvocation = make_colorized_out(_GreenYellow, out=logger.info, fmt_str=" {} ")
+ _LogAwaitedResult = make_colorized_out(_Purple, out=logger.info, fmt_str=" {} ")
+
+
+ def invoke_via_shim_or_return_memoized(  # noqa: C901
+     serialize_args_kwargs: types.SerializeArgsKwargs,
+     serialize_invocation: types.SerializeInvocation,
+     shim_builder: types.ShimBuilder,
+     get_meta_and_result: types.GetMetaAndResult,
+     run_directory: ty.Optional[Path] = None,
+ ) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], T]:
+     @scope.bound
+     def create_invocation__check_result__wait_shim(
+         rerun_exceptions: bool,
+         function_memospace: str,
+         # by allowing the caller to set the function memospace, we allow 'redirects' to look up an old result by name,
+         # while still guaranteeing that the function arguments were the same.
+         func: ty.Callable[..., T],
+         args_: Args,
+         kwargs_: Kwargs,
+     ) -> T:
+         """This is the generic local runner. Its core abstractions are:
+
+         - serializers of some sort (for the function and its arguments)
+         - a runtime shim of some sort (can start a Python process somewhere else)
+         - a result and metadata deserializer
+         - URIs that are supported by a registered BlobStore implementation.
+
+         It uses a mops-internal locking mechanism to prevent concurrent invocations of the same function+args.
+         """
+         invoked_at = datetime.now(tz=timezone.utc)
+         # capture immediately, because many things may delay the actual start.
+         storage_root = uris.get_root(function_memospace)
+         scope.enter(uris.ACTIVE_STORAGE_ROOT.set(storage_root))
+         fs = uris.lookup_blob_store(function_memospace)
+         val_or_res = "value" if rerun_exceptions else "result"
+
+         # we need to unwrap any partial object and combine its wrapped
+         # args, kwargs with the provided args, kwargs; otherwise the
+         # args and kwargs will not get properly considered in the memoization key.
+         func, args, kwargs = unwrap_partial(func, args_, kwargs_)
+         pipeline_id = scope.enter(pipeline_id_mask.including_function_docstr(func))
+         # TODO pipeline_id should probably be passed in explicitly
+
+         scope.enter(deferred_work.open_context())  # optimize Source objects during serialization
+
+         args_kwargs_bytes = serialize_args_kwargs(storage_root, func, args, kwargs)
+         memo_uri = fs.join(function_memospace, memo.args_kwargs_content_address(args_kwargs_bytes))
+
+         # Define some important and reusable 'chunks of work'
+
+         class ResultAndInvocationType(ty.NamedTuple):
+             value_or_error: ty.Union[memo.results.Success, memo.results.Error]
+             invoc_type: run_summary.InvocationType
+
+         def check_result(
+             invoc_type: run_summary.InvocationType,
+         ) -> ty.Union[ResultAndInvocationType, None]:
+             result = memo.results.check_if_result_exists(
+                 memo_uri, rerun_excs=rerun_exceptions, before_raise=debug_required_result_failure
+             )
+             if not result:
+                 return None
+
+             _LogKnownResult(
+                 f"{invoc_type} {val_or_res} for {memo_uri} already exists and is being returned without invocation!"
+             )
+             return ResultAndInvocationType(result, invoc_type)
+
+         def unwrap_value_or_error(result_and_itype: ResultAndInvocationType) -> T:
+             result = result_and_itype.value_or_error
+             metadata = None
+             value_t = None
+             try:
+                 if isinstance(result, memo.results.Success):
+                     metadata, value_t = get_meta_and_result("value", result.value_uri)
+                     return ty.cast(T, value_t)
+                 else:
+                     assert isinstance(result, memo.results.Error), "Must be Error or Success"
+                     metadata, exc = get_meta_and_result("EXCEPTION", result.exception_uri)
+                     raise exc
+             finally:
+                 run_summary.log_function_execution(
+                     *(run_directory, func, memo_uri, result_and_itype.invoc_type),
+                     metadata=metadata,
+                     runner_prefix=function_memospace.split(pipeline_id)[0],
+                     was_error=not isinstance(result, memo.results.Success),
+                     return_value=value_t,
+                 )
+
+         def acquire_lock() -> ty.Optional[lock.LockAcquired]:
+             return lock.acquire(fs.join(memo_uri, "lock"), expire=timedelta(seconds=88))
+
+         def upload_invocation_and_deps() -> None:
+             # we're just about to transfer to a remote context,
+             # so it's time to perform any deferred work
+             deferred_work.perform_all()
+
+             fs.putbytes(
+                 fs.join(memo_uri, strings.INVOCATION),
+                 serialize_invocation(storage_root, func, args_kwargs_bytes),
+                 type_hint="application/mops-invocation",
+             )
+
+         def debug_required_result_failure() -> None:
+             # This is entirely for the purpose of making debugging easier. It serves no internal functional purpose.
+             #
+             # first, upload the invocation as an accessible marker of what was expected to exist.
+             upload_invocation_and_deps()
+             # then use mops-inspect programmatically to print the IRE in the same format as usual.
+             from thds.mops.pure.tools.inspect import inspect_and_log
+
+             inspect_and_log(memo_uri)
+
+         # the network ops being grouped by _BEFORE_INVOCATION include one or more
+         # download attempts (consider possible Paths) plus
+         # one or more uploads (embedded Paths & Sources/refs, and then the invocation).
+         with _BEFORE_INVOCATION_SEMAPHORE:
+             # now actually execute the chunks of work that are required...
+
+             # it's possible that our result may already exist from a previous run of this pipeline id.
+             # we can short-circuit the entire process by looking for that result and returning it immediately.
+             result = check_result("memoized")
+             if result:
+                 return unwrap_value_or_error(result)
+
+             lock_owned = acquire_lock()
+             # if no result exists, by far the most common outcome here will be acquiring
+             # the lock on the first try. this will lead to breaking out of
+             # the LOCK LOOP directly below and going on to the shim invocation.
+             # still, we release the semaphore b/c we can't sleep while holding it.
+
+         # LOCK LOOP: entering this loop (where we attempt to acquire the lock) is the common non-memoized case
+         while not result:
+             if lock_owned:
+                 if MAINTAIN_LOCKS():
+                     release_lock = lock.launch_daemon_lock_maintainer(lock_owned)
+                 else:
+                     release_lock = lock_owned.release
+                 break  # we own the invocation - invoke the shim ourselves (below)
+
+             # getting to this point ONLY happens if we failed to acquire the lock, which
+             # is not expected to be the usual situation. We log a differently-colored
+             # message here to make that clear to users.
+             _LogAwaitedResult(
+                 f"{val_or_res} for {memo_uri} does not exist, but the lock is owned by another process."
+             )
+             time.sleep(22)
+
+             with _BEFORE_INVOCATION_SEMAPHORE:
+                 result = check_result("awaited")
+                 if result:
+                     _LogAwaitedResult(
+                         f"{val_or_res} for {memo_uri} was found after waiting for the lock."
+                     )
+                     return unwrap_value_or_error(result)
+
+                 lock_owned = acquire_lock()  # still inside the semaphore, as it's a network op
+
+         assert release_lock is not None
+         assert lock_owned is not None
+         # if/when we acquire the lock, we move forever into 'run this ourselves' mode.
+         # If something about our invocation fails,
+         # we fail just as we would have previously, without any attempt to go
+         # 'back' to waiting for someone else to compute the result.
+
+         try:
+             with _BEFORE_INVOCATION_SEMAPHORE:
+                 _LogNewInvocation(f"Invoking {memo_uri}")
+                 upload_invocation_and_deps()
+
+             # can't hold the semaphore while we block on the shim, though.
+             shim_ex = None
+             shim = shim_builder(func, args_, kwargs_)
+             shim(  # ACTUAL INVOCATION (handoff to remote shim) HAPPENS HERE
+                 (
+                     memo_uri,
+                     *metadata.format_invocation_cli_args(
+                         metadata.InvocationMetadata.new(pipeline_id, invoked_at, lock_owned.writer_id)
+                     ),
+                 )
+             )
+         except Exception as ex:
+             # network or similar errors are very common and hard to completely eliminate.
+             # We know that if a result (or error) exists, then the network failure is
+             # not important, because results in blob storage are atomically populated (either fully there or not).
+             logger.exception("Error awaiting shim. Optimistically checking for result.")
+             shim_ex = ex
+
+         finally:
+             release_lock()
+
+         # the network ops being grouped by _AFTER_INVOCATION include one or more downloads.
+         with _AFTER_INVOCATION_SEMAPHORE:
+             value_or_error = memo.results.check_if_result_exists(memo_uri)
+             if not value_or_error:
+                 if shim_ex:
+                     raise shim_ex  # re-raise the underlying exception rather than making up our own.
+                 raise NoResultAfterShimSuccess(
+                     f"The shim for {memo_uri} exited cleanly, but no result or exception was found."
+                 )
+             return unwrap_value_or_error(ResultAndInvocationType(value_or_error, "invoked"))
+
+     return create_invocation__check_result__wait_shim
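For orientation, the factory above returns the runner callable described by its return annotation. A hedged wiring sketch (every `my_*` name and the URI are hypothetical stand-ins for real serializers, a real shim builder, and a real memospace):

    from thds.mops.pure.runner.local import invoke_via_shim_or_return_memoized

    run = invoke_via_shim_or_return_memoized(
        serialize_args_kwargs=my_serialize_args_kwargs,  # hypothetical serializer
        serialize_invocation=my_serialize_invocation,  # hypothetical serializer
        shim_builder=my_shim_builder,  # hypothetical ShimBuilder
        get_meta_and_result=my_get_meta_and_result,  # hypothetical deserializer
    )
    # rerun_exceptions=False: a memoized exception is re-raised rather than re-run.
    result = run(False, "adls://container/root/runner/pipeline/my.module.my_func", my_func, (1, 2), {})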
thds/mops/pure/runner/shim_builder.py
@@ -0,0 +1,25 @@
+ import inspect
+ import typing as ty
+
+ from ..core.types import Args, F, Kwargs
+ from .types import Shim, ShimBuilder
+
+
+ class _static_shim_builder:
+     def __init__(self, shim: Shim) -> None:
+         self.shim = shim
+
+     def __call__(self, _f: F, _args: Args, _kwargs: Kwargs) -> Shim:
+         return self.shim
+
+     def __repr__(self) -> str:
+         return f"<static_shim_builder for {self.shim}>"
+
+
+ def make_builder(shim: ty.Union[Shim, ShimBuilder]) -> ShimBuilder:
+     """If you have a Shim and you want to make it into the simplest possible ShimBuilder."""
+
+     if len(inspect.signature(shim).parameters) == 3:
+         return ty.cast(ShimBuilder, shim)
+
+     return _static_shim_builder(ty.cast(Shim, shim))
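In other words, make_builder sniffs the arity: a three-parameter callable is assumed to already be a ShimBuilder, and anything else is treated as a Shim and wrapped. A small sketch using subprocess_shim from simple_shims (shown next):

    from thds.mops.pure.runner.shim_builder import make_builder
    from thds.mops.pure.runner.simple_shims import subprocess_shim


    def my_func(x: int) -> int:  # hypothetical function being run
        return x * 2


    builder = make_builder(subprocess_shim)  # 1-parameter callable, so it gets wrapped
    shim = builder(my_func, (1,), {})  # the static builder ignores its arguments
    assert shim is subprocess_shim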
thds/mops/pure/runner/simple_shims.py
@@ -0,0 +1,21 @@
+ import subprocess
+ from typing import Sequence
+
+ from thds.core import log
+
+ from ..core.entry.runner_registry import run_named_entry_handler
+
+ logger = log.getLogger(__name__)
+
+
+ def samethread_shim(shim_args: Sequence[str]) -> None:
+     """Use this inside a memoizing Runner to get the memoization
+     without needing to transfer control to an external process.
+     """
+     logger.debug("Running a mops function locally in the current thread.")
+     run_named_entry_handler(*shim_args)
+
+
+ def subprocess_shim(shim_args: Sequence[str]) -> None:
+     logger.debug("Running a mops function locally in a new subprocess.")
+     subprocess.check_call(["python", "-m", "thds.mops.pure.core.entry.main", *shim_args])
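Any blocking one-argument callable satisfies the Shim contract, so custom shims are easy to add. A hypothetical sketch that runs the same entry point on a remote host over ssh (assuming the same code and environment are installed there; the hostname is a placeholder):

    import subprocess
    from typing import Sequence


    def ssh_shim(shim_args: Sequence[str]) -> None:
        # hypothetical: blocks until the remote entry point finishes,
        # as the Shim contract requires.
        subprocess.check_call(
            ["ssh", "worker-host", "python", "-m", "thds.mops.pure.core.entry.main", *shim_args]
        )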
thds/mops/pure/runner/strings.py
@@ -0,0 +1 @@
+ INVOCATION = "invocation"
thds/mops/pure/runner/types.py
@@ -0,0 +1,28 @@
+ import typing as ty
+
+ from ..core.metadata import ResultMetadata
+ from ..core.types import Args, F, Kwargs
+
+ Shim = ty.Callable[[ty.Sequence[str]], ty.Any]
+ """A runner Shim is a way of getting back into a Python process with enough
+ context to download the uploaded function and its arguments from the
+ location where a runner placed them, and then invoke the function. All
+ arguments are strings because it is assumed that this represents some
+ kind of command-line invocation.
+
+ The Shim must be a blocking call, and its result(s) must be available
+ immediately after it returns.
+ """
+
+
+ class ShimBuilder(ty.Protocol):
+     def __call__(self, __f: F, __args: Args, __kwargs: Kwargs) -> Shim:
+         ...  # pragma: no cover
+
+
+ StorageRootURI = str
+ SerializeArgsKwargs = ty.Callable[[StorageRootURI, F, Args, Kwargs], bytes]
+ SerializeInvocation = ty.Callable[[StorageRootURI, F, bytes], bytes]
+ # the bytes parameter is the previously-serialized args/kwargs
+ GetMetaAndResult = ty.Callable[[str, str], ty.Tuple[ty.Optional[ResultMetadata], ty.Any]]
+ # the above should probably not 'hide' the fetch of the bytes, but it is what it is for now.
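Because a ShimBuilder sees the function and its arguments, it can choose a different Shim per call. A sketch under made-up assumptions (the `n_rows` heuristic is purely illustrative):

    import typing as ty

    from thds.mops.pure.runner.simple_shims import samethread_shim, subprocess_shim
    from thds.mops.pure.runner.types import Shim


    def choosy_builder(f: ty.Callable, args: ty.Tuple, kwargs: ty.Dict) -> Shim:
        # hypothetical heuristic: isolate large workloads in a subprocess,
        # run everything else in the current thread.
        if kwargs.get("n_rows", 0) > 1_000_000:
            return subprocess_shim
        return samethread_shim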