thds.mops 3.6.20250219172032__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.mops might be problematic. Click here for more details.

Files changed (111) hide show
  1. thds/mops/__about__.py +8 -0
  2. thds/mops/__init__.py +3 -0
  3. thds/mops/_compat.py +6 -0
  4. thds/mops/_utils/__init__.py +0 -0
  5. thds/mops/_utils/colorize.py +110 -0
  6. thds/mops/_utils/config_tree.py +167 -0
  7. thds/mops/_utils/exception.py +16 -0
  8. thds/mops/_utils/locked_cache.py +78 -0
  9. thds/mops/_utils/names.py +23 -0
  10. thds/mops/_utils/on_slow.py +28 -0
  11. thds/mops/_utils/once.py +30 -0
  12. thds/mops/_utils/temp.py +32 -0
  13. thds/mops/config.py +60 -0
  14. thds/mops/impure/__init__.py +2 -0
  15. thds/mops/impure/keyfunc.py +14 -0
  16. thds/mops/impure/runner.py +73 -0
  17. thds/mops/k8s/__init__.py +27 -0
  18. thds/mops/k8s/_shared.py +3 -0
  19. thds/mops/k8s/apply_yaml.py +22 -0
  20. thds/mops/k8s/auth.py +49 -0
  21. thds/mops/k8s/config.py +37 -0
  22. thds/mops/k8s/container_registry.py +14 -0
  23. thds/mops/k8s/jobs.py +57 -0
  24. thds/mops/k8s/launch.py +234 -0
  25. thds/mops/k8s/logging.py +239 -0
  26. thds/mops/k8s/namespace.py +17 -0
  27. thds/mops/k8s/node_selection.py +58 -0
  28. thds/mops/k8s/retry.py +75 -0
  29. thds/mops/k8s/too_old_resource_version.py +42 -0
  30. thds/mops/k8s/tools/krsync.py +50 -0
  31. thds/mops/k8s/tools/krsync.sh +22 -0
  32. thds/mops/k8s/wait_job.py +72 -0
  33. thds/mops/k8s/warn_image_backoff.py +63 -0
  34. thds/mops/k8s/watch.py +266 -0
  35. thds/mops/meta.json +8 -0
  36. thds/mops/parallel.py +36 -0
  37. thds/mops/pure/__init__.py +43 -0
  38. thds/mops/pure/_magic/__init__.py +0 -0
  39. thds/mops/pure/_magic/api.py +114 -0
  40. thds/mops/pure/_magic/sauce.py +152 -0
  41. thds/mops/pure/_magic/shims.py +34 -0
  42. thds/mops/pure/adls/__init__.py +1 -0
  43. thds/mops/pure/adls/_files.py +22 -0
  44. thds/mops/pure/adls/blob_store.py +185 -0
  45. thds/mops/pure/adls/output_fqn.py +17 -0
  46. thds/mops/pure/core/__init__.py +0 -0
  47. thds/mops/pure/core/content_addressed.py +31 -0
  48. thds/mops/pure/core/deferred_work.py +83 -0
  49. thds/mops/pure/core/entry/__init__.py +2 -0
  50. thds/mops/pure/core/entry/main.py +47 -0
  51. thds/mops/pure/core/entry/route_result.py +66 -0
  52. thds/mops/pure/core/entry/runner_registry.py +31 -0
  53. thds/mops/pure/core/file_blob_store.py +120 -0
  54. thds/mops/pure/core/lock/__init__.py +7 -0
  55. thds/mops/pure/core/lock/_acquire.py +192 -0
  56. thds/mops/pure/core/lock/_funcs.py +37 -0
  57. thds/mops/pure/core/lock/cli.py +73 -0
  58. thds/mops/pure/core/lock/maintain.py +150 -0
  59. thds/mops/pure/core/lock/read.py +39 -0
  60. thds/mops/pure/core/lock/types.py +37 -0
  61. thds/mops/pure/core/lock/write.py +136 -0
  62. thds/mops/pure/core/memo/__init__.py +6 -0
  63. thds/mops/pure/core/memo/function_memospace.py +267 -0
  64. thds/mops/pure/core/memo/keyfunc.py +53 -0
  65. thds/mops/pure/core/memo/overwrite_params.py +61 -0
  66. thds/mops/pure/core/memo/results.py +103 -0
  67. thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
  68. thds/mops/pure/core/metadata.py +230 -0
  69. thds/mops/pure/core/output_naming.py +52 -0
  70. thds/mops/pure/core/partial.py +15 -0
  71. thds/mops/pure/core/pipeline_id.py +62 -0
  72. thds/mops/pure/core/pipeline_id_mask.py +79 -0
  73. thds/mops/pure/core/script_support.py +25 -0
  74. thds/mops/pure/core/serialize_big_objs.py +73 -0
  75. thds/mops/pure/core/serialize_paths.py +149 -0
  76. thds/mops/pure/core/source.py +291 -0
  77. thds/mops/pure/core/types.py +142 -0
  78. thds/mops/pure/core/uris.py +81 -0
  79. thds/mops/pure/core/use_runner.py +47 -0
  80. thds/mops/pure/joblib/__init__.py +1 -0
  81. thds/mops/pure/joblib/backend.py +81 -0
  82. thds/mops/pure/joblib/batching.py +67 -0
  83. thds/mops/pure/pickling/__init__.py +3 -0
  84. thds/mops/pure/pickling/_pickle.py +193 -0
  85. thds/mops/pure/pickling/memoize_only.py +22 -0
  86. thds/mops/pure/pickling/mprunner.py +173 -0
  87. thds/mops/pure/pickling/pickles.py +149 -0
  88. thds/mops/pure/pickling/remote.py +145 -0
  89. thds/mops/pure/pickling/sha256_b64.py +71 -0
  90. thds/mops/pure/runner/__init__.py +0 -0
  91. thds/mops/pure/runner/local.py +239 -0
  92. thds/mops/pure/runner/shim_builder.py +25 -0
  93. thds/mops/pure/runner/simple_shims.py +21 -0
  94. thds/mops/pure/runner/strings.py +1 -0
  95. thds/mops/pure/runner/types.py +28 -0
  96. thds/mops/pure/tools/__init__.py +0 -0
  97. thds/mops/pure/tools/history.py +35 -0
  98. thds/mops/pure/tools/inspect.py +372 -0
  99. thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
  100. thds/mops/pure/tools/stress.py +63 -0
  101. thds/mops/pure/tools/summarize/__init__.py +4 -0
  102. thds/mops/pure/tools/summarize/cli.py +293 -0
  103. thds/mops/pure/tools/summarize/run_summary.py +143 -0
  104. thds/mops/py.typed +0 -0
  105. thds/mops/testing/__init__.py +0 -0
  106. thds/mops/testing/deferred_imports.py +81 -0
  107. thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
  108. thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
  109. thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
  110. thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
  111. thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
@@ -0,0 +1,61 @@
1
+ import inspect
2
+ import typing as ty
3
+
4
+ from ..types import Args, Kwargs
5
+
6
+ F = ty.TypeVar("F", bound=ty.Callable)
7
+
8
+
9
def argument_transformer(
    func: F,
    names_to_transforms: ty.Mapping[str, ty.Callable[[ty.Any], ty.Any]],
) -> ty.Callable[[Args, Kwargs], ty.Tuple[Args, Kwargs]]:
    """Overwrite bound arguments at call time with the value returned
    by each named transform, whose names must each correspond to a
    named parameter on the function.

    The named transforms will each receive the 'raw' argument value,
    and may choose whether to return it or something else. A mapping
    of identity functions (foo=lambda x: x) would be, therefore,
    functionally a no-op.

    This is not a decorator, but a decorator can easily be built on top of it.

    :param func: function whose signature the transform names are validated against.
    :param names_to_transforms: parameter name -> unary transform for that argument.
    :raises ValueError: at construction time (not call time) if any transform
        name is not a parameter of ``func``.
    """
    parameter_names = list(inspect.signature(func).parameters)
    unknown_parameters = set(names_to_transforms) - set(parameter_names)
    if unknown_parameters:
        # don't let this bad situation go any further - fail fast, at wrap time.
        raise ValueError(f"The function {func} does not have parameters {unknown_parameters}")

    def xf_args_kwargs(args: Args, kwargs: Kwargs) -> ty.Tuple[Args, Kwargs]:
        # Positional arguments are matched to parameter names by position.
        # zip stops at the shorter sequence; any extra positionals beyond the
        # named parameters are passed through untouched.
        pos_args = [
            names_to_transforms[name](arg) if name in names_to_transforms else arg
            for name, arg in zip(parameter_names, args)
        ] + list(args[len(parameter_names) :])

        # Keyword arguments are matched directly by name.
        xf_kwargs = {
            name: names_to_transforms[name](arg) if name in names_to_transforms else arg
            for name, arg in kwargs.items()
        }
        return pos_args, xf_kwargs

    return xf_args_kwargs
51
+
52
+
53
def parameter_overwriter(
    func: F, names_to_values: ty.Mapping[str, ty.Any]
) -> ty.Callable[[Args, Kwargs], ty.Tuple[Args, Kwargs]]:
    """Overwrite parameters without regard to the actual argument values."""

    def constant(value: ty.Any) -> ty.Callable[[ty.Any], ty.Any]:
        # bind `value` now so each transform simply ignores its input later.
        return lambda _ignored: value

    constant_transforms = {name: constant(value) for name, value in names_to_values.items()}
    return argument_transformer(func, constant_transforms)
@@ -0,0 +1,103 @@
1
+ """Sometimes you want to require that a memoized result exists.
2
+
3
+ A Runner should hook into this system to enforce that upon itself.
4
+ """
5
+
6
+ import os
7
+ import typing as ty
8
+ from contextlib import contextmanager
9
+
10
+ from thds.core import config, log, stack_context
11
+
12
+ from ...._utils import colorize
13
+ from ..uris import lookup_blob_store
14
+
15
# Non-empty means "results are required"; the value doubles as the error message.
_REQUIRE_ALL_RESULTS = config.item("require_all_results", default="")
# Name of an env var that, when present in os.environ, disables the requirement.
_UNLESS_ENV = stack_context.StackContext("results_unless_env", "")
# please do not set the above globally unless you really, truly know what you're doing.
logger = log.getLogger(__name__)
ORANGE = colorize.colorized("#FF8200")  # used to highlight memo URIs in error messages

# Sentinel meaning "required, but no custom message was supplied".
_NO_MSG = "xxxx-REQUIRED-xxxx"
22
+
23
+
24
@contextmanager
def require_all(message: str = _NO_MSG, *, unless_env: str = "") -> ty.Iterator[None]:
    """Require memoized results for everything below this point on the stack,
    _unless_ the non-empty env variable named by `unless_env` is present in
    os.environ.

    An empty string message will still force results to be required (the
    sentinel message is substituted).
    """
    effective_message = message or _NO_MSG
    with _REQUIRE_ALL_RESULTS.set_local(effective_message), _UNLESS_ENV.set(unless_env):
        try:
            yield
        except RequiredResultNotFound as err:
            # re-raise from here so the traceback points at the requiring scope
            raise RequiredResultNotFound(err.args[0], err.uri) from err
37
+
38
+
39
+ # _REQUIRED_FUNC_NAMES = set()
40
+
41
+
42
+ # def required(func: ty.Callable) -> None:
43
+ # pass
44
+
45
+
46
def _should_require_result(memo_uri: str = "") -> str:
    """Return the active requirement message, or '' when results are not required.

    NOTE(review): `memo_uri` is currently unused; kept for interface stability.
    """
    msg = _REQUIRE_ALL_RESULTS()
    if not msg:
        return ""  # no requirement is active
    escape_hatch = _UNLESS_ENV()
    if escape_hatch:
        if escape_hatch in os.environ:
            return ""  # requirement explicitly disabled via the environment
        return (
            f"{msg}; Note that you can set the environment variable"
            f" {escape_hatch} to skip this check."
        )
    return msg
57
+
58
+
59
class Success(ty.NamedTuple):
    # URI of the blob holding the memoized result.
    value_uri: str


class Error(ty.NamedTuple):
    # URI of the blob holding the memoized exception.
    exception_uri: str


# Well-known blob names stored under a memo URI.
RESULT = "result"
EXCEPTION = "exception"
69
+
70
+
71
class RequiredResultNotFound(Exception):
    """Raised when a memoized result was required but does not exist.

    Carries the memo URI that was checked so callers can report or retry it.
    """

    def __init__(self, message: str, uri: str):
        self.uri = uri  # the memo URI whose result was missing
        super().__init__(message)
75
+
76
+
77
def check_if_result_exists(
    memo_uri: str,
    rerun_excs: bool = False,
    before_raise: ty.Callable[[], ty.Any] = lambda: None,
) -> ty.Union[None, Success, Error]:
    """Look up a memoized outcome under `memo_uri`.

    Returns Success when a result blob exists; Error when an exception blob
    exists (unless `rerun_excs` is True); None when nothing usable was found.

    Raises RequiredResultNotFound when no result exists but one is required;
    `before_raise` runs just before raising.
    """
    blob_store = lookup_blob_store(memo_uri)
    result_uri = blob_store.join(memo_uri, RESULT)
    if blob_store.exists(result_uri):
        return Success(result_uri)

    requirement = _should_require_result(memo_uri)
    if requirement:  # might be custom or the default; either way a result is required.
        before_raise()
        # ORANGE makes the memo URI easy to spot when visually scanning logs.
        message = f"Required a result for {ORANGE(memo_uri)} but that result was not found"
        if requirement != _NO_MSG:  # append the custom message if one was given
            message = f"{message}: {requirement}"
        raise RequiredResultNotFound(message, memo_uri)

    if not rerun_excs:
        exception_uri = blob_store.join(memo_uri, EXCEPTION)
        if blob_store.exists(exception_uri):
            return Error(exception_uri)
    return None
@@ -0,0 +1,70 @@
1
+ """Provides for precise cache invalidation via a known key in the docstring.
2
+
3
+ `function-logic-key`:
4
+
5
+ By modifying this key in the docstring of a use_runner-decorated
6
+ function or callable class, you can indicate to mops that although the
7
+ function name has not changed (perhaps because of refactoring
8
+ concerns), and although the parameters may be the same and take the
9
+ same values as a previous run, nevertheless the (value of the)
10
+ internal logic has changed and therefore no previously memoized
11
+ results should be returned when running this function.
12
+
13
+ This step is entirely optional and is expected only to be used for
14
+ advanced use cases where optimal caching/memoization is required.
15
+
16
+ The keys may be any string without spaces, but ideally should be
17
+ semantically meaningful to another developer or yourself, i.e. a name
18
+ or description of some sort. Examples might be:
19
+
20
+ function-logic-key: v1
21
+ function-logic-key: 2023-03-31
22
+ function-logic-key: try-uint8-math
23
+
24
+ """
25
+
26
+ import inspect
27
+ import re
28
+ import typing as ty
29
+ from functools import lru_cache
30
+
31
+ from thds.mops._utils.names import full_name_and_callable
32
+
33
+ _DOCSTRING_VERSION_RE = re.compile(r".*function-logic-key:\s+(?P<version>[^\s]+)\b", re.DOTALL)
34
+
35
+
36
+ def _parse_logic_key(doc: str) -> str:
37
+ m = _DOCSTRING_VERSION_RE.match(doc)
38
+ return m.group("version") if m else ""
39
+
40
+
41
+ def extract_function_logic_key_from_docstr(obj: ty.Any) -> str:
42
+ doc = getattr(obj, "__doc__", "") or ""
43
+ return _parse_logic_key(doc)
44
+
45
+
46
+ extract_logic_key_from_docstr = extract_function_logic_key_from_docstr
47
+
48
+
49
@lru_cache(maxsize=None)
def make_unique_name_including_docstring_key(f: ty.Any) -> str:
    """Return f's fully-qualified name (via full_name_and_callable), with
    '@<function-logic-key>' appended when its docstring declares one.
    """
    module_and_name, target = full_name_and_callable(f)
    # Read __doc__ directly instead of scanning every member with
    # inspect.getmembers just to find it; also avoids shadowing the
    # `callable` builtin, which the previous implementation did.
    doc = getattr(target, "__doc__", None) or ""
    version = _parse_logic_key(doc) if doc else ""
    # rstrip drops the separator again when no logic key was found.
    return f"{module_and_name}@{version}".rstrip("@")
57
+
58
+
59
class FunctionComponents(ty.NamedTuple):
    # The pieces of a unique function name: '<module>--<name>[@<logic-key>]'.
    module: str
    name: str
    function_logic_key: str


def parse_unique_name(full_function_name: str) -> FunctionComponents:
    """Split '<module>--<name>[@<logic-key>]' into its components.

    The logic key is '' when the name carries none. Uses maxsplit/partition so
    that a name or logic key which itself contains '--' or '@' (keys may be
    any string without spaces) no longer breaks the tuple unpacking.
    """
    assert "--" in full_function_name, f"Expected '--' in {full_function_name}"
    module, name = full_function_name.split("--", 1)
    base_name, _sep, logic_key = name.partition("@")
    return FunctionComponents(module, base_name, logic_key)
@@ -0,0 +1,230 @@
1
+ """This is where we put implementation details having to do with the new metadata system in
2
+ v3 of mops.
3
+
4
+ Metadata is anything that is not critical to the core operation of mops but is useful for
5
+ debugging, monitoring, or other purposes.
6
+ """
7
+
8
+ import argparse
9
+ import getpass
10
+ import os
11
+ import typing as ty
12
+ from dataclasses import dataclass
13
+ from datetime import datetime
14
+
15
+ from thds.core import calgitver, config, hostname
16
+
17
# Compute a default code version from git at import time; fall back to ""
# when git info is unavailable (calgitver.git.NO_GIT).
try:
    _CALGITVER = calgitver.calgitver()
except calgitver.git.NO_GIT:
    _CALGITVER = ""


# Config items backing the invocation metadata defaults below.
INVOKER_CODE_VERSION = config.item("mops.metadata.local.invoker_code_version", _CALGITVER)
INVOKED_BY = config.item("mops.metadata.local.invoked_by", "")
REMOTE_CODE_VERSION = config.item("mops.metadata.remote.code_version", "")
# set the remote code version inside your docker image or other environment.
27
+
28
+
29
def get_invoker_code_version() -> str:
    """The configured invoker code version, or 'unknown' when unset."""
    configured = INVOKER_CODE_VERSION()
    return configured if configured else "unknown"


def get_invoked_by() -> str:
    """The configured invoker identity, defaulting to '<user>@<friendly host>'."""
    configured = INVOKED_BY()
    if configured:
        return configured
    return f"{getpass.getuser()}@{hostname.friendly()}"
35
+
36
+
37
@dataclass
class InvocationMetadata:
    """Who/when/what-version record for an invocation.

    Metadata values may not contain spaces.
    """

    invoked_at: datetime  # when the invocation happened
    invoked_by: str
    # ^ a more semantic identifier of 'who' called the function; passed
    # recursively to other (nested) invocations.
    invoker_code_version: str
    # ^ Collectively: the 'ABC's of the invocation metadata.

    invoker_uuid: str  # the writer_uuid from the lock

    pipeline_id: str
    # pipeline_id participates directly in memoization, so it is not _just_
    # metadata - but it travels most conveniently alongside everything else
    # used for debugging and monitoring.

    @staticmethod
    def new(pipeline_id: str, invoked_at: datetime, invoker_uuid: str) -> "InvocationMetadata":
        """Build a record, filling identity/version from config and environment."""
        return InvocationMetadata(
            invoked_at=invoked_at,
            invoked_by=get_invoked_by(),
            invoker_code_version=get_invoker_code_version(),
            invoker_uuid=invoker_uuid,
            pipeline_id=pipeline_id,
        )
63
+
64
+
65
def get_remote_code_version(invoker_code_version: str) -> str:
    """Best-available code version for the remote side.

    Preference order: explicit config item, then THDS-internal env-var
    fallbacks, then whatever the invoker reported (covers local runs).
    """
    for candidate in (
        REMOTE_CODE_VERSION(),
        # these env var fallbacks are specifically for THDS internal use;
        # control is exposed via the official config item.
        os.getenv("CALGITVER"),
        os.getenv("THDS_APP_VERSION"),
    ):
        if candidate:
            return candidate
    return invoker_code_version  # in a local-run context, use what was set explicitly, if anything
74
+
75
+
76
@dataclass
class ResultMetadata(InvocationMetadata):
    """InvocationMetadata plus timings and code version captured remotely."""

    remote_code_version: str
    remote_started_at: datetime
    remote_ended_at: datetime
    # The wall-time fields are derivable from the datetimes above; they are
    # precomputed in minutes (the friendlier unit) for convenience.
    remote_wall_minutes: float  # remote_started_at -> remote_ended_at
    result_wall_minutes: float  # invoked_at -> remote_ended_at

    @staticmethod
    def from_invocation(
        invocation_metadata: InvocationMetadata, started_at: datetime, ended_at: datetime
    ) -> "ResultMetadata":
        """Extend an invocation record with remote timings and code version."""
        return ResultMetadata(
            **vars(invocation_metadata),
            remote_code_version=get_remote_code_version(invocation_metadata.invoker_code_version),
            remote_started_at=started_at,
            remote_ended_at=ended_at,
            remote_wall_minutes=(ended_at - started_at).total_seconds() / 60,
            result_wall_minutes=(ended_at - invocation_metadata.invoked_at).total_seconds() / 60,
        )
99
+
100
+
101
def invocation_metadata_parser(
    parser: ty.Optional[argparse.ArgumentParser] = None,
) -> argparse.ArgumentParser:
    """Add the invocation-metadata CLI flags to `parser` (created when None)."""
    if parser is None:
        parser = argparse.ArgumentParser()
    add = parser.add_argument
    add(
        "--invoked-by",
        help="Who invoked this function. Will be used recursively (for nested functions).",
        required=True,
    )
    add(
        "--invoker-code-version",
        help="The version of the code that is running. Usually a CalGitVer, but can be any non-empty string.",
        required=True,
    )
    add(
        "--invoked-at",
        help="The time at which this function was invoked. Should be an ISO8601 timestamp.",
        type=datetime.fromisoformat,
        required=True,
    )
    # invoker-uuid is the only optional flag; it defaults to None.
    add(
        "--invoker-uuid",
        help="The UUID of the invoker. This is generally the writer UUID from the lock.",
    )
    add("--pipeline-id", required=True)
    return parser
128
+
129
+
130
def result_metadata_parser() -> argparse.ArgumentParser:
    """The invocation-metadata flags plus the remote-side result flags."""
    parser = invocation_metadata_parser()
    add = parser.add_argument
    add(
        "--remote-code-version",
        help="The version of the code that ran remotely. Usually a CalGitVer, but can be any non-empty string.",
    )
    add(
        "--remote-started-at",
        help="The time at which this function started. Should be an ISO8601 timestamp.",
        type=datetime.fromisoformat,
        required=True,
    )
    add(
        "--remote-ended-at",
        help="The time at which this function ended. Should be an ISO8601 timestamp.",
        type=datetime.fromisoformat,
        required=True,
    )
    # The wall-minute flags are redundant with the timestamps but precomputed.
    add(
        "--remote-wall-minutes",
        help="The computed wall time in minutes between the remote start and end times.",
        type=float,
    )
    add(
        "--result-wall-minutes",
        help="The computed wall time in minutes between the remote end and the invocation time.",
        type=float,
    )
    return parser
159
+
160
+
161
def parse_invocation_metadata_args(args: ty.Sequence[str]) -> InvocationMetadata:
    """Parse metadata args from the command line.

    Metadata args are of the form --key-name=value; unrecognized args are ignored.
    """
    namespace, _unrecognized = invocation_metadata_parser().parse_known_args(args)
    return InvocationMetadata(**vars(namespace))
168
+
169
+
170
def parse_result_metadata(metadata_keyvals: ty.Sequence[str]) -> ResultMetadata:
    """Parse 'key=value' metadata pairs into a ResultMetadata.

    Empty entries are skipped. Usually you'll be splitting an initial text
    string on newline first, but that is not done for you here.
    """
    cli_args = []
    for keyval in metadata_keyvals:
        if not keyval:
            continue
        key, value = keyval.split("=", 1)
        # re-render as an argparse-style flag so we can reuse the parser.
        cli_args.append(f"--{key.replace('_', '-')}={value}")
    namespace = result_metadata_parser().parse_args(cli_args)
    return ResultMetadata(**vars(namespace))
184
+
185
+
186
def _format_metadata(
    metadata: ty.Union[InvocationMetadata, ResultMetadata], prefix: str
) -> ty.List[str]:
    """Render metadata as '<prefix>key-name=value' strings for the command line
    OR for the header in a result payload.

    Fields whose value is None or '' are omitted. Datetimes become ISO8601.
    A rendered value containing a space is a hard error.
    """
    formatted = []
    for key, value in vars(metadata).items():
        if value is None or value == "":
            continue  # omit unset fields entirely
        rendered = value.isoformat() if isinstance(value, datetime) else str(value)
        if " " in rendered:
            raise ValueError(f"Metadata value {rendered} contains a space. This is illegal")
        formatted.append(f"{prefix}{key.replace('_', '-')}={rendered}")
    return formatted
210
+
211
+
212
def format_invocation_cli_args(metadata: InvocationMetadata) -> ty.List[str]:
    # '--key-name=value' form, suitable for appending to a command line.
    return _format_metadata(metadata, prefix="--")


def format_result_header(metadata: ResultMetadata) -> str:
    """Includes separating newlines and a trailing newline."""
    # 'key-name=value' lines; the inverse of parse_result_metadata.
    return "\n".join(_format_metadata(metadata, prefix="")) + "\n"
219
+
220
+
221
def format_end_of_run_times(start_timestamp: float, maybe_metadata_args: ty.Sequence[str]) -> str:
    """Human-readable wait/total timing suffix for end-of-run logging.

    Deliberately best-effort: returns '' on any failure, since timing info
    is nice-to-have only.
    """
    import time

    try:
        meta = parse_invocation_metadata_args(maybe_metadata_args)
        invoked_ts = meta.invoked_at.timestamp()
        waited_min = (start_timestamp - invoked_ts) / 60
        total_min = (time.time() - invoked_ts) / 60
        return (
            f" (waited {waited_min:.2f} minutes, total time {total_min:.2f} minutes)"
            f" - version: {meta.invoker_code_version}"
        )
    except Exception:
        return ""
@@ -0,0 +1,52 @@
1
+ ## utilities for providing remote context for naming things uniquely:
2
+ import typing as ty
3
+
4
+ from thds.core.stack_context import StackContext
5
+
6
+ from . import types, uris
7
+
8
# Set by runners: unique across _separate_ functions within a pipeline id.
PipelineFunctionUniqueKey = StackContext("Mops2PipelineFunctionUniqueKey", default="")
# Set by runners: unique per invocation (args/kwargs hash) of that function.
FunctionArgumentsHashUniqueKey = StackContext("Mops2FunctionArgumentsHashUniqueKey", default="")
10
+
11
+
12
def pipeline_function_invocation_unique_key() -> ty.Optional[ty.Tuple[str, str]]:
    """Return the (function key, invocation key) pair provided by the runner.

    A runner that supplies these guarantees the first string is unique across
    all _separate_ functions within a given pipeline id, and the second is
    unique for every unique invocation of that same function.

    Returns None when not running inside a mops runner, or when the runner
    provides no value.
    """
    function_key = PipelineFunctionUniqueKey()
    invocation_key = FunctionArgumentsHashUniqueKey()
    if not function_key or not invocation_key:
        return None  # either element missing means we have nothing usable
    return function_key, invocation_key
27
+
28
+
29
def invocation_output_uri(storage_root: uris.UriIsh = "", name: str = "") -> str:
    """Build a unique output URI for the current runner invocation.

    A single-output function may safely omit `name`; a function with multiple
    outputs must provide a meaningful name for each one. The name is used
    twice so the final path segment carries a file extension:

    <pipeline> <function mod/name > <your name > <args,kwargs hash >
    nppes/2023/thds.nppes.intake:run/<name goes here>/CoastOilAsset.IVZ9KplQKlNgxQHav0jIMUS9p4Kbn3N481e0Uvs

    Raises NotARunnerContext outside a `thds.mops.pure` runner.
    """
    root = str(storage_root or uris.ACTIVE_STORAGE_ROOT())
    keys = pipeline_function_invocation_unique_key()
    if not keys:
        raise types.NotARunnerContext(
            "`invocation_output_uri` must be used in a `thds.mops.pure` runner context."
        )
    function_key, args_hash_key = keys
    middle_segment = "--".join(part for part in (name, args_hash_key) if part)
    return uris.lookup_blob_store(root).join(root, function_key, middle_segment, name)
@@ -0,0 +1,15 @@
1
+ # Sometimes you need to 'unwrap' a partial. This will let you do that.
2
+ import typing as ty
3
+ from functools import partial
4
+
5
+ from .types import Args, Kwargs, T
6
+
7
+
8
def unwrap_partial(
    func: ty.Callable[..., T], args: Args, kwargs: Kwargs
) -> ty.Tuple[ty.Callable[..., T], Args, Kwargs]:
    """Peel away any layers of functools.partial, merging their bound arguments.

    Bound positionals go before the supplied ones, and supplied keywords
    override the partial's keywords - matching how calling the partial behaves.
    """
    unwrapped = func
    while isinstance(unwrapped, partial):
        args = (*unwrapped.args, *args)
        kwargs = {**unwrapped.keywords, **kwargs}
        unwrapped = unwrapped.func
    return unwrapped, args, kwargs
@@ -0,0 +1,62 @@
1
+ # This file must not import anything else from `remote.core` - it is a 'leaf' of our tree
2
+ # because it is depended upon by so many other things.
3
+ import os
4
+ from datetime import datetime
5
+
6
+ from thds.core import hostname, log, meta
7
+
8
+ from ..._utils.colorize import colorized
9
+
10
# this is a global instead of a StackContext because we _do_ want it
# to spill over automatically into new threads.
_PIPELINE_ID = ""  # empty until set explicitly or lazily generated by get_pipeline_id
logger = log.getLogger(__name__)
14
+
15
+
16
def __set_or_generate_pipeline_id_if_empty() -> None:
    """Set the global pipeline id: '<repo-or-image>/<short-commit>' when a name
    is known and the git tree is clean, otherwise a generated host/time/pid id
    (with a warning, so users know one was made up for them)."""
    unique_name = meta.get_repo_name() or os.getenv("THDS_DOCKER_IMAGE_NAME") or ""
    short_commit = meta.get_commit()[:7] if meta.is_clean() else ""

    if unique_name and short_commit:
        set_pipeline_id(f"{unique_name}/{short_commit}")
        return

    # host name can be a group/directory now
    timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    generated = f"{hostname.friendly()}/{timestamp}-p{os.getpid()}"
    logger.warning(
        colorized(fg="black", bg="yellow")(f"Generated pipeline id '{generated}' for this run")
    )
    set_pipeline_id(generated)
40
+
41
+
42
def get_pipeline_id() -> str:
    """This will return the stack-local pipeline id, if set, or, if
    that is not set, will generate a global pipeline id and return
    that.

    Once a global pipeline id is generated, it will not be
    regenerated, although it can be overridden as a global with
    set_pipeline_id, and overridden for the stack with a pipeline id
    mask (see the pipeline_id_mask module).
    """
    if not _PIPELINE_ID:
        __set_or_generate_pipeline_id_if_empty()
    assert _PIPELINE_ID
    return _PIPELINE_ID
55
+
56
+
57
def set_pipeline_id(new_pipeline_id: str) -> None:
    """Override the current global pipeline id.

    Empty strings are quietly ignored, since we always want a value here.
    """
    if not new_pipeline_id:
        return
    global _PIPELINE_ID
    _PIPELINE_ID = new_pipeline_id
@@ -0,0 +1,79 @@
1
+ """Public API for masking the mops pipeline id.
2
+ """
3
+
4
+ import re
5
+ import typing as ty
6
+ from contextlib import contextmanager
7
+ from functools import lru_cache
8
+
9
+ from thds.core.stack_context import StackContext
10
+
11
+ from .pipeline_id import get_pipeline_id
12
+
13
# Stack-local override for the global pipeline id; "" means "no mask active".
_PIPELINE_ID_MASK = StackContext("PIPELINE_ID_MASK", "")


def get_pipeline_id_mask() -> str:
    """Returns the 'current' pipeline id, preferring a mask over the global."""
    return _PIPELINE_ID_MASK() or get_pipeline_id()
19
+
20
+
21
@contextmanager
def pipeline_id_mask(pipeline_id: str) -> ty.Iterator[bool]:
    """Apply `pipeline_id` as a stack-local mask, unless an outer mask exists.

    The outermost mask on this particular thread/green-thread stack wins: if a
    mask is already set, this call is a no-op. That makes the pattern useful
    for libraries that want to define a default pipeline_id for their
    use_runner-decorated functions without clobbering a caller's choice.

    Yields True when this call actually applied the mask, False when an outer
    mask took precedence.

    Thanks to the magic of @contextmanager, this works as a decorator as well.
    Note that as a plain context manager the mask does not carry over into
    threads launched by the current thread - to cross thread boundaries,
    prefer decorating the actual function that runs in the thread.
    """
    if _PIPELINE_ID_MASK():
        yield False
        return
    with _PIPELINE_ID_MASK.set(pipeline_id):
        yield True
50
+
51
+
52
F = ty.TypeVar("F", bound=ty.Callable)
# for backward-compatibility, we support pipeline-id-mask, even though the clearer name is
# ultimately pipeline-id.
_DOCSTRING_MASK_RE = re.compile(r".*pipeline-id(?:-mask)?:\s*(?P<pipeline_id>[^\s]+)\b", re.DOTALL)


@lru_cache(maxsize=32)
def extract_from_docstr(func: F, require: bool = True) -> str:
    """Pull a 'pipeline-id: <value>' declaration out of `func`'s docstring.

    With require=True (the default), a missing docstring or missing key raises
    ValueError. A key that is present but valueless always raises, since that
    is almost certainly an accident.
    """
    doc = func.__doc__
    if not doc:
        if require:
            raise ValueError(f"Function {func} must have a non-empty docstring to extract pipeline-id")
        return ""
    match = _DOCSTRING_MASK_RE.match(doc)
    if match is None:
        if "pipeline-id:" in doc or "pipeline-id-mask:" in doc:
            raise ValueError("pipeline-id is present but empty - this is probably an accident")
        if require:
            raise ValueError(f"Cannot extract pipeline-id from docstring for {func}")
        return ""
    mask = match.group("pipeline_id")
    assert mask, "pipeline-id should not have matched if it is empty"
    return mask
74
+
75
+
76
@contextmanager
def including_function_docstr(f: F) -> ty.Iterator[str]:
    # Apply f's docstring pipeline-id as a mask, if it declares one
    # (require=False makes a missing declaration a no-op), then yield
    # whichever pipeline id is in effect.
    with pipeline_id_mask(extract_from_docstr(f, require=False)):
        yield get_pipeline_id_mask()