thds.mops 3.6.20250219172032__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.mops might be problematic. Click here for more details.
- thds/mops/__about__.py +8 -0
- thds/mops/__init__.py +3 -0
- thds/mops/_compat.py +6 -0
- thds/mops/_utils/__init__.py +0 -0
- thds/mops/_utils/colorize.py +110 -0
- thds/mops/_utils/config_tree.py +167 -0
- thds/mops/_utils/exception.py +16 -0
- thds/mops/_utils/locked_cache.py +78 -0
- thds/mops/_utils/names.py +23 -0
- thds/mops/_utils/on_slow.py +28 -0
- thds/mops/_utils/once.py +30 -0
- thds/mops/_utils/temp.py +32 -0
- thds/mops/config.py +60 -0
- thds/mops/impure/__init__.py +2 -0
- thds/mops/impure/keyfunc.py +14 -0
- thds/mops/impure/runner.py +73 -0
- thds/mops/k8s/__init__.py +27 -0
- thds/mops/k8s/_shared.py +3 -0
- thds/mops/k8s/apply_yaml.py +22 -0
- thds/mops/k8s/auth.py +49 -0
- thds/mops/k8s/config.py +37 -0
- thds/mops/k8s/container_registry.py +14 -0
- thds/mops/k8s/jobs.py +57 -0
- thds/mops/k8s/launch.py +234 -0
- thds/mops/k8s/logging.py +239 -0
- thds/mops/k8s/namespace.py +17 -0
- thds/mops/k8s/node_selection.py +58 -0
- thds/mops/k8s/retry.py +75 -0
- thds/mops/k8s/too_old_resource_version.py +42 -0
- thds/mops/k8s/tools/krsync.py +50 -0
- thds/mops/k8s/tools/krsync.sh +22 -0
- thds/mops/k8s/wait_job.py +72 -0
- thds/mops/k8s/warn_image_backoff.py +63 -0
- thds/mops/k8s/watch.py +266 -0
- thds/mops/meta.json +8 -0
- thds/mops/parallel.py +36 -0
- thds/mops/pure/__init__.py +43 -0
- thds/mops/pure/_magic/__init__.py +0 -0
- thds/mops/pure/_magic/api.py +114 -0
- thds/mops/pure/_magic/sauce.py +152 -0
- thds/mops/pure/_magic/shims.py +34 -0
- thds/mops/pure/adls/__init__.py +1 -0
- thds/mops/pure/adls/_files.py +22 -0
- thds/mops/pure/adls/blob_store.py +185 -0
- thds/mops/pure/adls/output_fqn.py +17 -0
- thds/mops/pure/core/__init__.py +0 -0
- thds/mops/pure/core/content_addressed.py +31 -0
- thds/mops/pure/core/deferred_work.py +83 -0
- thds/mops/pure/core/entry/__init__.py +2 -0
- thds/mops/pure/core/entry/main.py +47 -0
- thds/mops/pure/core/entry/route_result.py +66 -0
- thds/mops/pure/core/entry/runner_registry.py +31 -0
- thds/mops/pure/core/file_blob_store.py +120 -0
- thds/mops/pure/core/lock/__init__.py +7 -0
- thds/mops/pure/core/lock/_acquire.py +192 -0
- thds/mops/pure/core/lock/_funcs.py +37 -0
- thds/mops/pure/core/lock/cli.py +73 -0
- thds/mops/pure/core/lock/maintain.py +150 -0
- thds/mops/pure/core/lock/read.py +39 -0
- thds/mops/pure/core/lock/types.py +37 -0
- thds/mops/pure/core/lock/write.py +136 -0
- thds/mops/pure/core/memo/__init__.py +6 -0
- thds/mops/pure/core/memo/function_memospace.py +267 -0
- thds/mops/pure/core/memo/keyfunc.py +53 -0
- thds/mops/pure/core/memo/overwrite_params.py +61 -0
- thds/mops/pure/core/memo/results.py +103 -0
- thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
- thds/mops/pure/core/metadata.py +230 -0
- thds/mops/pure/core/output_naming.py +52 -0
- thds/mops/pure/core/partial.py +15 -0
- thds/mops/pure/core/pipeline_id.py +62 -0
- thds/mops/pure/core/pipeline_id_mask.py +79 -0
- thds/mops/pure/core/script_support.py +25 -0
- thds/mops/pure/core/serialize_big_objs.py +73 -0
- thds/mops/pure/core/serialize_paths.py +149 -0
- thds/mops/pure/core/source.py +291 -0
- thds/mops/pure/core/types.py +142 -0
- thds/mops/pure/core/uris.py +81 -0
- thds/mops/pure/core/use_runner.py +47 -0
- thds/mops/pure/joblib/__init__.py +1 -0
- thds/mops/pure/joblib/backend.py +81 -0
- thds/mops/pure/joblib/batching.py +67 -0
- thds/mops/pure/pickling/__init__.py +3 -0
- thds/mops/pure/pickling/_pickle.py +193 -0
- thds/mops/pure/pickling/memoize_only.py +22 -0
- thds/mops/pure/pickling/mprunner.py +173 -0
- thds/mops/pure/pickling/pickles.py +149 -0
- thds/mops/pure/pickling/remote.py +145 -0
- thds/mops/pure/pickling/sha256_b64.py +71 -0
- thds/mops/pure/runner/__init__.py +0 -0
- thds/mops/pure/runner/local.py +239 -0
- thds/mops/pure/runner/shim_builder.py +25 -0
- thds/mops/pure/runner/simple_shims.py +21 -0
- thds/mops/pure/runner/strings.py +1 -0
- thds/mops/pure/runner/types.py +28 -0
- thds/mops/pure/tools/__init__.py +0 -0
- thds/mops/pure/tools/history.py +35 -0
- thds/mops/pure/tools/inspect.py +372 -0
- thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
- thds/mops/pure/tools/stress.py +63 -0
- thds/mops/pure/tools/summarize/__init__.py +4 -0
- thds/mops/pure/tools/summarize/cli.py +293 -0
- thds/mops/pure/tools/summarize/run_summary.py +143 -0
- thds/mops/py.typed +0 -0
- thds/mops/testing/__init__.py +0 -0
- thds/mops/testing/deferred_imports.py +81 -0
- thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
- thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
- thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
- thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
- thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
thds/mops/__about__.py
ADDED
thds/mops/__init__.py
ADDED
thds/mops/_compat.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
import random
|
|
3
|
+
import typing as ty
|
|
4
|
+
from functools import partial
|
|
5
|
+
|
|
6
|
+
from colors import color, csscolors
|
|
7
|
+
|
|
8
|
+
pref = "\033["
|
|
9
|
+
reset = f"{pref}0m"
|
|
10
|
+
|
|
11
|
+
_RESERVED_COLORS = [
|
|
12
|
+
"black",
|
|
13
|
+
# Various whitish-looking colors
|
|
14
|
+
"aliceblue",
|
|
15
|
+
"antiquewhite",
|
|
16
|
+
"floralwhite",
|
|
17
|
+
"ghostwhite",
|
|
18
|
+
"ivory",
|
|
19
|
+
"white",
|
|
20
|
+
"whitesmoke",
|
|
21
|
+
"snow",
|
|
22
|
+
"seashell",
|
|
23
|
+
"mintcream",
|
|
24
|
+
"honeydew",
|
|
25
|
+
"azure",
|
|
26
|
+
"beige",
|
|
27
|
+
"cornsilk",
|
|
28
|
+
"floralwhite",
|
|
29
|
+
# These are pretty illegible on a black background
|
|
30
|
+
"darkblue",
|
|
31
|
+
"indigo",
|
|
32
|
+
"mediumblue",
|
|
33
|
+
"navy",
|
|
34
|
+
"purple",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
_PREFERRED_COLORS = [
|
|
38
|
+
"mediumseagreen",
|
|
39
|
+
"cornflowerblue",
|
|
40
|
+
"gold",
|
|
41
|
+
"salmon",
|
|
42
|
+
"violet",
|
|
43
|
+
"limegreen",
|
|
44
|
+
"dodgerblue",
|
|
45
|
+
"goldenrod",
|
|
46
|
+
"indianred",
|
|
47
|
+
"fuchsia",
|
|
48
|
+
"forestgreen",
|
|
49
|
+
"royalblue",
|
|
50
|
+
"yellow",
|
|
51
|
+
"chocolate",
|
|
52
|
+
"palevioletred",
|
|
53
|
+
"mediumspringgreen",
|
|
54
|
+
"deepskyblue",
|
|
55
|
+
"khaki",
|
|
56
|
+
"red",
|
|
57
|
+
"deeppink",
|
|
58
|
+
"seagreen",
|
|
59
|
+
"cyan",
|
|
60
|
+
"greenyellow",
|
|
61
|
+
"sandybrown",
|
|
62
|
+
"orchid",
|
|
63
|
+
"lightgreen",
|
|
64
|
+
"steelblue",
|
|
65
|
+
"darkgoldenrod",
|
|
66
|
+
"coral",
|
|
67
|
+
"darkorchid",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _start_from(color_list: ty.List[str], index: int) -> ty.List[str]:
|
|
72
|
+
return color_list[index:] + color_list[:index]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _preferred_randgreen_start() -> ty.List[str]:
    """Rotate the preferred palette to a random multiple-of-5 offset.

    Every 5th entry of _PREFERRED_COLORS is a green, so each rotation
    starts the cycle on a different green.
    """
    offset = 5 * random.randint(0, 6)
    return _start_from(_PREFERRED_COLORS, offset)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _all_colors() -> ty.List[str]:
    """Return the preferred colors (random green-ish start) followed by every
    other CSS color in random order, skipping reserved names and RGB duplicates.
    """
    forbidden_rgbs = {csscolors.css_colors[name] for name in _RESERVED_COLORS}
    seen_rgbs = {csscolors.css_colors[name] for name in _PREFERRED_COLORS}
    assert len(seen_rgbs) == len(_PREFERRED_COLORS)  # no RGB dupes in the preferred list

    remaining = list(csscolors.css_colors.items())
    random.shuffle(remaining)

    names = _preferred_randgreen_start()
    for name, rgb in remaining:
        if rgb in seen_rgbs or rgb in forbidden_rgbs:
            continue
        seen_rgbs.add(rgb)  # dedupe: many CSS names share one RGB value
        names.append(name)
    return names
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Endless round-robin over the usable color names; each next_color() call yields the next one.
next_color = ty.cast(ty.Callable[[], str], partial(next, itertools.cycle(_all_colors())))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def colorized(fg: str, bg: str = "", style: str = "") -> ty.Callable[[str], str]:
    """Return a function that renders its string argument with the given
    foreground/background/style attributes (via the `colors` package).
    """
    return partial(color, fg=fg, bg=bg, style=style)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def make_colorized_out(
    colorized: ty.Callable[[str], str], *, fmt_str: str = "{}", out: ty.Callable[[str], ty.Any] = print
) -> ty.Callable[[str], None]:
    """Compose formatting, colorization, and an output sink into a single writer.

    The returned callable formats its argument with `fmt_str`, colorizes it,
    and passes the result to `out` (stdout by default), returning None.
    """

    def emit(message: str) -> None:
        out(colorized(fmt_str.format(message)))

    return emit
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import types
|
|
3
|
+
import typing as ty
|
|
4
|
+
from functools import partial
|
|
5
|
+
|
|
6
|
+
from thds import core
|
|
7
|
+
|
|
8
|
+
from .names import full_name_and_callable
|
|
9
|
+
|
|
10
|
+
IGNORED_PACKAGES = ["thds.mops"]
|
|
11
|
+
# if a library needs to build on top of mops, it can put itself into this
|
|
12
|
+
# list and we'll ignore it when looking for the 'true calling frame'
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_first_external_module(ignore_packages: ty.Collection[str] = IGNORED_PACKAGES) -> str:
    """Walk up the call stack and return the __name__ of the first module
    whose name does not start with any entry in `ignore_packages`.

    Returns "" when no frame is available (some interpreters) or when
    every caller is in an ignored package.
    """
    frame = inspect.currentframe()
    if not frame:
        return ""
    try:
        while frame := frame.f_back:  # type: ignore
            module_name = frame.f_globals["__name__"]
            if not any(module_name.startswith(pkg) for pkg in ignore_packages):
                return module_name
        return ""  # fallback if no external caller found
    finally:
        # Holding frame objects in locals creates reference cycles; the inspect
        # docs recommend deleting the reference when done.
        del frame
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
Pathable = ty.Union[str, types.ModuleType, ty.Callable, None]
|
|
34
|
+
_NONE = object()
|
|
35
|
+
V = ty.TypeVar("V")
|
|
36
|
+
logger = core.log.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def to_dotted_path(pathable: Pathable) -> str:
    """Normalize a Pathable to a dot-separated module-style path.

    - str: returned verbatim.
    - module: its __name__.
    - None: the first calling module outside of mops (raises ValueError if none).
    - callable: derived from its fully-qualified name.
    """
    if isinstance(pathable, str):
        return pathable

    if isinstance(pathable, types.ModuleType):
        return pathable.__name__

    if pathable is None:
        module_path = _get_first_external_module()
        if not module_path:
            raise ValueError(f"Found no module outside mops within {pathable}")
        return module_path

    return full_name_and_callable(pathable)[0].replace("--", ".")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ConfigTree(ty.Generic[V]):
    """This is a cute little utility class for applying homogeneously-typed configuration
    following hierarchical (tree-like) paths.

    Generally, the config closest to the 'leaf' path will be used, but there is also a
    'mask' option to override subtrees.

    Imagine you have some modules:
    - foo.bar.baz.materialize
    - foo.bar.quux.materialize
    - foo.george.materialize
    - foo.steve.materialize

    in each of which you have several materialization functions using mops.

    Some API might construct one of these objects to afford you a way to 'set' the config
    at each level of your hierarchy.

    Inside foo.bar.__init__.py, you could call

    - `the_api.setv(a_config_object)`

    and this would set the config for anything where the module path to it included foo.bar.

    But if foo.bar.baz.materialize wanted to set config for everything inside itself,
    at the top of that module you'd call

    - `the_api.setv(diff_config_object)`

    and this would set the config for that module only.

    If you need to _override_ the config for an entire subtree, we call this masking.
    You can call the_api.setv(value, mask=True) to mask the subtree.

    - `the_api.setv(value, 'foo', mask=True)`

    will mask everything under foo, including bar, george, and steve.

    This isn't truly limited to modules, either - you can pass any module _or_ callable
    in to setv as the object from which you want us to derive a dot-separated path.
    Or you can pass in an arbitrary dot-separated string and we'll use it verbatim.
    """

    def __init__(self, name: str, parse: ty.Optional[ty.Callable[[ty.Any], V]] = None):
        """If provided, parse must be an idempotent parser. In other words, parse(parse(x)) == parse(x)"""
        self.registry = core.config.ConfigRegistry(name)
        self.parse = parse or (lambda v: v)
        # Partial used to create (and implicitly register) new ConfigItems in our registry.
        self._make_config = partial(
            core.config.ConfigItem[V], registry=self.registry, name_transform=lambda s: s, parse=parse
        )

    def getv(self, path: str, default: V = ty.cast(V, _NONE)) -> V:
        """Look up config for `path`, masks first, then longest-prefix match.

        Masks are checked shortest-prefix-first ('__mask', '__mask.p1',
        '__mask.p1.p2', ...) so a mask nearer the root wins over anything
        more specific below it.
        """
        parts = [*path.split(".")]
        mask = "__mask"
        for i in range(0, len(parts) + 1):
            prefix = ".".join([mask, *parts[:i]])
            # we do an 'in' check b/c the value might not be truthy, or even non-None
            if prefix in self.registry:
                return self.registry[prefix]()

        # If not masked, fall back to normal hierarchical lookup
        return self._get_most_specific_v(path, parts, default)

    def _get_most_specific_v(
        self, path: str, parts: ty.Sequence[str], default: V = ty.cast(V, _NONE)
    ) -> V:
        # Longest-prefix match: try the full path first, then peel off trailing segments,
        # ending with the empty prefix (the 'global' setting).
        for i in range(len(parts), -1, -1):
            prefix = ".".join(parts[:i])
            # we do an 'in' check b/c the value might not be truthy, or even non-None
            if prefix in self.registry:
                return self.registry[prefix]()
        assert prefix == ""  # the final iteration always checks the empty (global) prefix

        if default is not _NONE:
            return default

        name = self.registry.name
        raise RuntimeError(f"No {name} configuration matches {path} and no global config was set")

    def setv(
        self, value: V, pathable: Pathable = None, *, mask: bool = False
    ) -> core.config.ConfigItem[V]:
        """Set the value for the given Pathable, or the current module if no Pathable is given.
        By default, greater overlap in paths will supersede less overlap.

        mask=True will override any 'more specific' config below it in the hierarchy.
        """
        config_path = to_dotted_path(pathable)
        if mask:
            # filter(None, ...) drops the empty path so a global mask is just '__mask'
            config_path = ".".join(filter(None, ["__mask", config_path]))
            log_msg = "Masking all [%s] config under '%s' with %s"
        else:
            log_msg = "Setting [%s] '%s' to %s"
        logger.debug(log_msg, self.registry.name, config_path, value)
        if config_item := self.registry.get(config_path):
            config_item.set_global(self.parse(value))
        else:
            config_item = self._make_config(config_path, default=value)  # also registers the ConfigItem
        return config_item

    def __setitem__(self, key: str, value: V) -> None:
        # dict-style sugar for setv with a verbatim string path
        self.setv(value, pathable=key)

    def load_config(self, config: ty.Mapping[str, ty.Any]) -> None:
        """Loads things with an inner key matching this name into the config."""
        mask_name = f".__mask.{self.registry.name}"
        conf_name = f".{self.registry.name}"
        logger.debug("Loading config for %s", self.registry.name)
        for key, value in core.config.flatten_config(config).items():
            if key.endswith(conf_name):
                # strip the trailing '.<name>' suffix; a '.__mask.<name>' suffix means mask
                self.setv(value, key[: -len(conf_name)], mask=key.endswith(mask_name))

    def __repr__(self) -> str:
        return f"ConfigTree('{self.registry.name}', {list(self.registry.items())})"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import typing as ty
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@contextlib.contextmanager
def catch(allow: ty.Callable[[Exception], bool]) -> ty.Iterator:
    """try-except but flexible. Catch only Exceptions matching the filter.

    Useful for libraries like azure where all the Exceptions have the
    same type.
    """
    try:
        yield
    except Exception as caught:
        if allow(caught):
            return  # swallow: the filter accepted this exception
        raise
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import typing as ty
|
|
3
|
+
from threading import Lock, RLock
|
|
4
|
+
from typing import Optional, Union
|
|
5
|
+
|
|
6
|
+
from cachetools import keys
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from cachetools.func import _CacheInfo # type: ignore
|
|
10
|
+
except ImportError:
|
|
11
|
+
# this moved between 5.2.1 and 5.3.
|
|
12
|
+
from cachetools import _CacheInfo # type: ignore
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
F = ty.TypeVar("F", bound=ty.Callable)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def locked_cached(
    cache: ty.Any, typed: bool = False, lock: Optional[Union[RLock, Lock]] = None
) -> ty.Callable[[F], F]:
    """Like cachetools.func._cache, except it locks the actual
    function call but does _not_ lock reading from the cache the first
    time, so most of the time, cache hits are nearly free, but you
    don't call the function more than once for the same arguments.

    :param cache: a cachetools-style cache (must expose maxsize/currsize,
        item access, setdefault, and clear).
    :param typed: if True, arguments of different types cache separately.
    :param lock: optional externally-supplied lock; defaults to a new RLock.
    """
    maxsize = cache.maxsize

    def decorator(func: F) -> F:
        key = keys.typedkey if typed else keys.hashkey
        hits = misses = 0
        _lock = lock or RLock()

        def wrapper(*args, **kwargs):  # type: ignore
            nonlocal hits, misses
            k = key(*args, **kwargs)

            # optimistic lookup on a cache that is threadsafe for reads
            # NOTE(review): hits/misses updates in this unlocked path are
            # best-effort counters, not exact under concurrency.
            try:
                v = cache[k]
                hits += 1
                return v
            except KeyError:
                # double-checked locking: re-check under the lock so only
                # one thread computes the value for a given key
                with _lock:
                    try:
                        v = cache[k]
                        hits += 1
                        return v
                    except KeyError:
                        misses += 1

                    v = func(*args, **kwargs)
                    # in case of a race, prefer the item already in the cache
                    try:
                        return cache.setdefault(k, v)
                    except ValueError:
                        return v  # value too large

        def cache_info() -> _CacheInfo:
            # snapshot sizes under the lock; counters read without it (best-effort)
            with _lock:
                maxsize = cache.maxsize
                currsize = cache.currsize
            return _CacheInfo(hits, misses, maxsize, currsize)

        def cache_clear() -> None:
            nonlocal hits, misses
            with _lock:
                try:
                    cache.clear()
                finally:
                    hits = misses = 0

        wrapper.cache_info = cache_info  # type: ignore
        wrapper.cache_clear = cache_clear  # type: ignore
        wrapper.cache_parameters = lambda: {"maxsize": maxsize, "typed": typed}  # type: ignore
        functools.update_wrapper(wrapper, func)
        return ty.cast(F, wrapper)

    return decorator
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import typing as ty
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def full_name_and_callable(func: ty.Any) -> ty.Tuple[str, ty.Callable]:
    """return {module}--{name} for an actual (non-wrapped) function or class,
    plus the unwrapped callable itself.
    """
    if hasattr(func, "func"):  # support functools.partial
        return full_name_and_callable(func.func)

    module = func.__module__
    try:
        name = func.__name__
    except AttributeError:
        try:
            # Instances of callable classes have no __name__ of their own;
            # fall back to the name of their class.
            name = func.__class__.__name__
        except AttributeError:
            name = "MOPS_UNKNOWN_NAME"

    full_name = f"{module}--{name}"
    return full_name, func
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import typing as ty
|
|
2
|
+
from functools import wraps
|
|
3
|
+
from timeit import default_timer
|
|
4
|
+
|
|
5
|
+
from thds.core import log
|
|
6
|
+
|
|
7
|
+
from .colorize import colorized, make_colorized_out
|
|
8
|
+
|
|
9
|
+
F = ty.TypeVar("F", bound=ty.Callable)
|
|
10
|
+
logger = log.getLogger(__name__)
|
|
11
|
+
_SLOW = colorized(fg="yellow", bg="black")
|
|
12
|
+
LogSlow = make_colorized_out(_SLOW, out=logger.warning)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def on_slow(callback: ty.Callable[[float], None], slow_seconds: float = 3.0) -> ty.Callable[[F], F]:
|
|
16
|
+
def deco(f: F) -> F:
|
|
17
|
+
@wraps(f)
|
|
18
|
+
def wrapper(*args, **kwargs): # type: ignore
|
|
19
|
+
start_time = default_timer()
|
|
20
|
+
r = f(*args, **kwargs)
|
|
21
|
+
elapsed_s = default_timer() - start_time
|
|
22
|
+
if elapsed_s > slow_seconds:
|
|
23
|
+
callback(elapsed_s)
|
|
24
|
+
return r
|
|
25
|
+
|
|
26
|
+
return ty.cast(F, wrapper)
|
|
27
|
+
|
|
28
|
+
return deco
|
thds/mops/_utils/once.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import typing as ty
|
|
3
|
+
|
|
4
|
+
FNone = ty.TypeVar("FNone", bound=ty.Callable[[], None])


class Once:
    """Uses unique IDs to guarantee that an operation has only run
    once in the lifetime of this object, and waits for it to be complete.

    Is a potential source of memory leaks, since each event will be
    stored until the entire Once object is disposed.
    """

    def __init__(self) -> None:
        self.lock = threading.Lock()
        self.events: ty.Dict[ty.Hashable, threading.Event] = dict()

    def run_once(self, run_id: ty.Hashable, f: FNone) -> None:
        """Run f exactly once per run_id; other callers block until it finishes."""
        i_am_the_runner = False
        # cheap unlocked check first; re-check under the lock to decide the runner
        if run_id not in self.events:
            with self.lock:
                if run_id not in self.events:
                    self.events[run_id] = threading.Event()
                    i_am_the_runner = True
        if i_am_the_runner:
            f()
            self.events[run_id].set()
        else:
            self.events[run_id].wait()
|
thds/mops/_utils/temp.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Utility to make returning files via their Paths less confusing in the application code."""
|
|
2
|
+
import typing as ty
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from thds.core import lazy, scope, tmp
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@contextmanager
def _temp_dir() -> ty.Iterator[Path]:
    """Yield a freshly-created directory at a same-filesystem temp path.

    Cleanup is delegated to tmp.temppath_same_fs's own context exit.
    """
    with tmp.temppath_same_fs() as dirpath:
        dirpath.mkdir()
        yield dirpath
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
_FOREVER_SCOPE = scope.Scope("until_mops_exit")
|
|
17
|
+
_SINGLE_REMOTE_TMP_DIR = lazy.Lazy(lambda: _FOREVER_SCOPE.enter(_temp_dir()))
|
|
18
|
+
# there's really no obvious reason why you'd ever need more than one of these as long as
|
|
19
|
+
# you're giving your actual output files names, so we create one as a global for general
|
|
20
|
+
# use.
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def new_tempdir() -> Path:
    """Create a fresh temporary directory tied to the process-lifetime scope
    (cleaned up when the interpreter exits), and return its Path."""
    return _FOREVER_SCOPE.enter(_temp_dir())
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def tempdir() -> Path:
|
|
28
|
+
"""Lazily creates a global/shared temporary directory and returns it as a Path.
|
|
29
|
+
|
|
30
|
+
The files will get cleaned up when the interpreter exits.
|
|
31
|
+
"""
|
|
32
|
+
return _SINGLE_REMOTE_TMP_DIR()
|
thds/mops/config.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import typing as ty
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from thds.core import config, log
|
|
6
|
+
from thds.mops._compat import tomllib
|
|
7
|
+
|
|
8
|
+
logger = log.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def find_first_upward_mops_toml() -> ty.Optional[Path]:
    """Search the current directory and each of its ancestors for a readable
    `.mops.toml`; return the first one (resolved), or None.

    A PermissionError anywhere along the walk aborts the search.
    """
    cwd = Path.cwd()
    try:
        for directory in (cwd, *cwd.parents):
            candidate = directory / ".mops.toml"
            if candidate.is_file() and os.access(candidate, os.R_OK):
                return candidate.resolve()
    except PermissionError:
        pass
    return None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def first_found_config_file() -> ty.Optional[Path]:
    """Return the first existing config file among, in priority order:
    $MOPS_CONFIG, the nearest upward `.mops.toml`, then `~/.mops.toml`.
    """
    candidates: ty.List[ty.Optional[Path]] = [
        Path(os.environ.get("MOPS_CONFIG", "")),
        find_first_upward_mops_toml(),
        Path.home() / ".mops.toml",
    ]
    for candidate in candidates:
        if candidate is not None and candidate.is_file():
            return candidate
    return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load(config_file: ty.Optional[Path], name: str = "mops") -> ty.Dict[str, ty.Any]:
    """Parse `config_file` as TOML and return its contents.

    :param config_file: path to a TOML file, or None/empty for no config.
    :param name: label used only for the debug log line.
    :return: the parsed mapping, or an empty dict when no file was given.
    """
    if not config_file:
        return dict()
    logger.debug("Loading %s config from %s", name, config_file)
    # use a context manager so the file handle is closed promptly
    # (the original leaked an open file object)
    with open(config_file, "rb") as fp:
        return tomllib.load(fp)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
max_concurrent_network_ops = config.item("mops.max_concurrent_network_ops", 8, parse=int)
|
|
45
|
+
# 8 clients has been obtained experimentally via the `stress_test`
|
|
46
|
+
# application running on a Mac M1 laptop running 200 parallel 5 second
|
|
47
|
+
# tasks, though no significant difference was obtained between 5 and
|
|
48
|
+
# 20 clients. Running a similar stress test from your orchestrator may
|
|
49
|
+
# be a good idea if you are dealing with hundreds of micro (<20
|
|
50
|
+
# second) remote tasks.
|
|
51
|
+
|
|
52
|
+
open_files_limit = config.item("mops.resources.max_open_files", 10000)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _filter_to_known_mops_config(config: ty.Dict[str, ty.Any]) -> ty.Dict[str, ty.Any]:
|
|
56
|
+
return {k: v for k, v in config.items() if k.startswith("mops.") or k.startswith("thds.mops")}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# load this after creating the config items
|
|
60
|
+
config.set_global_defaults(_filter_to_known_mops_config(load(first_found_config_file())))
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Impure keyfunctions that are useful for common cases."""
|
|
2
|
+
import typing as ty
|
|
3
|
+
|
|
4
|
+
from ..pure.core.memo.keyfunc import Args, Keyfunc, Kwargs
|
|
5
|
+
from ..pure.core.memo.overwrite_params import parameter_overwriter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def nil_args(*named_parameters: str) -> Keyfunc:
    """Build a Keyfunc that nulls out the named parameters before the
    memoization key is computed, so their values don't affect memoization.
    """
    nil_overwrites = {name: None for name in named_parameters}

    def nil_args_impure_keyfunc(
        c: ty.Callable, args: Args, kwargs: Kwargs
    ) -> ty.Tuple[ty.Callable, Args, Kwargs]:
        new_args, new_kwargs = parameter_overwriter(c, nil_overwrites)(args, kwargs)
        return c, new_args, new_kwargs

    return nil_args_impure_keyfunc
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Builds on top of the pure.MemoizingPicklingRunner to provide
|
|
2
|
+
impure, customizable memoization.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import typing as ty
|
|
6
|
+
|
|
7
|
+
from typing_extensions import ParamSpec
|
|
8
|
+
|
|
9
|
+
from thds.core import log
|
|
10
|
+
from thds.core.stack_context import StackContext
|
|
11
|
+
|
|
12
|
+
from ..pure.core.memo.keyfunc import ArgsOnlyKeyfunc, Keyfunc, autowrap_args_only_keyfunc
|
|
13
|
+
from ..pure.core.types import Args, Kwargs
|
|
14
|
+
from ..pure.core.uris import UriResolvable
|
|
15
|
+
from ..pure.pickling.mprunner import NO_REDIRECT, MemoizingPicklingRunner, Redirect
|
|
16
|
+
from ..pure.runner.simple_shims import samethread_shim
|
|
17
|
+
|
|
18
|
+
logger = log.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
R = ty.TypeVar("R")
|
|
22
|
+
P = ParamSpec("P")
|
|
23
|
+
F_Args_Kwargs = ty.Tuple[ty.Callable, Args, Kwargs]
|
|
24
|
+
_ORIGINAL_F_ARGS_KWARGS: StackContext[ty.Optional[F_Args_Kwargs]] = StackContext("f_args_kwargs", None)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _perform_original_invocation(*_args: ty.Any, **_kwargs: ty.Any) -> ty.Any:
    """Replay the (function, args, kwargs) stashed on the StackContext,
    ignoring whatever arguments the runner machinery passes in.
    """
    stashed = _ORIGINAL_F_ARGS_KWARGS()
    assert stashed is not None, "_perform_original_invocation() must be called from within a runner"
    func, args, kwargs = stashed
    return func(*args, **kwargs)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class KeyedLocalRunner(MemoizingPicklingRunner):
    """The only purpose for using this is to reify/memoize your results.

    Allows changing the memoization key, at the expense of
    (theoretical) purity, since now we're memoizing on something you
    made up, rather than something directly derived from the full set
    of arguments passed to your function.

    When the 'remote' side is reached, the original (args, kwargs)
    will be passed to the result of change_function, or the original
    function if change_function is the default (identity).

    This runs the 'remote' function in the same process - your
    function, if no memoized result is found, will execute in the same
    thread where it was originally called. This runner will use the
    return values of change_key_elements _only_ for the purposes of
    keying the cache.
    """

    def __init__(
        self,
        blob_storage_root: UriResolvable,
        *,
        keyfunc: ty.Union[ArgsOnlyKeyfunc, Keyfunc],
        redirect: Redirect = NO_REDIRECT,
    ):
        # keyfunc rewrites (f, args, kwargs) purely for memo-key purposes
        self._impure_keyfunc = autowrap_args_only_keyfunc(keyfunc)
        self._pre_pickle_redirect = redirect
        super().__init__(
            samethread_shim,  # 'remote' execution happens on the calling thread
            blob_storage_root,
            # the superclass invokes this stand-in, which replays the true call
            # stashed in _ORIGINAL_F_ARGS_KWARGS by __call__ below
            redirect=lambda _f, _args, _kwargs: _perform_original_invocation,
        )

    def __call__(self, raw_func: ty.Callable[P, R], raw_args: P.args, raw_kwargs: P.kwargs) -> R:
        # Stash the real (function, args, kwargs) so _perform_original_invocation
        # can replay it, then hand the key-transformed call to the memoizing superclass.
        actual_function_to_call = self._pre_pickle_redirect(raw_func, raw_args, raw_kwargs)
        with _ORIGINAL_F_ARGS_KWARGS.set((actual_function_to_call, raw_args, raw_kwargs)):
            return super().__call__(*self._impure_keyfunc(raw_func, raw_args, raw_kwargs))
|