thds.mops 3.6.20250219172032__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.mops might be problematic. Click here for more details.

Files changed (111) hide show
  1. thds/mops/__about__.py +8 -0
  2. thds/mops/__init__.py +3 -0
  3. thds/mops/_compat.py +6 -0
  4. thds/mops/_utils/__init__.py +0 -0
  5. thds/mops/_utils/colorize.py +110 -0
  6. thds/mops/_utils/config_tree.py +167 -0
  7. thds/mops/_utils/exception.py +16 -0
  8. thds/mops/_utils/locked_cache.py +78 -0
  9. thds/mops/_utils/names.py +23 -0
  10. thds/mops/_utils/on_slow.py +28 -0
  11. thds/mops/_utils/once.py +30 -0
  12. thds/mops/_utils/temp.py +32 -0
  13. thds/mops/config.py +60 -0
  14. thds/mops/impure/__init__.py +2 -0
  15. thds/mops/impure/keyfunc.py +14 -0
  16. thds/mops/impure/runner.py +73 -0
  17. thds/mops/k8s/__init__.py +27 -0
  18. thds/mops/k8s/_shared.py +3 -0
  19. thds/mops/k8s/apply_yaml.py +22 -0
  20. thds/mops/k8s/auth.py +49 -0
  21. thds/mops/k8s/config.py +37 -0
  22. thds/mops/k8s/container_registry.py +14 -0
  23. thds/mops/k8s/jobs.py +57 -0
  24. thds/mops/k8s/launch.py +234 -0
  25. thds/mops/k8s/logging.py +239 -0
  26. thds/mops/k8s/namespace.py +17 -0
  27. thds/mops/k8s/node_selection.py +58 -0
  28. thds/mops/k8s/retry.py +75 -0
  29. thds/mops/k8s/too_old_resource_version.py +42 -0
  30. thds/mops/k8s/tools/krsync.py +50 -0
  31. thds/mops/k8s/tools/krsync.sh +22 -0
  32. thds/mops/k8s/wait_job.py +72 -0
  33. thds/mops/k8s/warn_image_backoff.py +63 -0
  34. thds/mops/k8s/watch.py +266 -0
  35. thds/mops/meta.json +8 -0
  36. thds/mops/parallel.py +36 -0
  37. thds/mops/pure/__init__.py +43 -0
  38. thds/mops/pure/_magic/__init__.py +0 -0
  39. thds/mops/pure/_magic/api.py +114 -0
  40. thds/mops/pure/_magic/sauce.py +152 -0
  41. thds/mops/pure/_magic/shims.py +34 -0
  42. thds/mops/pure/adls/__init__.py +1 -0
  43. thds/mops/pure/adls/_files.py +22 -0
  44. thds/mops/pure/adls/blob_store.py +185 -0
  45. thds/mops/pure/adls/output_fqn.py +17 -0
  46. thds/mops/pure/core/__init__.py +0 -0
  47. thds/mops/pure/core/content_addressed.py +31 -0
  48. thds/mops/pure/core/deferred_work.py +83 -0
  49. thds/mops/pure/core/entry/__init__.py +2 -0
  50. thds/mops/pure/core/entry/main.py +47 -0
  51. thds/mops/pure/core/entry/route_result.py +66 -0
  52. thds/mops/pure/core/entry/runner_registry.py +31 -0
  53. thds/mops/pure/core/file_blob_store.py +120 -0
  54. thds/mops/pure/core/lock/__init__.py +7 -0
  55. thds/mops/pure/core/lock/_acquire.py +192 -0
  56. thds/mops/pure/core/lock/_funcs.py +37 -0
  57. thds/mops/pure/core/lock/cli.py +73 -0
  58. thds/mops/pure/core/lock/maintain.py +150 -0
  59. thds/mops/pure/core/lock/read.py +39 -0
  60. thds/mops/pure/core/lock/types.py +37 -0
  61. thds/mops/pure/core/lock/write.py +136 -0
  62. thds/mops/pure/core/memo/__init__.py +6 -0
  63. thds/mops/pure/core/memo/function_memospace.py +267 -0
  64. thds/mops/pure/core/memo/keyfunc.py +53 -0
  65. thds/mops/pure/core/memo/overwrite_params.py +61 -0
  66. thds/mops/pure/core/memo/results.py +103 -0
  67. thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
  68. thds/mops/pure/core/metadata.py +230 -0
  69. thds/mops/pure/core/output_naming.py +52 -0
  70. thds/mops/pure/core/partial.py +15 -0
  71. thds/mops/pure/core/pipeline_id.py +62 -0
  72. thds/mops/pure/core/pipeline_id_mask.py +79 -0
  73. thds/mops/pure/core/script_support.py +25 -0
  74. thds/mops/pure/core/serialize_big_objs.py +73 -0
  75. thds/mops/pure/core/serialize_paths.py +149 -0
  76. thds/mops/pure/core/source.py +291 -0
  77. thds/mops/pure/core/types.py +142 -0
  78. thds/mops/pure/core/uris.py +81 -0
  79. thds/mops/pure/core/use_runner.py +47 -0
  80. thds/mops/pure/joblib/__init__.py +1 -0
  81. thds/mops/pure/joblib/backend.py +81 -0
  82. thds/mops/pure/joblib/batching.py +67 -0
  83. thds/mops/pure/pickling/__init__.py +3 -0
  84. thds/mops/pure/pickling/_pickle.py +193 -0
  85. thds/mops/pure/pickling/memoize_only.py +22 -0
  86. thds/mops/pure/pickling/mprunner.py +173 -0
  87. thds/mops/pure/pickling/pickles.py +149 -0
  88. thds/mops/pure/pickling/remote.py +145 -0
  89. thds/mops/pure/pickling/sha256_b64.py +71 -0
  90. thds/mops/pure/runner/__init__.py +0 -0
  91. thds/mops/pure/runner/local.py +239 -0
  92. thds/mops/pure/runner/shim_builder.py +25 -0
  93. thds/mops/pure/runner/simple_shims.py +21 -0
  94. thds/mops/pure/runner/strings.py +1 -0
  95. thds/mops/pure/runner/types.py +28 -0
  96. thds/mops/pure/tools/__init__.py +0 -0
  97. thds/mops/pure/tools/history.py +35 -0
  98. thds/mops/pure/tools/inspect.py +372 -0
  99. thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
  100. thds/mops/pure/tools/stress.py +63 -0
  101. thds/mops/pure/tools/summarize/__init__.py +4 -0
  102. thds/mops/pure/tools/summarize/cli.py +293 -0
  103. thds/mops/pure/tools/summarize/run_summary.py +143 -0
  104. thds/mops/py.typed +0 -0
  105. thds/mops/testing/__init__.py +0 -0
  106. thds/mops/testing/deferred_imports.py +81 -0
  107. thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
  108. thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
  109. thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
  110. thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
  111. thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
thds/mops/__about__.py ADDED
@@ -0,0 +1,8 @@
1
+ from thds.core import meta
2
+
3
+ __version__ = meta.get_version("thds.mops")
4
+ __commit__ = meta.read_metadata(__name__).git_commit
5
+
6
+
7
def backward_compatible_with() -> int:
    """Return the oldest major version this release remains compatible with."""
    current_major_version = 2  # v2 is the current major version.
    return current_major_version
thds/mops/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from . import parallel # noqa
2
+ from .__about__ import __commit__, __version__ # noqa
3
+ from ._utils.temp import tempdir # noqa
thds/mops/_compat.py ADDED
@@ -0,0 +1,6 @@
1
+ # compatibility shims
2
+
3
+ try:
4
+ import tomllib # type: ignore [import-not-found] # noqa: F401
5
+ except ImportError:
6
+ import tomli as tomllib # noqa: F401
File without changes
@@ -0,0 +1,110 @@
1
+ import itertools
2
+ import random
3
+ import typing as ty
4
+ from functools import partial
5
+
6
+ from colors import color, csscolors
7
+
8
+ pref = "\033["
9
+ reset = f"{pref}0m"
10
+
11
+ _RESERVED_COLORS = [
12
+ "black",
13
+ # Various whitish-looking colors
14
+ "aliceblue",
15
+ "antiquewhite",
16
+ "floralwhite",
17
+ "ghostwhite",
18
+ "ivory",
19
+ "white",
20
+ "whitesmoke",
21
+ "snow",
22
+ "seashell",
23
+ "mintcream",
24
+ "honeydew",
25
+ "azure",
26
+ "beige",
27
+ "cornsilk",
28
+ "floralwhite",
29
+ # These are pretty illegible on a black background
30
+ "darkblue",
31
+ "indigo",
32
+ "mediumblue",
33
+ "navy",
34
+ "purple",
35
+ ]
36
+
37
# Colors handed out first, in this order (after rotation to a random start).
# The list is laid out in groups of five with a green at the head of each
# group (indices 0, 5, 10, 15, 20, 25); _preferred_randgreen_start relies on
# that layout by rotating to a multiple of 5.
_PREFERRED_COLORS = [
    "mediumseagreen",
    "cornflowerblue",
    "gold",
    "salmon",
    "violet",
    "limegreen",
    "dodgerblue",
    "goldenrod",
    "indianred",
    "fuchsia",
    "forestgreen",
    "royalblue",
    "yellow",
    "chocolate",
    "palevioletred",
    "mediumspringgreen",
    "deepskyblue",
    "khaki",
    "red",
    "deeppink",
    "seagreen",
    "cyan",
    "greenyellow",
    "sandybrown",
    "orchid",
    "lightgreen",
    "steelblue",
    "darkgoldenrod",
    "coral",
    "darkorchid",
]
69
+
70
+
71
+ def _start_from(color_list: ty.List[str], index: int) -> ty.List[str]:
72
+ return color_list[index:] + color_list[:index]
73
+
74
+
75
def _preferred_randgreen_start() -> ty.List[str]:
    """Return the preferred colors rotated to start at a randomly chosen green.

    Every fifth entry of ``_PREFERRED_COLORS`` is a green, so the rotation
    offset is a random multiple of 5 that is a valid index into the list.
    """
    # randrange excludes its upper bound, so each group head is equally likely.
    # (An inclusive randint(0, 6) could produce offset 30 on the 30-item list,
    # which is the same rotation as offset 0 and doubled its probability.)
    group_count = len(_PREFERRED_COLORS) // 5
    return _start_from(_PREFERRED_COLORS, random.randrange(group_count) * 5)
77
+
78
+
79
def _all_colors() -> ty.List[str]:
    """Build the full color rotation: preferred colors first, then the rest.

    The remaining CSS colors are appended in shuffled order, skipping any whose
    RGB value was already emitted and any RGB value on the reserved list.
    """
    forbidden_rgbs = {csscolors.css_colors[name] for name in _RESERVED_COLORS}
    seen_rgbs = {csscolors.css_colors[name] for name in _PREFERRED_COLORS}
    assert len(seen_rgbs) == len(_PREFERRED_COLORS)  # assert no RGB dupes in the preferred list
    shuffled = list(csscolors.css_colors.items())
    random.shuffle(shuffled)
    names = _preferred_randgreen_start()
    for name, rgb in shuffled:
        if rgb in seen_rgbs:
            continue
        # record the RGB before the reserved check, exactly as the original did
        seen_rgbs.add(rgb)
        if rgb not in forbidden_rgbs:
            names.append(name)
    return names
92
+
93
+
94
# Zero-argument callable returning the next color name. The color list is
# computed once at import time and then cycled endlessly.
next_color = ty.cast(ty.Callable[[], str], partial(next, itertools.cycle(_all_colors())))
95
+
96
+
97
def colorized(fg: str, bg: str = "", style: str = "") -> ty.Callable[[str], str]:
    """Return a function that wraps a string with the given fg/bg/style via ``color``."""
    color_kwargs = dict(fg=fg, bg=bg, style=style)

    def colorize(s: str) -> str:
        return color(s, **color_kwargs)

    return colorize
102
+
103
+
104
def make_colorized_out(
    colorized: ty.Callable[[str], str], *, fmt_str: str = "{}", out: ty.Callable[[str], ty.Any] = print
) -> ty.Callable[[str], None]:
    """Return a writer that formats a string, colorizes it, and passes it to ``out``.

    ``fmt_str`` is applied first (``fmt_str.format(s)``), then ``colorized``,
    and the result is handed to ``out`` (``print`` by default).
    """

    def _out(s: str) -> None:
        formatted = fmt_str.format(s)
        painted = colorized(formatted)
        out(painted)

    return _out
@@ -0,0 +1,167 @@
1
+ import inspect
2
+ import types
3
+ import typing as ty
4
+ from functools import partial
5
+
6
+ from thds import core
7
+
8
+ from .names import full_name_and_callable
9
+
10
+ IGNORED_PACKAGES = ["thds.mops"]
11
+ # if a library needs to build on top of mops, it can put itself into this
12
+ # list and we'll ignore it when looking for the 'true calling frame'
13
+
14
+
15
+ def _get_first_external_module(ignore_packages: ty.Collection[str] = IGNORED_PACKAGES) -> str:
16
+ frame = inspect.currentframe()
17
+ if not frame:
18
+ return ""
19
+
20
+ while frame := frame.f_back: # type: ignore
21
+ module_name = frame.f_globals["__name__"]
22
+ is_ignored = False
23
+ for ignore_package in ignore_packages:
24
+ if module_name.startswith(ignore_package):
25
+ is_ignored = True
26
+ break
27
+
28
+ if not is_ignored:
29
+ return module_name
30
+ return "" # fallback if no external caller found
31
+
32
+
33
+ Pathable = ty.Union[str, types.ModuleType, ty.Callable, None]
34
+ _NONE = object()
35
+ V = ty.TypeVar("V")
36
+ logger = core.log.getLogger(__name__)
37
+
38
+
39
def to_dotted_path(pathable: Pathable) -> str:
    """Turn a Pathable into a dot-separated path string.

    - ``None`` resolves to the first calling module outside the ignored packages.
    - A string is returned verbatim.
    - A module yields its ``__name__``.
    - Anything else goes through ``full_name_and_callable``, with the ``--``
      separator replaced by a dot.

    Raises ValueError when ``pathable`` is None and no external module is found.
    """
    if pathable is None:
        module_path = _get_first_external_module()
        if not module_path:
            raise ValueError(f"Found no module outside mops within {pathable}")
        return module_path

    if isinstance(pathable, str):
        return pathable

    if isinstance(pathable, types.ModuleType):
        return pathable.__name__

    full_name, _ = full_name_and_callable(pathable)
    return full_name.replace("--", ".")
52
+
53
+
54
class ConfigTree(ty.Generic[V]):
    """This is a cute little utility class for applying homogeneously-typed configuration
    following hierarchical (tree-like) paths.

    Generally, the config closest to the 'leaf' path will be used, but there is also a
    'mask' option to override subtrees.

    Imagine you have some modules:
    - foo.bar.baz.materialize
    - foo.bar.quux.materialize
    - foo.george.materialize
    - foo.steve.materialize

    in each of which you have several materialization functions using mops.

    Some API might construct one of these objects to afford you a way to 'set' the config
    at each level of your hierarchy.

    Inside foo.bar.__init__.py, you could call

    - `the_api.setv(a_config_object)`

    and this would set the config for anything where the module path to it included foo.bar.

    But if foo.bar.baz.materialize wanted to set config for everything inside itself,
    at the top of that module you'd call

    - `the_api.setv(diff_config_object)`

    and this would set the config for that module only.

    If you need to _override_ the config for an entire subtree, we call this masking.
    You can call the_api.setv(value, mask=True) to mask the subtree.

    - `the_api.setv(value, 'foo', mask=True)`

    will mask everything under foo, including bar, george, and steve.

    This isn't truly limited to modules, either - you can pass any module _or_ callable
    in to setv as the object from which you want us to derive a dot-separated path.
    Or you can pass in an arbitrary dot-separated string and we'll use it verbatim.
    """

    def __init__(self, name: str, parse: ty.Optional[ty.Callable[[ty.Any], V]] = None):
        """Create an empty tree backed by a ``core.config.ConfigRegistry`` named ``name``.

        If provided, parse must be an idempotent parser. In other words, parse(parse(x)) == parse(x)
        """
        self.registry = core.config.ConfigRegistry(name)
        self.parse = parse or (lambda v: v)
        self._make_config = partial(
            core.config.ConfigItem[V], registry=self.registry, name_transform=lambda s: s, parse=parse
        )

    def getv(self, path: str, default: V = ty.cast(V, _NONE)) -> V:
        """Return the config value that applies to the dotted ``path``.

        Masks are consulted first, from the least specific prefix (the global
        mask) to the most specific; the first registered mask wins. If no mask
        applies, the most specific non-mask entry is used.

        Raises RuntimeError if nothing matches and no ``default`` was given.
        """
        parts = [*path.split(".")]
        mask = "__mask"
        for i in range(0, len(parts) + 1):
            prefix = ".".join([mask, *parts[:i]])
            # we do an 'in' check b/c the value might not be truthy, or even non-None
            if prefix in self.registry:
                return self.registry[prefix]()

        # If not masked, fall back to normal hierarchical lookup
        return self._get_most_specific_v(path, parts, default)

    def _get_most_specific_v(
        self, path: str, parts: ty.Sequence[str], default: V = ty.cast(V, _NONE)
    ) -> V:
        """Look up the longest registered prefix of ``parts``, ending at the global ``""`` entry."""
        for i in range(len(parts), -1, -1):
            prefix = ".".join(parts[:i])
            # we do an 'in' check b/c the value might not be truthy, or even non-None
            if prefix in self.registry:
                return self.registry[prefix]()
        assert prefix == ""

        if default is not _NONE:
            return default

        name = self.registry.name
        raise RuntimeError(f"No {name} configuration matches {path} and no global config was set")

    def setv(
        self, value: V, pathable: Pathable = None, *, mask: bool = False
    ) -> core.config.ConfigItem[V]:
        """Set the value for the given Pathable, or the current module if no Pathable is given.
        By default, greater overlap in paths will supersede less overlap.

        mask=True will override any 'more specific' config below it in the hierarchy.
        """
        config_path = to_dotted_path(pathable)
        if mask:
            # masks live under a reserved '__mask' root; an empty path is the global mask
            config_path = ".".join(filter(None, ["__mask", config_path]))
            log_msg = "Masking all [%s] config under '%s' with %s"
        else:
            log_msg = "Setting [%s] '%s' to %s"
        logger.debug(log_msg, self.registry.name, config_path, value)
        if config_item := self.registry.get(config_path):
            config_item.set_global(self.parse(value))
        else:
            config_item = self._make_config(config_path, default=value)  # also registers the ConfigItem
        return config_item

    def __setitem__(self, key: str, value: V) -> None:
        """Dict-style shorthand for ``setv(value, pathable=key)``."""
        self.setv(value, pathable=key)

    def load_config(self, config: ty.Mapping[str, ty.Any]) -> None:
        """Loads things with an inner key matching this name into the config.

        After flattening, a key of the form ``<path>.<name>`` sets the value for
        ``<path>``, and a key of the form ``<path>.__mask.<name>`` masks the
        subtree under ``<path>``.
        """
        mask_name = f".__mask.{self.registry.name}"
        conf_name = f".{self.registry.name}"
        logger.debug("Loading config for %s", self.registry.name)
        for key, value in core.config.flatten_config(config).items():
            if key.endswith(mask_name):
                # Strip the whole '.__mask.<name>' suffix. Stripping only
                # '.<name>' would leave '.__mask' on the path, registering the
                # mask at '__mask.<path>.__mask', which getv never looks up.
                self.setv(value, key[: -len(mask_name)], mask=True)
            elif key.endswith(conf_name):
                self.setv(value, key[: -len(conf_name)])

    def __repr__(self) -> str:
        return f"ConfigTree('{self.registry.name}', {list(self.registry.items())})"
@@ -0,0 +1,16 @@
1
+ import contextlib
2
+ import typing as ty
3
+
4
+
5
@contextlib.contextmanager
def catch(allow: ty.Callable[[Exception], bool]) -> ty.Iterator:
    """Context manager that swallows only the exceptions ``allow`` approves.

    Any ``Exception`` raised in the body is passed to ``allow``; it is
    suppressed when ``allow`` returns a truthy value and re-raised otherwise.

    Useful for libraries like azure where all the Exceptions have the
    same type.
    """
    try:
        yield
    except Exception as exc:
        if allow(exc):
            return
        raise
@@ -0,0 +1,78 @@
1
+ import functools
2
+ import typing as ty
3
+ from threading import Lock, RLock
4
+ from typing import Optional, Union
5
+
6
+ from cachetools import keys
7
+
8
+ try:
9
+ from cachetools.func import _CacheInfo # type: ignore
10
+ except ImportError:
11
+ # this moved between 5.2.1 and 5.3.
12
+ from cachetools import _CacheInfo # type: ignore
13
+
14
+
15
+ F = ty.TypeVar("F", bound=ty.Callable)
16
+
17
+
18
def locked_cached(
    cache: ty.Any, typed: bool = False, lock: Optional[Union[RLock, Lock]] = None
) -> ty.Callable[[F], F]:
    """Like cachetools.func._cache, except it locks the actual
    function call but does _not_ lock reading from the cache the first
    time, so most of the time, cache hits are nearly free, but you
    don't call the function more than once for the same arguments.

    Args:
        cache: the cache object to store results in; it must provide
            ``maxsize``, ``currsize``, item lookup, ``setdefault`` and ``clear``.
        typed: when True, keys are built with ``keys.typedkey`` instead of
            ``keys.hashkey``.
        lock: the lock guarding the function call; a fresh ``RLock`` is
            created per decorated function when omitted.

    Returns:
        A decorator. The decorated function gains ``cache_info()``,
        ``cache_clear()`` and ``cache_parameters()`` attributes.
    """
    # captured once here; this is what cache_parameters() reports
    maxsize = cache.maxsize

    def decorator(func: F) -> F:
        key = keys.typedkey if typed else keys.hashkey
        hits = misses = 0
        _lock = lock or RLock()

        def wrapper(*args, **kwargs):  # type: ignore
            nonlocal hits, misses
            k = key(*args, **kwargs)

            # optimistic lookup on a cache that is threadsafe for reads
            # NOTE(review): this `hits += 1` happens outside the lock, so the
            # statistics may be approximate under contention - confirm that is acceptable.
            try:
                v = cache[k]
                hits += 1
                return v
            except KeyError:
                with _lock:
                    # re-check under the lock: another thread may have filled
                    # the entry while we were waiting to acquire it
                    try:
                        v = cache[k]
                        hits += 1
                        return v
                    except KeyError:
                        misses += 1

                    # the function call itself runs while holding the lock
                    v = func(*args, **kwargs)
                    # in case of a race, prefer the item already in the cache
                    try:
                        return cache.setdefault(k, v)
                    except ValueError:
                        return v  # value too large

        def cache_info() -> _CacheInfo:
            # read the sizes under the lock; maxsize here is the cache's current value
            with _lock:
                maxsize = cache.maxsize
                currsize = cache.currsize
            return _CacheInfo(hits, misses, maxsize, currsize)

        def cache_clear() -> None:
            nonlocal hits, misses
            with _lock:
                try:
                    cache.clear()
                finally:
                    # the counters are reset even if clearing the cache raises
                    hits = misses = 0

        wrapper.cache_info = cache_info  # type: ignore
        wrapper.cache_clear = cache_clear  # type: ignore
        wrapper.cache_parameters = lambda: {"maxsize": maxsize, "typed": typed}  # type: ignore
        functools.update_wrapper(wrapper, func)
        return ty.cast(F, wrapper)

    return decorator
@@ -0,0 +1,23 @@
1
+ import typing as ty
2
+
3
+
4
+ def full_name_and_callable(func: ty.Any) -> ty.Tuple[str, ty.Callable]:
5
+ """return {module}--{name} for an actual (non-wrapped) function or class,
6
+ plus the unwrapped callable itself.
7
+ """
8
+ if hasattr(func, "func"): # support functools.partial
9
+ return full_name_and_callable(func.func)
10
+
11
+ module = func.__module__
12
+ try:
13
+ name = func.__name__
14
+ except AttributeError:
15
+ try:
16
+ # for some reason, __name__ does not exist on instances of objects,
17
+ # nor does it exist as a 'member' of the __class__ attribute, but
18
+ # we can just pull it out directly like this for callable classes.
19
+ name = func.__class__.__name__
20
+ except AttributeError:
21
+ name = "MOPS_UNKNOWN_NAME"
22
+
23
+ return f"{module}--{name}", func
@@ -0,0 +1,28 @@
1
+ import typing as ty
2
+ from functools import wraps
3
+ from timeit import default_timer
4
+
5
+ from thds.core import log
6
+
7
+ from .colorize import colorized, make_colorized_out
8
+
9
+ F = ty.TypeVar("F", bound=ty.Callable)
10
+ logger = log.getLogger(__name__)
11
+ _SLOW = colorized(fg="yellow", bg="black")
12
+ LogSlow = make_colorized_out(_SLOW, out=logger.warning)
13
+
14
+
15
def on_slow(callback: ty.Callable[[float], None], slow_seconds: float = 3.0) -> ty.Callable[[F], F]:
    """Decorator factory: call ``callback(elapsed_seconds)`` when a call is slow.

    The callback fires only after the wrapped function returns normally and
    only if it took strictly longer than ``slow_seconds``.
    """

    def deco(f: F) -> F:
        @wraps(f)
        def wrapper(*args, **kwargs):  # type: ignore
            began = default_timer()
            result = f(*args, **kwargs)
            duration = default_timer() - began
            if duration > slow_seconds:
                callback(duration)
            return result

        return ty.cast(F, wrapper)

    return deco
@@ -0,0 +1,30 @@
1
+ import threading
2
+ import typing as ty
3
+
4
FNone = ty.TypeVar("FNone", bound=ty.Callable[[], None])


class Once:
    """Uses unique IDs to guarantee that an operation has only run
    once in the lifetime of this object, and waits for it to be complete.

    Is a potential source of memory leaks, since each event will be
    stored until the entire Once object is disposed.
    """

    def __init__(self) -> None:
        self.lock = threading.Lock()  # guards insertion into self.events
        self.events: ty.Dict[ty.Hashable, threading.Event] = dict()

    def run_once(self, run_id: ty.Hashable, f: FNone) -> None:
        """Run ``f`` if nobody has claimed ``run_id`` yet; otherwise wait for that run.

        The first caller for a given ``run_id`` executes ``f``; every other
        caller blocks until that execution has finished. If ``f`` raises, the
        exception propagates to the caller that ran it, and the run is still
        marked finished, so waiting and later callers return instead of
        blocking forever. ``f`` is never retried.
        """
        needs_run = False
        # unlocked membership test first, re-checked under the lock before claiming
        if run_id not in self.events:
            with self.lock:
                if run_id not in self.events:
                    needs_run = True
                    self.events[run_id] = threading.Event()
        if needs_run:
            try:
                f()
            finally:
                # always release waiters, even on failure, to avoid a deadlock
                self.events[run_id].set()
        else:
            self.events[run_id].wait()
@@ -0,0 +1,32 @@
1
+ """Utility to make returning files via their Paths less confusing in the application code."""
2
+ import typing as ty
3
+ from contextlib import contextmanager
4
+ from pathlib import Path
5
+
6
+ from thds.core import lazy, scope, tmp
7
+
8
+
9
@contextmanager
def _temp_dir() -> ty.Iterator[Path]:
    """Yield a freshly created directory at a path from ``tmp.temppath_same_fs``."""
    with tmp.temppath_same_fs() as dir_path:
        dir_path.mkdir()
        yield dir_path
14
+
15
+
16
+ _FOREVER_SCOPE = scope.Scope("until_mops_exit")
17
+ _SINGLE_REMOTE_TMP_DIR = lazy.Lazy(lambda: _FOREVER_SCOPE.enter(_temp_dir()))
18
+ # there's really no obvious reason why you'd ever need more than one of these as long as
19
+ # you're giving your actual output files names, so we create one as a global for general
20
+ # use.
21
+
22
+
23
def new_tempdir() -> Path:
    """Create a new temporary directory registered with the module's forever scope."""
    fresh_dir_context = _temp_dir()
    return _FOREVER_SCOPE.enter(fresh_dir_context)
25
+
26
+
27
def tempdir() -> Path:
    """Return the shared temporary directory, creating it lazily on first use.

    The files will get cleaned up when the interpreter exits.
    """
    shared_dir = _SINGLE_REMOTE_TMP_DIR()
    return shared_dir
thds/mops/config.py ADDED
@@ -0,0 +1,60 @@
1
+ import os
2
+ import typing as ty
3
+ from pathlib import Path
4
+
5
+ from thds.core import config, log
6
+ from thds.mops._compat import tomllib
7
+
8
+ logger = log.getLogger(__name__)
9
+
10
+
11
def find_first_upward_mops_toml() -> ty.Optional[Path]:
    """Search the working directory and each of its ancestors for a readable ``.mops.toml``.

    Returns the resolved path of the first match, or None when the filesystem
    root is reached without a match or a PermissionError interrupts the search.
    """
    start = Path.cwd()
    try:
        for directory in (start, *start.parents):
            candidate = directory / ".mops.toml"
            if candidate.is_file() and os.access(candidate, os.R_OK):
                return candidate.resolve()
    except PermissionError:
        return None
    return None  # reached the root without finding a config file
23
+
24
+
25
def first_found_config_file() -> ty.Optional[Path]:
    """Return the first existing config file, or None if there is none.

    Candidates, in priority order: the path in the ``MOPS_CONFIG`` environment
    variable, the nearest ``.mops.toml`` at or above the working directory,
    and ``.mops.toml`` in the user's home directory.
    """
    candidates = [
        Path(os.environ.get("MOPS_CONFIG", "")),
        find_first_upward_mops_toml(),
        Path(f"{Path.home()}/.mops.toml"),
    ]
    # the upward search may yield None, hence the truthiness guard
    return next((candidate for candidate in candidates if candidate and candidate.is_file()), None)
35
+
36
+
37
def load(config_file: ty.Optional[Path], name: str = "mops") -> ty.Dict[str, ty.Any]:
    """Parse ``config_file`` as TOML and return its contents.

    Args:
        config_file: path of the TOML file to read; a falsy value (e.g. None)
            means there is nothing to load.
        name: label used only in the debug log message.

    Returns:
        The parsed TOML document, or an empty dict when no file was given.
    """
    if not config_file:
        return dict()
    logger.debug("Loading %s config from %s", name, config_file)
    # the context manager closes the handle; the original leaked it
    with open(config_file, "rb") as toml_file:
        return tomllib.load(toml_file)
42
+
43
+
44
+ max_concurrent_network_ops = config.item("mops.max_concurrent_network_ops", 8, parse=int)
45
+ # 8 clients has been obtained experimentally via the `stress_test`
46
+ # application running on a Mac M1 laptop running 200 parallel 5 second
47
+ # tasks, though no significant difference was obtained between 5 and
48
+ # 20 clients. Running a similar stress test from your orchestrator may
49
+ # be a good idea if you are dealing with hundreds of micro (<20
50
+ # second) remote tasks.
51
+
52
+ open_files_limit = config.item("mops.resources.max_open_files", 10000)
53
+
54
+
55
+ def _filter_to_known_mops_config(config: ty.Dict[str, ty.Any]) -> ty.Dict[str, ty.Any]:
56
+ return {k: v for k, v in config.items() if k.startswith("mops.") or k.startswith("thds.mops")}
57
+
58
+
59
+ # load this after creating the config items
60
+ config.set_global_defaults(_filter_to_known_mops_config(load(first_found_config_file())))
@@ -0,0 +1,2 @@
1
+ from .keyfunc import nil_args # noqa
2
+ from .runner import KeyedLocalRunner # noqa
@@ -0,0 +1,14 @@
1
+ """Impure keyfunctions that are useful for common cases."""
2
+ import typing as ty
3
+
4
+ from ..pure.core.memo.keyfunc import Args, Keyfunc, Kwargs
5
+ from ..pure.core.memo.overwrite_params import parameter_overwriter
6
+
7
+
8
def nil_args(*named_parameters: str) -> Keyfunc:
    """Build a keyfunc that overwrites the named parameters with None.

    The returned keyfunc passes the callable through unchanged and runs the
    args/kwargs through ``parameter_overwriter`` with every named parameter
    mapped to None.
    """

    def nil_args_impure_keyfunc(
        c: ty.Callable, args: Args, kwargs: Kwargs
    ) -> ty.Tuple[ty.Callable, Args, Kwargs]:
        overwrite = parameter_overwriter(c, dict.fromkeys(named_parameters))
        return (c, *overwrite(args, kwargs))

    return nil_args_impure_keyfunc
@@ -0,0 +1,73 @@
1
+ """Builds on top of the pure.MemoizingPicklingRunner to provide
2
+ impure, customizable memoization.
3
+ """
4
+
5
+ import typing as ty
6
+
7
+ from typing_extensions import ParamSpec
8
+
9
+ from thds.core import log
10
+ from thds.core.stack_context import StackContext
11
+
12
+ from ..pure.core.memo.keyfunc import ArgsOnlyKeyfunc, Keyfunc, autowrap_args_only_keyfunc
13
+ from ..pure.core.types import Args, Kwargs
14
+ from ..pure.core.uris import UriResolvable
15
+ from ..pure.pickling.mprunner import NO_REDIRECT, MemoizingPicklingRunner, Redirect
16
+ from ..pure.runner.simple_shims import samethread_shim
17
+
18
+ logger = log.getLogger(__name__)
19
+
20
+
21
+ R = ty.TypeVar("R")
22
+ P = ParamSpec("P")
23
+ F_Args_Kwargs = ty.Tuple[ty.Callable, Args, Kwargs]
24
+ _ORIGINAL_F_ARGS_KWARGS: StackContext[ty.Optional[F_Args_Kwargs]] = StackContext("f_args_kwargs", None)
25
+
26
+
27
def _perform_original_invocation(*_args: ty.Any, **_kwargs: ty.Any) -> ty.Any:
    """Call the function and arguments stashed in ``_ORIGINAL_F_ARGS_KWARGS``.

    The arguments passed to this function are ignored; the stashed
    ``(function, args, kwargs)`` triple is what gets invoked.
    """
    stashed = _ORIGINAL_F_ARGS_KWARGS()
    assert (
        stashed is not None
    ), "_perform_original_invocation() must be called from within a runner"
    original_func, original_args, original_kwargs = stashed
    return original_func(*original_args, **original_kwargs)
34
+
35
+
36
class KeyedLocalRunner(MemoizingPicklingRunner):
    """The only purpose for using this is to reify/memoize your results.

    Allows changing the memoization key, at the expense of
    (theoretical) purity, since now we're memoizing on something you
    made up, rather than something directly derived from the full set
    of arguments passed to your function.

    When the 'remote' side is reached, the original (args, kwargs)
    will be passed to the result of `redirect`, or the original
    function if `redirect` is the default (NO_REDIRECT, presumably an
    identity redirect - confirm in mprunner).

    This runs the 'remote' function in the same process - your
    function, if no memoized result is found, will execute in the same
    thread where it was originally called. This runner will use the
    return values of `keyfunc` _only_ for the purposes of
    keying the cache.
    """

    def __init__(
        self,
        blob_storage_root: UriResolvable,
        *,
        keyfunc: ty.Union[ArgsOnlyKeyfunc, Keyfunc],
        redirect: Redirect = NO_REDIRECT,
    ):
        """Configure the runner.

        Args:
            blob_storage_root: forwarded unchanged to MemoizingPicklingRunner.
            keyfunc: produces the (function, args, kwargs) handed to the parent
                runner; it is wrapped with ``autowrap_args_only_keyfunc`` first.
            redirect: applied to the raw call in ``__call__`` to choose the
                function that is actually executed.
        """
        self._impure_keyfunc = autowrap_args_only_keyfunc(keyfunc)
        self._pre_pickle_redirect = redirect
        super().__init__(
            samethread_shim,
            blob_storage_root,
            # whatever the parent is asked to run, it is redirected to the
            # helper that replays the stashed original invocation
            redirect=lambda _f, _args, _kwargs: _perform_original_invocation,
        )

    def __call__(self, raw_func: ty.Callable[P, R], raw_args: P.args, raw_kwargs: P.kwargs) -> R:
        """Run through the parent runner using the keyfunc's output, while the
        original arguments are stashed for the real invocation.
        """
        actual_function_to_call = self._pre_pickle_redirect(raw_func, raw_args, raw_kwargs)
        # stash the real function and original args for _perform_original_invocation
        with _ORIGINAL_F_ARGS_KWARGS.set((actual_function_to_call, raw_args, raw_kwargs)):
            return super().__call__(*self._impure_keyfunc(raw_func, raw_args, raw_kwargs))