thds.mops 3.6.20250219172032 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of thds.mops has been flagged as potentially problematic.

Files changed (111)
  1. thds/mops/__about__.py +8 -0
  2. thds/mops/__init__.py +3 -0
  3. thds/mops/_compat.py +6 -0
  4. thds/mops/_utils/__init__.py +0 -0
  5. thds/mops/_utils/colorize.py +110 -0
  6. thds/mops/_utils/config_tree.py +167 -0
  7. thds/mops/_utils/exception.py +16 -0
  8. thds/mops/_utils/locked_cache.py +78 -0
  9. thds/mops/_utils/names.py +23 -0
  10. thds/mops/_utils/on_slow.py +28 -0
  11. thds/mops/_utils/once.py +30 -0
  12. thds/mops/_utils/temp.py +32 -0
  13. thds/mops/config.py +60 -0
  14. thds/mops/impure/__init__.py +2 -0
  15. thds/mops/impure/keyfunc.py +14 -0
  16. thds/mops/impure/runner.py +73 -0
  17. thds/mops/k8s/__init__.py +27 -0
  18. thds/mops/k8s/_shared.py +3 -0
  19. thds/mops/k8s/apply_yaml.py +22 -0
  20. thds/mops/k8s/auth.py +49 -0
  21. thds/mops/k8s/config.py +37 -0
  22. thds/mops/k8s/container_registry.py +14 -0
  23. thds/mops/k8s/jobs.py +57 -0
  24. thds/mops/k8s/launch.py +234 -0
  25. thds/mops/k8s/logging.py +239 -0
  26. thds/mops/k8s/namespace.py +17 -0
  27. thds/mops/k8s/node_selection.py +58 -0
  28. thds/mops/k8s/retry.py +75 -0
  29. thds/mops/k8s/too_old_resource_version.py +42 -0
  30. thds/mops/k8s/tools/krsync.py +50 -0
  31. thds/mops/k8s/tools/krsync.sh +22 -0
  32. thds/mops/k8s/wait_job.py +72 -0
  33. thds/mops/k8s/warn_image_backoff.py +63 -0
  34. thds/mops/k8s/watch.py +266 -0
  35. thds/mops/meta.json +8 -0
  36. thds/mops/parallel.py +36 -0
  37. thds/mops/pure/__init__.py +43 -0
  38. thds/mops/pure/_magic/__init__.py +0 -0
  39. thds/mops/pure/_magic/api.py +114 -0
  40. thds/mops/pure/_magic/sauce.py +152 -0
  41. thds/mops/pure/_magic/shims.py +34 -0
  42. thds/mops/pure/adls/__init__.py +1 -0
  43. thds/mops/pure/adls/_files.py +22 -0
  44. thds/mops/pure/adls/blob_store.py +185 -0
  45. thds/mops/pure/adls/output_fqn.py +17 -0
  46. thds/mops/pure/core/__init__.py +0 -0
  47. thds/mops/pure/core/content_addressed.py +31 -0
  48. thds/mops/pure/core/deferred_work.py +83 -0
  49. thds/mops/pure/core/entry/__init__.py +2 -0
  50. thds/mops/pure/core/entry/main.py +47 -0
  51. thds/mops/pure/core/entry/route_result.py +66 -0
  52. thds/mops/pure/core/entry/runner_registry.py +31 -0
  53. thds/mops/pure/core/file_blob_store.py +120 -0
  54. thds/mops/pure/core/lock/__init__.py +7 -0
  55. thds/mops/pure/core/lock/_acquire.py +192 -0
  56. thds/mops/pure/core/lock/_funcs.py +37 -0
  57. thds/mops/pure/core/lock/cli.py +73 -0
  58. thds/mops/pure/core/lock/maintain.py +150 -0
  59. thds/mops/pure/core/lock/read.py +39 -0
  60. thds/mops/pure/core/lock/types.py +37 -0
  61. thds/mops/pure/core/lock/write.py +136 -0
  62. thds/mops/pure/core/memo/__init__.py +6 -0
  63. thds/mops/pure/core/memo/function_memospace.py +267 -0
  64. thds/mops/pure/core/memo/keyfunc.py +53 -0
  65. thds/mops/pure/core/memo/overwrite_params.py +61 -0
  66. thds/mops/pure/core/memo/results.py +103 -0
  67. thds/mops/pure/core/memo/unique_name_for_function.py +70 -0
  68. thds/mops/pure/core/metadata.py +230 -0
  69. thds/mops/pure/core/output_naming.py +52 -0
  70. thds/mops/pure/core/partial.py +15 -0
  71. thds/mops/pure/core/pipeline_id.py +62 -0
  72. thds/mops/pure/core/pipeline_id_mask.py +79 -0
  73. thds/mops/pure/core/script_support.py +25 -0
  74. thds/mops/pure/core/serialize_big_objs.py +73 -0
  75. thds/mops/pure/core/serialize_paths.py +149 -0
  76. thds/mops/pure/core/source.py +291 -0
  77. thds/mops/pure/core/types.py +142 -0
  78. thds/mops/pure/core/uris.py +81 -0
  79. thds/mops/pure/core/use_runner.py +47 -0
  80. thds/mops/pure/joblib/__init__.py +1 -0
  81. thds/mops/pure/joblib/backend.py +81 -0
  82. thds/mops/pure/joblib/batching.py +67 -0
  83. thds/mops/pure/pickling/__init__.py +3 -0
  84. thds/mops/pure/pickling/_pickle.py +193 -0
  85. thds/mops/pure/pickling/memoize_only.py +22 -0
  86. thds/mops/pure/pickling/mprunner.py +173 -0
  87. thds/mops/pure/pickling/pickles.py +149 -0
  88. thds/mops/pure/pickling/remote.py +145 -0
  89. thds/mops/pure/pickling/sha256_b64.py +71 -0
  90. thds/mops/pure/runner/__init__.py +0 -0
  91. thds/mops/pure/runner/local.py +239 -0
  92. thds/mops/pure/runner/shim_builder.py +25 -0
  93. thds/mops/pure/runner/simple_shims.py +21 -0
  94. thds/mops/pure/runner/strings.py +1 -0
  95. thds/mops/pure/runner/types.py +28 -0
  96. thds/mops/pure/tools/__init__.py +0 -0
  97. thds/mops/pure/tools/history.py +35 -0
  98. thds/mops/pure/tools/inspect.py +372 -0
  99. thds/mops/pure/tools/sha256_b64_addressed.py +40 -0
  100. thds/mops/pure/tools/stress.py +63 -0
  101. thds/mops/pure/tools/summarize/__init__.py +4 -0
  102. thds/mops/pure/tools/summarize/cli.py +293 -0
  103. thds/mops/pure/tools/summarize/run_summary.py +143 -0
  104. thds/mops/py.typed +0 -0
  105. thds/mops/testing/__init__.py +0 -0
  106. thds/mops/testing/deferred_imports.py +81 -0
  107. thds.mops-3.6.20250219172032.dist-info/METADATA +42 -0
  108. thds.mops-3.6.20250219172032.dist-info/RECORD +111 -0
  109. thds.mops-3.6.20250219172032.dist-info/WHEEL +5 -0
  110. thds.mops-3.6.20250219172032.dist-info/entry_points.txt +7 -0
  111. thds.mops-3.6.20250219172032.dist-info/top_level.txt +1 -0
thds/mops/pure/tools/history.py
@@ -0,0 +1,35 @@
+ """Find out how long a run took by looking at outputs to ADLS."""
+
+ import typing as ty
+ from datetime import timezone
+
+ from thds.adls.global_client import get_global_fs_client
+
+ from ..adls._files import yield_files
+
+
+ def summarize(sa: str, container: str, pipeline_root_dir: str) -> ty.Dict[str, ty.Any]:
+     times = list()
+     durations = list()
+     total_functions = 0
+     for azure_file in yield_files(get_global_fs_client(sa, container), pipeline_root_dir):
+         if azure_file.name.endswith("invocation"):
+             total_functions += 1
+             times.append(azure_file.creation_time)
+             last_modified = azure_file.last_modified.replace(tzinfo=timezone.utc)
+             durations.append(last_modified - azure_file.creation_time)
+
+     durations = sorted(durations)
+     times = sorted(times)
+
+     start = times[0]
+     end = times[-1]
+
+     max_duration = durations[-1]
+     return dict(
+         start=start,
+         end=end,
+         duration=end - start,
+         slowest_file_upload=max_duration,
+         total_functions=total_functions,
+     )
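For orientation, here is a minimal sketch of how the `summarize` function above might be called. The storage account, container, and pipeline root names are hypothetical placeholders, and a real call needs ADLS credentials plus an actual mops output tree:

```python
# Hypothetical usage of summarize() from history.py above.
# "mystorageaccount", "data", and "mops-root/my-pipeline" are placeholders.
from thds.mops.pure.tools.history import summarize

stats = summarize("mystorageaccount", "data", "mops-root/my-pipeline")
print(
    f"ran {stats['total_functions']} functions from {stats['start']} to {stats['end']}"
    f" ({stats['duration']}); slowest file upload took {stats['slowest_file_upload']}"
)
```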
thds/mops/pure/tools/inspect.py
@@ -0,0 +1,372 @@
+ """Inspect mops control files and unpickle them for debugging.
+
+ Note that this really only works with ADLS-like Blob Stores, and
+ only with the MemoizingPicklingRunner, which is the only implementation
+ we have as of 2024-09-24, and will probably be the only implementation ever...
+ but if you're reading this in the distant future - those are its limitations.
+ """
+
+ import argparse
+ import functools
+ import os
+ import re
+ import subprocess
+ import typing as ty
+ from dataclasses import dataclass
+ from pathlib import Path
+ from pprint import pprint
+
+ from thds import adls
+ from thds.core import log, scope, tmp
+ from thds.mops.parallel import Thunk
+ from thds.mops.pure.core import uris
+ from thds.mops.pure.core.memo import results
+ from thds.mops.pure.pickling._pickle import (
+     CallableUnpickler,
+     read_metadata_and_object,
+     unfreeze_args_kwargs,
+ )
+ from thds.mops.pure.pickling.pickles import Invocation
+ from thds.mops.pure.runner import strings
+
+ logger = log.getLogger(__name__)
+
+
+ class _MopsInspectPrettyPartial(functools.partial):
+     def __repr__(self) -> str:
+         return f"partial({self.func.__name__}, {self.args}, {self.keywords})"
+
+     def __rich_repr__(self) -> ty.Iterable[ty.Tuple[str, ty.Any]]:
+         """I don't much like how partial does its repr. Especially with nested partials,
+         it becomes almost impossible to follow.
+         """
+         yield "function", self.func.__name__
+         yield "args", self.args
+         yield "keywords", self.keywords
+
+
+ class PartialViewingUnpickler(CallableUnpickler):
+     def find_class(self, module: str, name: str) -> ty.Any:
+         if module == "functools" and name == "partial":
+             return _MopsInspectPrettyPartial
+         return super().find_class(module, name)
+
+
+ def _unpickle_object_for_debugging(uri: str) -> ty.Any:
+     try:
+         if uri.endswith("/" + strings.INVOCATION):
+             _no_header, invoc_raw = read_metadata_and_object(strings.INVOCATION, uri)
+             invoc = ty.cast(Invocation, invoc_raw)
+             args, kwargs = unfreeze_args_kwargs(invoc.args_kwargs_pickle, PartialViewingUnpickler)
+             return Thunk(getattr(invoc, "f", None) or invoc.func, *args, **kwargs)
+         header, obj = read_metadata_and_object("output", uri)
+         return obj, header
+     except ImportError as ie:
+         logger.error(f"Could not import the module ({ie}) needed to unpickle the object.")
+         logger.error("Try re-running this tool in the environment where the above module is available.")
+         raise
+
+
+ def _resolved_uri(uri: str) -> str:
+     if not uri:
+         return ""
+     if fqn := adls.uri.resolve_uri(uri):
+         return str(fqn)
+     return uri
+
+
+ _KNOWN_CONTROL_FILES = [strings.INVOCATION, results.RESULT, results.EXCEPTION]
+
+ # prefix with forward-slash because these live in a blob store 'directory'
+
+
+ @dataclass
+ class IRE:
+     invocation: ty.Any
+     result: ty.Any  # a.k.a. return_value
+     exception: ty.Any
+
+
+ _NOTHING = object()
+
+
+ def _control_uri(uri: str) -> str:
+     for control_file in _KNOWN_CONTROL_FILES:
+         if uri.endswith("/" + control_file):
+             return control_file
+     return ""
+
+
+ @scope.bound
+ def get_control_file(uri: str) -> ty.Any:
+     """Returns _NOTHING if 'normal' errors occur."""
+     try:
+         uri = _resolved_uri(uri)
+     except Exception as e:
+         logger.error(f"Error while resolving {uri}: {e}")
+         return _NOTHING
+
+     if not _control_uri(uri):
+         fs = uris.lookup_blob_store(uri)
+         logger.debug(f"Attempting to fetch all control files for {uri}")
+         return IRE(**{cf: get_control_file(fs.join(uri, cf)) for cf in _KNOWN_CONTROL_FILES})
+
+     has_storage_root = bool(uris.ACTIVE_STORAGE_ROOT())
+     try:
+         scope.enter(uris.ACTIVE_STORAGE_ROOT.set(uris.get_root(uri)))
+         return _unpickle_object_for_debugging(uri)
+     except Exception as e:
+         if uris.lookup_blob_store(uri).is_blob_not_found(e):
+             if has_storage_root or uri not in str(e):
+                 logger.warning(str(e))
+             return None
+         logger.exception("Unexpected error while unpickling the object.")
+         raise
+
+
+ def _embed(o: object) -> None:
+     print('\nObject will be available as "o". Perform embedded URI fetches with "get_control_file"\n')
+     try:
+         __import__("IPython").embed()
+     except ImportError:
+         print("IPython not found, falling back to standard Python shell.")
+         import code
+
+         code.interact(local=locals())
+
+
+ def _pprint(obj: object, file: ty.Any = None, uri: str = "") -> None:
+     if uri:
+         print(uri, file=file)
+
+     try:
+         from rich import console, pretty  # type: ignore[import]
+
+         if file:
+             console.Console(file=file, color_system=None).print(
+                 pretty.Pretty(
+                     obj,  # highlighter=lambda x: x if file else None
+                 )
+             )
+         else:
+             pretty.pprint(obj)
+     except ModuleNotFoundError:
+         pprint(obj, indent=4, width=60, sort_dicts=False, stream=file)
+
+
+ def inspect(uri: str, embed: bool = False) -> ty.Any:
+     obj = get_control_file(uri)
+     if obj is _NOTHING:
+         return
+
+     if embed:
+         _embed(obj)
+     else:
+         print()
+         _pprint(obj)
+     return obj
+
+
+ def inspect_and_log(memo_uri: str) -> None:
+     inspect(memo_uri)
+     logger.error(
+         "A required result was not found."
+         " You can compare the above output with other invocations"
+         f" by running `mops-inspect {memo_uri}`"
+         " in your local Python environment."
+     )
+
+
+ @dataclass
+ class Ignores:
+     permanent_ignores_file: Path
+     known_ignores: ty.Set[str]
+
+     def __post_init__(self) -> None:
+         self.permanent_ignores_file.parent.mkdir(parents=True, exist_ok=True)
+         if not self.permanent_ignores_file.exists():
+             self.permanent_ignores_file.touch()
+         self.known_ignores = set(filter(None, open(self.permanent_ignores_file).read().splitlines()))
+
+     def ignore_uri(self, ignore_uri: str) -> None:
+         self.known_ignores.add(ignore_uri)
+         # possible race condition here if multiple runs of mops-inspect are happening
+         # in parallel?
+         with open(self.permanent_ignores_file, "a") as wf:
+             wf.write(ignore_uri + "\n")
+
+     def __contains__(self, uri: str) -> bool:
+         return uri in self.known_ignores
+
+
+ @dataclass
+ class Matches:
+     must_match: ty.List[str]
+     must_not_match: ty.List[str]
+
+     def add_regex(self, regex: str) -> ty.Literal["ignore", "match"]:
+         """These are not permanent"""
+         if regex.startswith("!"):
+             self.must_not_match.append(regex[1:])
+             return "ignore"
+
+         self.must_match.append(regex)
+         return "match"
+
+     def matches(self, ire_str: str) -> bool:
+         for regex in self.must_not_match:
+             if re.search(regex, ire_str):
+                 logger.debug('Ignoring because of regex: "%s"', regex)
+                 return False
+
+         if not self.must_match:
+             logger.debug("No regexes must match")
+             return True
+
+         all_match = all(re.search(regex, ire_str) for regex in self.must_match)
+         if all_match:
+             logger.debug("Matches all required regexes")
+             return True
+
+         logger.debug("Does not match all of the %d required regexes.", len(self.must_match))
+         return False
+
+
+ _IGNORES = Ignores(Path("~/.mops-inspect-ignores").expanduser(), set())
+ _MATCHES = Matches(list(), list())
+ DIFF_TOOL = os.environ.get("DIFF_TOOL") or "difft"  # nicer diffs by default
+
+
+ def _check_diff_tool() -> None:
+     global DIFF_TOOL
+     try:
+         subprocess.run([DIFF_TOOL, "--version"], check=True, capture_output=True)
+     except subprocess.CalledProcessError:
+         logger.warning("You may want to `brew install difft` for nicer diffs.")
+         DIFF_TOOL = "diff"
+
+
+ def _run_diff_tool(path_old: Path, path_new: Path) -> None:
+     subprocess.run([DIFF_TOOL, str(path_old), str(path_new)], check=True)
+
+
+ def _write_ire_to_path(ire: IRE, path: Path, uri: str) -> None:
+     with open(path, "w") as wf:
+         _pprint(ire, file=wf, uri=uri)
+
+
+ def _diff_memospace(uri: str, new_control: IRE) -> None:
+     """Diff all siblings in the memospace against the new invocation.
+
+     Ignore any that have been ignored previously.
+     """
+     # this code operates on the assumption that you've provided
+     # it with the 'new' invocation, and you're trying to figure out
+     # what is 'new' as compared to other 'existing' (old) invocations.
+     # Therefore, the 'green' highlighted text will be the 'new' invocation,
+     # and the red will be all the old ones that we loop over below.
+     fs = uris.lookup_blob_store(uri)
+
+     control_type = _control_uri(uri)
+     memospace_uri = fs.join(*fs.split(uri)[: -2 if control_type else -1])
+     # go up two levels to find the memospace if necessary.
+
+     path_new = scope.enter(tmp.temppath_same_fs())
+     _write_ire_to_path(new_control, path_new, uri)
+
+     logger.info(f"Diffing against all siblings in the memospace {memospace_uri}")
+
+     def sibling_menu(sibling_uri: str) -> None:
+         choice = input(
+             "Enter to continue, Ctrl-C to quit, `i` to permanently ignore this URI,"
+             " or type a regex to filter future results (prefix with ! to find non-matches, otherwise will find matches: "
+         )
+         if "i" == choice.lower():
+             _IGNORES.ignore_uri(sibling_uri)
+         elif choice:
+             regex = choice
+             type = _MATCHES.add_regex(regex)
+             logger.info(f"Added <{type}> regex /{regex}/")
+
+     sibling_uris = fs.list(memospace_uri)  # type: ignore
+     found_siblings = False
+
+     for sibling_uri in sibling_uris:
+         if uri.startswith(sibling_uri):
+             continue
+
+         found_siblings = True
+         sibling_uri = sibling_uri.rstrip("/")
+
+         if sibling_uri in _IGNORES:
+             continue
+
+         full_uri = fs.join(sibling_uri, control_type)
+         control_sibling = get_control_file(full_uri)
+         with tmp.temppath_same_fs() as path_sibling:
+             _write_ire_to_path(control_sibling, path_sibling, full_uri)
+             if not _MATCHES.matches(path_sibling.read_text()):
+                 continue
+
+             _run_diff_tool(path_sibling, path_new)
+
+         sibling_menu(sibling_uri)
+
+     if not found_siblings:
+         logger.warning(
+             f"No memospace siblings found for '{memospace_uri}'"
+             " - check your pipeline ID, function-logic-key (if any),"
+             " and whether you're running in prod or dev."
+         )
+
+
+ @scope.bound
+ def _inspect_uri(uri: str, diff_memospace: bool, embed: bool) -> None:
+     uri = _resolved_uri(uri)
+     ire_curr = inspect(uri, embed)  # print the main uri
+
+     if diff_memospace:
+         _diff_memospace(uri, ire_curr)
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description=__doc__)
+     parser.add_argument(
+         "uri",
+         type=str,
+         help="The URI of the first object to inspect. Can be adls:// or https:// or even abfss://",
+     )
+     parser.add_argument(
+         "--diff-memospace",
+         "-d",
+         action="store_true",
+         help=(
+             "Find the diff between the invocation at the provided URI,"
+             " and all other invocations that match the same function memospace."
+             " This will only work if your Blob Store is capable of listing files."
+             " It is highly recommended that you `brew install difftastic` to get more precise diffs."
+         ),
+     )
+     parser.add_argument(
+         "--loop",
+         action="store_true",
+         help="Keep prompting for URIs to inspect - basically just an embedded while loop.",
+     )
+     parser.add_argument("--embed", action="store_true", help="Embed an IPython shell after inspection.")
+     args = parser.parse_args()
+     args.uri = args.uri.rstrip("/")
+     if args.diff_memospace:
+         _check_diff_tool()
+
+     _inspect_uri(args.uri, args.diff_memospace, args.embed)
+
+     if args.loop:
+         prompt = "\nEnter another URI to inspect, or empty string to exit: "
+         uri = input(prompt)
+         while uri:
+             _inspect_uri(uri, args.diff_memospace, args.embed)
+             uri = input(prompt)
+
+
+ if __name__ == "__main__":
+     main()
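The `PartialViewingUnpickler` above relies on a standard `pickle.Unpickler` hook: overriding `find_class` lets the loader substitute a friendlier class whenever the stream references `functools.partial`. A self-contained sketch of that general pattern, with made-up names and no mops dependencies:

```python
import functools
import io
import pickle


class PrettyPartial(functools.partial):
    """Hypothetical stand-in with a readable repr, like _MopsInspectPrettyPartial."""

    def __repr__(self) -> str:
        return f"partial({self.func.__name__}, {self.args}, {self.keywords})"


class PartialSwappingUnpickler(pickle.Unpickler):
    def find_class(self, module: str, name: str):
        # Whenever the stream references functools.partial, hand back our subclass.
        if module == "functools" and name == "partial":
            return PrettyPartial
        return super().find_class(module, name)


payload = pickle.dumps(functools.partial(pow, 2, mod=7))
obj = PartialSwappingUnpickler(io.BytesIO(payload)).load()
print(repr(obj))  # partial(pow, (2,), {'mod': 7})
```

Because the substitution happens at load time, the pickled bytes are untouched; only the in-memory view of the object changes.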
thds/mops/pure/tools/sha256_b64_addressed.py
@@ -0,0 +1,40 @@
+ """Upload a file to the location under a given storage root where a
+ pathlib.Path would be put by the MemoizingPicklingFunctionRunner.
+ """
+
+ import argparse
+ from pathlib import Path
+
+ from thds.adls.defaults import mops_root
+
+ from ..._utils.once import Once
+ from ..core import uris
+ from ..core.serialize_paths import CoordinatingPathSerializer, human_sha256b64_file_at_paths
+ from ..pickling import sha256_b64
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description=__doc__)
+
+     parser.add_argument("file", help="Must be an actual file")
+     parser.add_argument(
+         "--upload-root-uri",
+         "-u",
+         help=f"Actually upload, using this URI as storage root. Example: {mops_root()}",
+     )
+
+     args = parser.parse_args()
+
+     the_path = Path(args.file)
+     human_hash = human_sha256b64_file_at_paths(the_path)
+
+     print(human_hash)
+
+     if args.upload_root_uri:
+         storage_root = args.upload_root_uri.rstrip("/") + "/"
+         with uris.ACTIVE_STORAGE_ROOT.set(storage_root):
+             CoordinatingPathSerializer(sha256_b64.Sha256B64PathStream(), Once())(the_path)
+
+
+ if __name__ == "__main__":
+     main()
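The tool above prints a "human" sha256-b64 name for a file. As a rough illustration of the underlying idea (not the package's exact encoding, which lives in `thds.mops.pure.pickling.sha256_b64` and may use a different alphabet or padding), a content address can be derived with nothing but the standard library:

```python
# Sketch of sha256-b64 content addressing using only the standard library.
# The exact encoding thds.mops uses may differ.
import base64
import hashlib
from pathlib import Path


def sha256_b64_of_file(path: Path, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in chunks so large files don't need to fit in memory.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    # URL-safe base64 so the digest can appear in a URI path segment.
    return base64.urlsafe_b64encode(h.digest()).decode("ascii")


# e.g. sha256_b64_of_file(Path("some-file.bin"))  # hypothetical file
```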
thds/mops/pure/tools/stress.py
@@ -0,0 +1,63 @@
+ import subprocess
+ import time
+ import typing as ty
+ from timeit import default_timer
+
+ from thds.adls import defaults
+ from thds.core.log import getLogger
+ from thds.mops._utils.colorize import colorized
+ from thds.mops.config import max_concurrent_network_ops
+ from thds.mops.parallel import Thunk, parallel_yield_results
+ from thds.mops.pure import MemoizingPicklingRunner, use_runner
+
+ BROWN = colorized(fg="brown", bg="black")
+
+ logger = getLogger(__name__)
+
+
+ def _subprocess_remote(args_list: ty.Sequence[str]) -> None:
+     logger.info(f"Invoking 'remote' runner with args {args_list}")
+     subprocess.run(args_list)
+     logger.info("Completed 'remote' runner")
+
+
+ runner = MemoizingPicklingRunner(_subprocess_remote, defaults.mops_root)
+ adls_shim = use_runner(runner)
+
+
+ @adls_shim
+ def run_and_sleep(i: int, data: ty.List[float], sleep: int) -> float:
+     """Runs 'remotely' - arguments are pickled and passed via ADLS; result is returned via ADLS."""
+     the_sum = sum(data)
+     print(BROWN(f"remote {i} - sum: {the_sum} - sleeping!"))
+     time.sleep(sleep)
+     return the_sum
+
+
+ def stress(max_clients: int, n: int, sleep: int) -> None:
+     """MemoizingPicklingRunner will perform 4 local ADLS operations (1 file
+     exists, 1 push, 1 file exists and 1 file pull) per task. The
+     remote runner will perform 2 more ADLS operations, which in this
+     case will also be occurring on the local machine, using a
+     different client per runner. This gives a total of 6 ADLS
+     operations for this test, whereas a properly remote worker would
+     allow those 2 remote operations to be offloaded.
+
+     The computation by definition takes N seconds, but can in theory
+     be perfectly parallelized, so this gives some idea of how the
+     overhead of launching and retrieving task results increases as the
+     length of the task decreases relative to the number of total tasks.
+     """
+     start = default_timer()
+     with max_concurrent_network_ops.set_local(max_clients):
+         tasks = [Thunk(run_and_sleep, i, list(range(i * n, (i + 1) * n)), sleep) for i in range(n)]
+
+         assert len(list(parallel_yield_results(tasks))) == n
+
+     total = default_timer() - start
+     print(
+         f"With max_clients {max_clients}; n {n}; sleep {sleep}, took {total:.1f} seconds,"
+         f" which is {total/n:.2f} seconds per task."
+         " Prior experiments have found this to stabilize with increasing N in the vicinity of 0.2 seconds"
+         " of overhead per task as long as the # of tasks dominates (>=20x) the length (in seconds) of the tasks."
+     )
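An illustrative way to drive the stress test above; the parameter values are arbitrary choices, not recommendations from the package, and a real run requires ADLS access since arguments and results round-trip through the configured blob store:

```python
# Hypothetical invocation of stress() from stress.py above:
# 100 tasks, each sleeping 1 second, with at most 16 concurrent network ops.
from thds.mops.pure.tools.stress import stress

stress(max_clients=16, n=100, sleep=1)
```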
thds/mops/pure/tools/summarize/__init__.py
@@ -0,0 +1,4 @@
+ from .cli import main
+
+ if __name__ == "__main__":
+     main()