thds.mops 3.9.20250721231027__py3-none-any.whl → 3.9.20250722163657__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of thds.mops might be problematic.

Files changed (37)
  1. thds/mops/impure/runner.py +1 -1
  2. thds/mops/k8s/__init__.py +1 -3
  3. thds/mops/k8s/config.py +1 -1
  4. thds/mops/k8s/jobs.py +0 -4
  5. thds/mops/k8s/{_launch.py → launch.py} +57 -56
  6. thds/mops/k8s/logging.py +5 -37
  7. thds/mops/k8s/watch.py +62 -120
  8. thds/mops/pure/__init__.py +1 -2
  9. thds/mops/pure/_magic/sauce.py +3 -11
  10. thds/mops/pure/_magic/shims.py +2 -2
  11. thds/mops/pure/core/deferred_work.py +12 -15
  12. thds/mops/pure/core/entry/runner_registry.py +10 -1
  13. thds/mops/pure/core/lock/__init__.py +0 -1
  14. thds/mops/pure/core/lock/_acquire.py +2 -2
  15. thds/mops/pure/core/lock/maintain.py +3 -22
  16. thds/mops/pure/core/lock/write.py +19 -19
  17. thds/mops/pure/core/memo/__init__.py +1 -1
  18. thds/mops/pure/core/memo/results.py +4 -5
  19. thds/mops/pure/core/use_runner.py +7 -21
  20. thds/mops/pure/pickling/mprunner.py +14 -21
  21. thds/mops/pure/pickling/pickles.py +8 -19
  22. thds/mops/pure/pickling/remote.py +1 -3
  23. thds/mops/pure/runner/local.py +87 -58
  24. thds/mops/pure/runner/shim_builder.py +7 -7
  25. thds/mops/pure/runner/simple_shims.py +0 -7
  26. thds/mops/pure/runner/types.py +4 -15
  27. thds/mops/pure/tools/summarize/run_summary.py +8 -9
  28. {thds_mops-3.9.20250721231027.dist-info → thds_mops-3.9.20250722163657.dist-info}/METADATA +1 -1
  29. {thds_mops-3.9.20250721231027.dist-info → thds_mops-3.9.20250722163657.dist-info}/RECORD +32 -37
  30. thds/mops/k8s/batching.py +0 -198
  31. thds/mops/k8s/counts.py +0 -28
  32. thds/mops/k8s/job_future.py +0 -109
  33. thds/mops/k8s/uncertain_future.py +0 -160
  34. thds/mops/pure/runner/get_results.py +0 -106
  35. {thds_mops-3.9.20250721231027.dist-info → thds_mops-3.9.20250722163657.dist-info}/WHEEL +0 -0
  36. {thds_mops-3.9.20250721231027.dist-info → thds_mops-3.9.20250722163657.dist-info}/entry_points.txt +0 -0
  37. {thds_mops-3.9.20250721231027.dist-info → thds_mops-3.9.20250722163657.dist-info}/top_level.txt +0 -0

thds/mops/pure/core/deferred_work.py
@@ -3,23 +3,15 @@
 # this could be _any_ kind of work, but is only uploads as of initial abstraction.
 # this basic idea was stolen from `pure.core.source` as a form of optimization for
 # uploading Sources and their hashrefs.
-import concurrent.futures
 import typing as ty
 from contextlib import contextmanager
 
 from thds import core
-from thds.core import config, refcount
 from thds.core.stack_context import StackContext
 
 _DEFERRED_INVOCATION_WORK: StackContext[
     ty.Optional[ty.Dict[ty.Hashable, ty.Callable[[], ty.Any]]]
 ] = StackContext("DEFERRED_INVOCATION_WORK", None)
-_MAX_DEFERRED_WORK_THREADS = config.item("max_deferred_work_threads", default=50, parse=int)
-_DEFERRED_WORK_THREADPOOL = refcount.Resource[concurrent.futures.ThreadPoolExecutor](
-    lambda: concurrent.futures.ThreadPoolExecutor(
-        max_workers=_MAX_DEFERRED_WORK_THREADS(), **core.concurrency.initcontext()
-    )
-)
 logger = core.log.getLogger(__name__)
 
 
@@ -32,7 +24,15 @@ def open_context() -> ty.Iterator[None]:
     The idea is that you'd call perform_all() inside your Shim which transfers
     execution to a remote environment, but _not_ call it if you're transferring execution
     to a local environment, as the upload will not be needed.
+
+    This is not re-entrant. If this is called while the dictionary is non-empty, an
+    exception will be raised. This is only because I can think of no reason why anyone
+    would want it to be re-entrant, so it seems better to raise an error. If for some
+    reason re-entrancy were desired, we could just silently pass if the dictionary already
+    has deferred work.
     """
+    existing_work = _DEFERRED_INVOCATION_WORK()
+    assert existing_work is None, f"deferred work context is not re-entrant! {existing_work}"
     with _DEFERRED_INVOCATION_WORK.set(dict()):
         logger.debug("Opening deferred work context")
         yield
@@ -74,13 +74,10 @@ def perform_all() -> None:
     work_items = _DEFERRED_INVOCATION_WORK()
     if work_items:
         logger.info("Performing %s items of deferred work", len(work_items))
-        with _DEFERRED_WORK_THREADPOOL.get() as thread_pool_executor:
-            for key, _ in core.parallel.failfast(
-                core.parallel.yield_all(dict(work_items).items(), executor_cm=thread_pool_executor)
-            ):
-                # consume iterator but don't keep results in memory.
-                logger.debug("Popping deferred work %s from %s", key, id(work_items))
-                work_items.pop(key)
+        for key, _result in core.parallel.yield_all(dict(work_items).items()):
+            # consume iterator but don't keep results in memory.
+            logger.debug("Popping deferred work %s from %s", key, id(work_items))
+            work_items.pop(key)
 
     logger.debug("Done performing deferred work on %s", id(work_items))
     assert not work_items, f"Some deferred work was not performed! {work_items}"
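
Taken together, these hunks drop the module's dedicated thread pool (perform_all() now relies on core.parallel.yield_all's default executor) and make open_context() fail fast on nesting. A minimal sketch of the intended call pattern, assuming hypothetical helpers for the parts these hunks don't show:

    from thds.mops.pure.core import deferred_work

    def shim_to_remote(invocation) -> None:      # hypothetical shim
        with deferred_work.open_context():       # asserts no context is already open
            register_uploads(invocation)         # hypothetical: registers callables by key
            deferred_work.perform_all()          # drains every deferred item, then asserts empty
            hand_off_to_remote(invocation)       # hypothetical remote transfer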

thds/mops/pure/core/entry/runner_registry.py
@@ -5,6 +5,14 @@ In practice we only have a single Runner type registered, the MemoizingPicklingRunner.
 
 import typing as ty
 
+from thds.core import stack_context
+
+RUNNER_ENTRY_COUNT = stack_context.StackContext("runner_entry_count", 0)
+
+
+def entry_count() -> int:
+    return RUNNER_ENTRY_COUNT()
+
 
 class EntryHandler(ty.Protocol):
     def __call__(self, *__args: str) -> ty.Any:
@@ -19,4 +27,5 @@ def register_entry_handler(name: str, mh: EntryHandler) -> None:
 
 
 def run_named_entry_handler(name: str, *args: str) -> None:
-    ENTRY_HANDLERS[name](*args)
+    with RUNNER_ENTRY_COUNT.set(RUNNER_ENTRY_COUNT() + 1):
+        ENTRY_HANDLERS[name](*args)
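
RUNNER_ENTRY_COUNT is a StackContext, so the count is scoped to the current call stack rather than being process-global. A small sketch of the resulting behavior (the handler itself is hypothetical):

    def my_handler(*args: str) -> None:      # hypothetical entry handler
        assert entry_count() == 1            # incremented for the duration of the handler

    register_entry_handler("my-handler", my_handler)
    run_named_entry_handler("my-handler", "arg0")
    assert entry_count() == 0                # StackContext restores the old value on exit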

thds/mops/pure/core/lock/__init__.py
@@ -2,7 +2,6 @@ from ._acquire import acquire  # noqa: F401
 from .maintain import (  # noqa: F401
     CannotMaintainLock,
     launch_daemon_lock_maintainer,
-    maintain_to_release,
     remote_lock_maintain,
 )
 from .types import LockAcquired  # noqa: F401

thds/mops/pure/core/lock/_acquire.py
@@ -32,7 +32,7 @@ from thds.core import log
 from . import _funcs
 from .read import get_writer_id, make_read_lockfile
 from .types import LockAcquired, LockContents
-from .write import LockEmitter, LockfileWriter
+from .write import LockfileWriter, make_lock_contents
 
 logger = log.getLogger(__name__)
 
@@ -106,7 +106,7 @@ def acquire(  # noqa: C901
     lockfile_writer = LockfileWriter(
         my_writer_id,
         lock_dir_uri,
-        LockEmitter(my_writer_id, expire),
+        make_lock_contents(my_writer_id, expire),
         expire.total_seconds(),
         debug=debug,
     )

thds/mops/pure/core/lock/maintain.py
@@ -15,14 +15,12 @@ from datetime import datetime, timedelta
 from functools import partial
 from threading import Thread
 
-from thds.core import config, log
+from thds.core import log
 
 from ._funcs import make_lock_uri
 from .read import get_writer_id, make_read_lockfile
 from .types import LockAcquired
-from .write import LockEmitter, LockfileWriter
-
-MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)
+from .write import LockfileWriter, make_lock_contents
 
 logger = log.getLogger(__name__)
 
@@ -105,7 +103,7 @@ def remote_lock_maintain(lock_dir_uri: str, expected_writer_id: str = "") -> LockAcquired:
     lockfile_writer = LockfileWriter(
         current_writer_id,
         lock_dir_uri,
-        LockEmitter(get_writer_id(lock_contents), timedelta(seconds=expire_s)),
+        make_lock_contents(get_writer_id(lock_contents), timedelta(seconds=expire_s)),
         expire_s,
         writer_name="remote",
     )
@@ -150,20 +148,3 @@ def launch_daemon_lock_maintainer(lock_acq: LockAcquired) -> ty.Callable[[], None]:
             lock_acq.release()
 
     return stop_maintaining
-
-
-def maintain_to_release(
-    acquired_lock: LockAcquired,
-) -> ty.Callable[[], None]:
-    """Depending on configuration, potentially start maintaining the lock.
-
-    Return a callable that will release the lock when called.
-    """
-    if MAINTAIN_LOCKS():
-        return launch_daemon_lock_maintainer(acquired_lock)
-
-    return acquired_lock.release
-
-
-def no_maintain() -> None:
-    MAINTAIN_LOCKS.set_global(False)
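
With maintain_to_release() and the MAINTAIN_LOCKS config item removed, lock maintenance can no longer be switched off by configuration; callers use launch_daemon_lock_maintainer() directly. A hypothetical call-site migration:

    # before (removed): maintenance depended on the MAINTAIN_LOCKS config item
    # release = maintain_to_release(acquired_lock)

    # after: the daemon maintainer is started unconditionally
    release = launch_daemon_lock_maintainer(acquired_lock)
    try:
        do_locked_work()  # hypothetical critical section
    finally:
        release()         # stop maintaining and release the lock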

thds/mops/pure/core/lock/write.py
@@ -1,6 +1,5 @@
 import os
 import typing as ty
-from dataclasses import dataclass
 from datetime import datetime, timedelta
 
 from thds.core import hostname, log
@@ -11,37 +10,38 @@ from .types import LockContents
 logger = log.getLogger(__name__)
 
 
-@dataclass
-class LockEmitter:
-    writer_id: str
-    expire: timedelta
+def make_lock_contents(
+    writer_id: str, expire: timedelta
+) -> ty.Callable[[ty.Optional[datetime]], LockContents]:
+    """Impure - Resets written_at to 'right now' to keep the lock 'live'."""
+    write_count = 0
+    first_written_at = ""
 
-    write_count: int = 0
-    first_written_at: str = ""
+    assert (
+        "/" not in writer_id
+    ), f"{writer_id} should not contain a slash - maybe you passed a URI instead?"
 
-    def __post_init__(self) -> None:
-        assert (
-            "/" not in self.writer_id
-        ), f"{self.writer_id} should not contain a slash - maybe you passed a URI instead?"
-
-    def __call__(self, first_acquired_at: ty.Optional[datetime]) -> LockContents:
-        self.write_count += 1
+    def lock_contents(first_acquired_at: ty.Optional[datetime]) -> LockContents:
+        nonlocal write_count, first_written_at
+        write_count += 1
         now = _funcs.utc_now().isoformat()
-        self.first_written_at = self.first_written_at or now
+        first_written_at = first_written_at or now
 
         return {
-            "writer_id": self.writer_id,
+            "writer_id": writer_id,
             "written_at": now,
-            "expire_s": self.expire.total_seconds(),
+            "expire_s": expire.total_seconds(),
             # debug stuff:
-            "write_count": self.write_count,
+            "write_count": write_count,
             "hostname": hostname.friendly(),
             "pid": str(os.getpid()),
-            "first_written_at": self.first_written_at,
+            "first_written_at": first_written_at,
             "first_acquired_at": first_acquired_at.isoformat() if first_acquired_at else "",
            "released_at": "",
         }
 
+    return lock_contents
+
 
 class LockfileWriter:
     """The core purpose of this class is to allow setting of first_acquired_at immediately

thds/mops/pure/core/memo/__init__.py
@@ -1,4 +1,4 @@
-from . import calls, results, unique_name_for_function  # noqa: F401
+from . import calls, unique_name_for_function  # noqa: F401
 from .function_memospace import (  # noqa
     args_kwargs_content_address,
     make_function_memospace,

thds/mops/pure/core/memo/results.py
@@ -76,8 +76,8 @@ class RequiredResultNotFound(Exception):
 
 def check_if_result_exists(
     memo_uri: str,
-    check_for_exception: bool = False,
-    before_raise: ty.Optional[ty.Callable[[], ty.Any]] = None,
+    rerun_excs: bool = False,
+    before_raise: ty.Callable[[], ty.Any] = lambda: None,
 ) -> ty.Union[None, Success, Error]:
     fs = lookup_blob_store(memo_uri)
     value_uri = fs.join(memo_uri, RESULT)
@@ -86,15 +86,14 @@
 
     required_msg = _should_require_result(memo_uri)
     if required_msg:  # might be custom or the default. either way it indicates a required result.
-        if before_raise:
-            before_raise()
+        before_raise()
         error_msg = f"Required a result for {ORANGE(memo_uri)} but that result was not found"
         # i'm tired of visually scanning for these memo_uris in logs.
         if required_msg != _NO_MSG:
             error_msg += f": {required_msg}"
         raise RequiredResultNotFound(error_msg, memo_uri)
 
-    if not check_for_exception:
+    if rerun_excs:
         return None
 
     error_uri = fs.join(memo_uri, EXCEPTION)
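
Note that the rename also inverts the flag's sense: the old default (check_for_exception=False) skipped the EXCEPTION lookup, while the new default (rerun_excs=False) performs it, so a memoized exception is surfaced unless the caller opts into re-running. A sketch of the two modes (the memo URI is hypothetical):

    # default: a stored EXCEPTION blob under the memo URI is found and returned as an Error
    res = check_if_result_exists("blob://bucket/memo/abc123")

    # opt in to re-running: stored exceptions are ignored and None is returned,
    # so the caller recomputes instead of replaying the old failure
    res = check_if_result_exists("blob://bucket/memo/abc123", rerun_excs=True)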

thds/mops/pure/core/use_runner.py
@@ -4,29 +4,21 @@ You can transfer control to a Runner without this, but decorators are a Pythonic
 """
 
 import typing as ty
-from contextlib import contextmanager
 from functools import wraps
 
 from thds.core import log, stack_context
-from thds.mops._utils.names import full_name_and_callable
 
+from .entry.runner_registry import entry_count
 from .types import Runner
 
-_USE_RUNNER_BYPASS = stack_context.StackContext[set[str]]("use_runner_bypass", set())
-# use this in a Runner remote entry point to allow the remote function call
-# to bypass any use_runner decorator. Also necessary in case somebody is doing advanced
-# things like using a remote runner to run a manifest of _other_ remote functions...
-
 logger = log.getLogger(__name__)
 F = ty.TypeVar("F", bound=ty.Callable)
+FUNCTION_UNWRAP_COUNT = stack_context.StackContext("function_unwrap_count", 0)
 
 
-@contextmanager
-def unwrap_use_runner(f: F) -> ty.Iterator[None]:
-    full_name, _ = full_name_and_callable(f)
-    with _USE_RUNNER_BYPASS.set({full_name}):
-        # this is a no-op if the function is not wrapped
-        yield
+def _is_runner_entry() -> bool:
+    """Function is being called in the context of a Runner."""
+    return entry_count() > FUNCTION_UNWRAP_COUNT()
 
 
 def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> ty.Callable[[F], F]:
@@ -42,15 +34,9 @@ def use_runner(runner: Runner, skip: ty.Callable[[], bool] = lambda: False) -> ty.Callable[[F], F]:
     def deco(f: F) -> F:
         @wraps(f)
         def __use_runner_wrapper(*args, **kwargs):  # type: ignore
-            def should_bypass() -> bool:
-                if skip():
-                    return True
-                full_name, _ = full_name_and_callable(f)
-                return full_name in _USE_RUNNER_BYPASS()
-
-            if should_bypass():
+            if _is_runner_entry() or skip():
                 logger.debug("Calling function %s directly...", f)
-                with unwrap_use_runner(f):
+                with FUNCTION_UNWRAP_COUNT.set(FUNCTION_UNWRAP_COUNT() + 1):
                     return f(*args, **kwargs)
 
             logger.debug("Forwarding local function %s call to runner...", f)

thds/mops/pure/pickling/mprunner.py
@@ -11,7 +11,7 @@ from functools import partial
 
 from typing_extensions import Self
 
-from thds.core import cache, futures, log
+from thds.core import cache, log
 from thds.core.stack_context import StackContext
 
 from ..._utils.once import Once
@@ -20,7 +20,7 @@ from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
 from ..core.serialize_paths import CoordinatingPathSerializer
 from ..core.types import Args, F, Kwargs, Serializer, T
 from ..runner import local, shim_builder
-from ..runner.types import FutureShim, Shim, ShimBuilder
+from ..runner.types import Shim, ShimBuilder
 from ..tools.summarize import run_summary
 from . import _pickle, pickles, sha256_b64
 
@@ -32,7 +32,7 @@ _KWARGS_CONTEXT = StackContext[ty.Mapping]("args_kwargs", dict())
 logger = log.getLogger(__name__)
 
 
-def mp_shim(base_shim: ty.Union[Shim, FutureShim], shim_args: ty.Sequence[str]) -> ty.Any:
+def mp_shim(base_shim: Shim, shim_args: ty.Sequence[str]) -> ty.Any:
     return base_shim((RUNNER_NAME, *shim_args))
 
 
@@ -48,7 +48,7 @@ class MemoizingPicklingRunner:
 
     def __init__(
         self,
-        shim: ty.Union[ShimBuilder, Shim, FutureShim],
+        shim: ty.Union[ShimBuilder, Shim],
         blob_storage_root: uris.UriResolvable,
         *,
         rerun_exceptions: bool = True,
@@ -159,14 +159,20 @@
         ),
     )
 
-    def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> ty.Union[Shim, FutureShim]:
+    def _wrap_shim_builder(self, func: F, args: Args, kwargs: Kwargs) -> Shim:
         base_shim = self._shim_builder(func, args, kwargs)
         return partial(mp_shim, base_shim)
 
-    def submit(self, func: ty.Callable[..., T], *args: ty.Any, **kwargs: ty.Any) -> futures.PFuture[T]:
-        """Now that mops supports Futures, we can have an 'inner' API that returns a PFuture.
+    def __call__(self, func: ty.Callable[..., T], args: Args, kwargs: Kwargs) -> T:
+        """Return result of running this function remotely via the shim.
+
+        Passes data to shim process via pickles in a Blob Store.
 
-        We are trying to mimic the interface that concurrent.futures.Executors provide.
+        May return cached (previously-computed) results found via the
+        derived function memo URI, which contains the determinstic
+        hashed bytes of all the function arguments, but also
+        additional namespacing including pipeline_id as documented
+        in memo.function_memospace.py.
         """
         logger.debug("Preparing to run function via remote shim")
         with _ARGS_CONTEXT.set(args), _KWARGS_CONTEXT.set(kwargs):
@@ -186,16 +192,3 @@
             args,
             kwargs,
         )
-
-    def __call__(self, func: ty.Callable[..., T], args: Args, kwargs: Kwargs) -> T:
-        """Return result of running this function remotely via the shim.
-
-        Passes data to shim process via pickles in a Blob Store.
-
-        May return cached (previously-computed) results found via the
-        derived function memo URI, which contains the determinstic
-        hashed bytes of all the function arguments, but also
-        additional namespacing including pipeline_id as documented
-        in memo.function_memospace.py.
-        """
-        return self.submit(func, *args, **kwargs).result()
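
With submit() and the futures import gone, MemoizingPicklingRunner is once again invoked only through the blocking Runner protocol, whose __call__(func, args, kwargs) signature is shown above. A usage sketch (the shim and storage root are hypothetical):

    runner = MemoizingPicklingRunner(subprocess_shim, "blob://my-bucket/mops-root")

    # blocks until the (possibly memoized) result is back from the shim:
    result = runner(train_model, (dataset,), {"epochs": 10})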

thds/mops/pure/pickling/pickles.py
@@ -60,30 +60,19 @@ class PicklableFunction:
     def __repr__(self) -> str:
         return str(self)
 
-    def _resolve(self) -> ty.Callable:
-        """Resolve the function if it hasn't been resolved yet."""
-        if self.f is None:
-            logger.debug(f"Dynamically importing function {str(self)}")
-            if self.fmod == "__main__":
-                self.f = get_main_module_function(self.fname)  # type: ignore
-            else:
-                mod = importlib.import_module(self.fmod)
-                self.f = getattr(mod, self.fname)
-            assert self.f is not None
-            return self.f
-        return self.f
-
-    @property
-    def func(self) -> ty.Callable:
-        """This is a property so we aren't ruining backward pickle compatibility."""
-        return self._resolve()
-
     @property
     def __name__(self) -> str:
         return self.fname
 
     def __call__(self, *args: ty.Any, **kwargs: ty.Any) -> ty.Any:
-        return self._resolve()(*args, **kwargs)
+        logger.debug(f"Dynamically importing function {str(self)}")
+        if self.fmod == "__main__":
+            self.f = get_main_module_function(self.fname)  # type: ignore
+        else:
+            mod = importlib.import_module(self.fmod)
+            self.f = getattr(mod, self.fname)
+        assert self.f
+        return self.f(*args, **kwargs)
 
 
 class UnpickleSimplePickleFromUri:
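
PicklableFunction now re-resolves its target on every __call__ instead of caching it behind _resolve()/.func. A hypothetical round trip illustrating the behavior (the constructor usage is assumed; only the fmod/fname/f attributes appear in the hunk):

    import math
    import pickle

    pf = PicklableFunction(math.sqrt)          # assumed to record fmod="math", fname="sqrt"
    restored = pickle.loads(pickle.dumps(pf))
    assert restored(9.0) == 3.0                # re-imports `math` at call time, every time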

thds/mops/pure/pickling/remote.py
@@ -12,7 +12,6 @@ from ..core.memo import results
 from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
 from ..core.serialize_paths import CoordinatingPathSerializer
 from ..core.types import Args, BlobStore, Kwargs, T
-from ..core.use_runner import unwrap_use_runner
 from ..runner import strings
 from . import _pickle, mprunner, pickles, sha256_b64
 
@@ -124,8 +123,7 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
     def do_work_return_result() -> object:
         # ONLY failures in this code should transmit an EXCEPTION
         # back to the orchestrator side.
-        with unwrap_use_runner(func):
-            return func(*args, **kwargs)
+        return func(*args, **kwargs)
 
     route_return_value_or_exception(
         _ResultExcWithMetadataChannel(