thds.mops 3.9.20250730184538__py3-none-any.whl → 3.9.20250805191550__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.
- thds/mops/pure/core/lock/__init__.py +3 -2
- thds/mops/pure/core/lock/_acquire.py +6 -1
- thds/mops/pure/core/lock/maintain.py +139 -63
- thds/mops/pure/pickling/remote.py +51 -12
- thds/mops/pure/runner/local.py +11 -1
- {thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/METADATA +1 -1
- {thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/RECORD +10 -10
- {thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/WHEEL +0 -0
- {thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/entry_points.txt +0 -0
- {thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/top_level.txt +0 -0
thds/mops/pure/core/lock/__init__.py
CHANGED
@@ -1,8 +1,9 @@
 from ._acquire import acquire  # noqa: F401
 from .maintain import (  # noqa: F401
     CannotMaintainLock,
-
+    LockWasStolenError,
+    add_lock_to_maintenance_daemon,
     maintain_to_release,
-
+    make_remote_lock_writer,
 )
 from .types import LockAcquired  # noqa: F401

thds/mops/pure/core/lock/_acquire.py
CHANGED
@@ -101,7 +101,12 @@ def acquire(  # noqa: C901

     start = _funcs.utc_now()

-    my_writer_id = humenc.encode(uuid4().bytes)
+    my_writer_id = humenc.encode(uuid4().bytes, num_bytes=2)
+    # we do not expect there to be many writers, so we make the humenc part of the writer
+    # id relatively short so it doesn't 'look' like other uses of humenc. Making the rest
+    # of the string identical to the base64 encoding (by choosing a multiple of 3) is not
+    # useful to us because we are only using this as a big UUID, not as a hash of an
+    # actual input.

     lockfile_writer = LockfileWriter(
         my_writer_id,

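For orientation only: a minimal, self-contained sketch of the idea behind the short writer id above. It assumes nothing about the project's humenc encoder beyond what the diff shows; base32 is used here purely as a stand-in, and short_writer_id is a hypothetical name.

    import base64
    from uuid import uuid4

    def short_writer_id(num_bytes: int = 2) -> str:
        # Encode only the first couple of bytes into a short, readable prefix so the id
        # does not resemble the longer encodings used elsewhere for content hashes, then
        # append the full random value so the id still behaves like a big UUID.
        raw = uuid4().bytes
        prefix = base64.b32encode(raw[:num_bytes]).decode().rstrip("=").lower()
        return f"{prefix}-{raw.hex()}"

    print(short_writer_id())  # e.g. 'ci2a-1f41bb...'
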
thds/mops/pure/core/lock/maintain.py
CHANGED
@@ -9,13 +9,15 @@ it when they get started, and from then on keep the `written_at` timestamp up to

 """

+import heapq
+import os
+import threading
 import time
 import typing as ty
+from dataclasses import dataclass
 from datetime import datetime, timedelta
-from functools import partial
-from threading import Thread

-from thds.core import config, log
+from thds.core import cache, config, log, scope

 from ._funcs import make_lock_uri
 from .read import get_writer_id, make_read_lockfile
@@ -23,32 +25,137 @@ from .types import LockAcquired
 from .write import LockEmitter, LockfileWriter

 MAINTAIN_LOCKS = config.item("thds.mops.pure.local.maintain_locks", default=True, parse=config.tobool)
+_MAINTENANCE_MARGIN = 0.5  # multiplier for the expire time
+assert _MAINTENANCE_MARGIN < 1, "Maintenance margin must be less than 1 or locks will expire!"
+
+_MAX_LOCKS_PER_THREAD = 200  # I want to leave lots of margin so that locks don't expire.

 logger = log.getLogger(__name__)


-class
-
+class _LockMaintenanceKit(ty.NamedTuple):
+    wakeup_time: float
+    lock_acquired: LockAcquired
+    should_exit: ty.Callable[[], bool]

-    maintain: ty.Callable[[], None]
-    expire_s: float
-    release: ty.Callable[[], None]

+class _LockMaintenanceThreadState(ty.NamedTuple):
+    heap: list[_LockMaintenanceKit]
+    heap_lock: threading.Lock
+    lock_added_event: threading.Event

-class _MaintainForever(ty.Protocol):
-    def __call__(self) -> None:
-        ...  # pragma: no cover

+@scope.bound
+def _maintenance_daemon(state: _LockMaintenanceThreadState, daemon_num: int) -> None:
+    """Daemon thread that maintains a set of locks."""
+    scope.enter(log.logger_context(pid=os.getpid(), maint_daemon_num=daemon_num))
+    log_at_level = logger.warning if daemon_num > 0 else logger.debug
+    log_at_level("Starting lock maintenance daemon thread %s", daemon_num)

-def _maintain_forever(
-    maintain: ty.Callable[[], ty.Any], expire_s: float, should_exit: ty.Callable[[], bool]
-) -> None:
     while True:
-
-
-
-
-
+        with state.heap_lock:
+            if not state.heap:
+                next_wakeup_time = None
+            else:
+                next_wakeup_time = state.heap[0].wakeup_time
+
+        if next_wakeup_time is None:
+            logger.debug("No locks to maintain; waiting indefinitely for new ones")
+            state.lock_added_event.wait()
+            state.lock_added_event.clear()
+            continue
+
+        # Wait until either: next maintenance time OR new lock added
+        sleep_duration = max(0, next_wakeup_time - time.monotonic())
+        woke_early = state.lock_added_event.wait(timeout=sleep_duration)
+        state.lock_added_event.clear()
+
+        if woke_early:
+            continue  # go back to the beginning and check for the highest priority lock
+
+        # Time to do maintenance
+        while state.heap and state.heap[0].wakeup_time <= time.monotonic():
+            with state.heap_lock:
+                _, lock_obj, should_exit_fn = heapq.heappop(state.heap)
+
+            if not should_exit_fn():
+                try:
+                    logger.debug("Maintaining lock %s", lock_obj.writer_id)
+                    lock_obj.maintain()
+                    # Re-schedule for next maintenance
+                    with state.heap_lock:
+                        next_maintenance = time.monotonic() + (lock_obj.expire_s * _MAINTENANCE_MARGIN)
+                        heapq.heappush(
+                            state.heap,
+                            _LockMaintenanceKit(next_maintenance, lock_obj, should_exit_fn),
+                        )
+                except Exception:
+                    logger.exception(f"Failed to maintain lock: {lock_obj}")
+
+
+@dataclass
+class _ShouldExit:
+    lock_acquired: LockAcquired
+    should_exit: bool = False
+
+    def check_status(self) -> bool:
+        return self.should_exit
+
+    def stop_maintaining(self) -> None:
+        self.should_exit = True
+        self.lock_acquired.release()
+
+
+_LOCK_RELEASERS_BY_ID = dict[str, ty.Callable[[], None]]()
+_LOCK_MAINTENANCE_DAEMON_STATES = dict[int, _LockMaintenanceThreadState]()
+
+
+@cache.locking
+def _ensure_daemon(thread_num: int) -> None:
+    """Start the maintenance daemon exactly once."""
+    lock_state = _LockMaintenanceThreadState(
+        heap=[],
+        heap_lock=threading.Lock(),
+        lock_added_event=threading.Event(),
+    )
+    assert thread_num not in _LOCK_MAINTENANCE_DAEMON_STATES  # protected by the cache.locking decorator
+    _LOCK_MAINTENANCE_DAEMON_STATES[thread_num] = lock_state
+    threading.Thread(target=_maintenance_daemon, args=(lock_state, thread_num), daemon=True).start()
+
+
+def add_lock_to_maintenance_daemon(lock_acq: LockAcquired) -> ty.Callable[[], None]:
+    """Add lock to global maintenance system and return a cleanup function."""
+    if lock_acq.writer_id in _LOCK_RELEASERS_BY_ID:
+        # technically we could be locking this, but mops itself does not allow
+        # multiple callers to ask for the same lock to be maintained at the same time;
+        # it will always be either the runner or the future that the runner has created.
+        return _LOCK_RELEASERS_BY_ID[lock_acq.writer_id]
+
+    should_exit = _ShouldExit(lock_acq)
+
+    for i in range(len(_LOCK_MAINTENANCE_DAEMON_STATES) + 1):
+        maintenance_daemon_state = _LOCK_MAINTENANCE_DAEMON_STATES.get(i)
+        if maintenance_daemon_state is None:
+            _ensure_daemon(i)
+            maintenance_daemon_state = _LOCK_MAINTENANCE_DAEMON_STATES[i]
+        elif len(maintenance_daemon_state.heap) > _MAX_LOCKS_PER_THREAD:
+            continue  # go to next thread if this one is too full
+
+        with maintenance_daemon_state.heap_lock:
+            next_time = time.monotonic() + (lock_acq.expire_s * _MAINTENANCE_MARGIN)
+            heapq.heappush(
+                maintenance_daemon_state.heap,
+                _LockMaintenanceKit(next_time, lock_acq, should_exit.check_status),
+            )
+            maintenance_daemon_state.lock_added_event.set()
+        break  # we found a thread that can take the lock
+
+    _LOCK_RELEASERS_BY_ID[lock_acq.writer_id] = should_exit.stop_maintaining
+    return should_exit.stop_maintaining
+
+
+# from this point down, the code is about how to prepare to call add_lock_to_maintenance_daemon
+# from the remote side, and what happens if the lock cannot or should not be maintained.


 class CannotMaintainLock(ValueError):
@@ -59,9 +166,9 @@ class LockWasStolenError(ValueError):
     pass  # pragma: no cover


-def
+def make_remote_lock_writer(lock_dir_uri: str, expected_writer_id: str = "") -> LockAcquired:
     """Only for use by remote side - does not _acquire_ the lock,
-    but merely
+    but merely allows for it to be maintained as unexpired. Does not allow for releasing,
     as it is not the responsibility of the remote side to release the lock.

     Will raise a CannotMaintainLock exception if the lock does not exist or has no
@@ -72,7 +179,15 @@ def remote_lock_maintain(lock_dir_uri: str, expected_writer_id: str = "") -> Loc
     match the lock's actual current writer_id - in other words, if some other writer has
     acquired the lock before the remote side has been able to start running.

-
+    Notably, this is a race condition! The remote side depends on actual lock holders to
+    cooperate in having only a single lock holder; the remote is simply checking a single
+    time and then maintaining the lock indefinitely if the writer_id matches.
+
+    TODO: If the lock is already expired but the writer_id still matches, perhaps we
+    could be acquiring the lock to eliminate the race, and if we fail, we would
+    exit with LockWasStolenError...
+
+    The return value is intended to be passed to add_lock_to_maintenance_daemon.
     """

     try:
@@ -115,52 +230,13 @@ def remote_lock_maintain(lock_dir_uri: str, expected_writer_id: str = "") -> Loc
     return lockfile_writer


-def
-    """Run lock maintenance until the process exits, or until the returned callable gets
-    returned.
-
-    Return a 'release wrapper' that stops maintenance of the lock and releases it.
-
-    A whole thread for this seems expensive, but the simplest alternative is having too
-    many lock maintainers trying to share time slices within some global lock maintainer,
-    and that runs a definite risk of overrunning the expiry time(s) for those locks.
-
-    If we were async all the way down, we could more plausibly make a bunch of async
-    network/filesystem calls here without taking into consideration how long they actually
-    take to execute.
-    """
-    should_exit = False
-
-    def should_stop_maintaining() -> bool:
-        return should_exit
-
-    Thread(
-        target=partial(
-            _maintain_forever,
-            lock_acq.maintain,
-            lock_acq.expire_s,
-            should_stop_maintaining,
-        ),
-        daemon=True,
-    ).start()
-
-    def stop_maintaining() -> None:
-        nonlocal should_exit
-        should_exit = True
-        lock_acq.release()
-
-    return stop_maintaining
-
-
-def maintain_to_release(
-    acquired_lock: LockAcquired,
-) -> ty.Callable[[], None]:
+def maintain_to_release(acquired_lock: LockAcquired) -> ty.Callable[[], None]:
     """Depending on configuration, potentially start maintaining the lock.

     Return a callable that will release the lock when called.
     """
     if MAINTAIN_LOCKS():
-        return
+        return add_lock_to_maintenance_daemon(acquired_lock)

     return acquired_lock.release

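For readers skimming the new maintenance code above: a minimal, standalone sketch of the same heapq-plus-Event scheduling pattern (one daemon thread, soonest renewal first, woken early when something new is added). The Lease/add_lease/renew names and the 60-second expiry are illustrative stand-ins, not thds.mops APIs.

    import heapq
    import threading
    import time
    import typing as ty

    EXPIRE_S = 60.0  # hypothetical lease lifetime; renew at half of it, like _MAINTENANCE_MARGIN

    class Lease(ty.NamedTuple):
        wakeup_time: float  # monotonic deadline for the next renewal
        name: str           # readable label (and a comparable tiebreaker ahead of the callable)
        renew: ty.Callable[[], None]

    _heap: ty.List[Lease] = []
    _heap_lock = threading.Lock()
    _added = threading.Event()

    def _daemon() -> None:
        while True:
            with _heap_lock:
                next_wakeup = _heap[0].wakeup_time if _heap else None
            if next_wakeup is None:
                _added.wait()   # nothing scheduled; sleep until add_lease() wakes us
                _added.clear()
                continue
            if _added.wait(timeout=max(0.0, next_wakeup - time.monotonic())):
                _added.clear()
                continue        # a new lease may now be the earliest; re-check the heap
            while _heap and _heap[0].wakeup_time <= time.monotonic():
                with _heap_lock:
                    due = heapq.heappop(_heap)
                due.renew()     # refresh the lease, then reschedule it
                with _heap_lock:
                    heapq.heappush(_heap, due._replace(wakeup_time=time.monotonic() + EXPIRE_S * 0.5))

    def add_lease(name: str, renew: ty.Callable[[], None]) -> None:
        with _heap_lock:
            heapq.heappush(_heap, Lease(time.monotonic() + EXPIRE_S * 0.5, name, renew))
        _added.set()            # wake the daemon so it can recompute its next deadline

    threading.Thread(target=_daemon, daemon=True).start()
    add_lease("example-lock", lambda: print("renewed example-lock"))
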
thds/mops/pure/pickling/remote.py
CHANGED
@@ -1,3 +1,4 @@
+import contextlib
 import typing as ty
 from dataclasses import dataclass
 from datetime import datetime, timezone
@@ -52,9 +53,30 @@ class _ResultExcWithMetadataChannel:
         )

     def return_value(self, r: T) -> None:
+        result_uri = self.fs.join(self.call_id, results.RESULT)
+        if self.fs.exists(result_uri):
+            logger.warning("Not overwriting existing result at %s prior to serialization", result_uri)
+            self._write_metadata_only("lost-race")
+            return
+
+        # when we pickle the return value, we also end up potentially uploading
+        # various Sources and Paths and other special-cased things inside the result.
         return_value_bytes = _pickle.gimme_bytes(self.dumper, r)
+        if self.fs.exists(result_uri):
+            logger.warning("Not overwriting existing result at %s after serialization", result_uri)
+            self._write_metadata_only("lost-race-after-serialization")
+            return
+
+        # BUG: there remains a race condition between fs.exists and putbytes.
+        # multiple callers could get a False from fs.exists and then proceed to write.
+        # the biggest issue here is for functions that are not truly pure, because
+        # they will be writing different results, and theoretically different callers
+        # could end up seeing the different results.
+        #
+        # In the future, if a Blob Store provided a put_unless_exists method, we could use
+        # that to avoid the race condition.
         self.fs.putbytes(
-
+            result_uri,
             self._metadata_header + return_value_bytes,
             type_hint="application/mops-return-value",
         )
@@ -79,6 +101,25 @@ def _unpickle_invocation(memo_uri: str) -> ty.Tuple[ty.Callable, Args, Kwargs]:
     return invocation.func, args, kwargs


+@contextlib.contextmanager
+def _manage_lock(lock_uri: str, lock_writer_id: str) -> ty.Iterator[ty.Optional[Exception]]:
+    stop_lock: ty.Callable = lambda: None  # noqa: E731
+    try:
+        stop_lock = lock.add_lock_to_maintenance_daemon(
+            lock.make_remote_lock_writer(lock_uri, expected_writer_id=lock_writer_id)
+        )
+        yield None  # pause for execution
+    except lock.CannotMaintainLock as e:
+        logger.info(f"Cannot maintain lock: {e}. Continuing without the lock.")
+        yield None  # pause for execution
+    except lock.LockWasStolenError as stolen_lock_error:
+        logger.error(f"Lock was stolen: {stolen_lock_error}. Will exit without running the function.")
+        yield stolen_lock_error  # pause to upload failure
+
+    stop_lock()  # not critical since we don't _own_ the lock, but keeps things cleaner
+
+
+@scope.bound
 def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
     """The arguments are those supplied by MemoizingPicklingRunner.

@@ -92,16 +133,6 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:

     # any recursively-called functions that use metadata will retain the original invoker.

-    try:
-        stop_lock = lock.launch_daemon_lock_maintainer(
-            lock.remote_lock_maintain(
-                fs.join(memo_uri, "lock"), expected_writer_id=invocation_metadata.invoker_uuid
-            )
-        )
-    except lock.CannotMaintainLock as e:
-        logger.info(f"Cannot maintain lock: {e}. Continuing without the lock.")
-        stop_lock = lambda: None  # noqa: E731
-
     def _extract_invocation_unique_key(memo_uri: str) -> ty.Tuple[str, str]:
         parts = fs.split(memo_uri)
         try:
@@ -113,6 +144,7 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
         invocation_parts = parts[runner_idx + 1 :]
         return fs.join(*invocation_parts[:-1]), invocation_parts[-1]

+    lock_error = scope.enter(_manage_lock(fs.join(memo_uri, "lock"), invocation_metadata.invoker_uuid))
     scope.enter(uris.ACTIVE_STORAGE_ROOT.set(uris.get_root(memo_uri)))

     try:
@@ -124,6 +156,14 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
     def do_work_return_result() -> object:
         # ONLY failures in this code should transmit an EXCEPTION
         # back to the orchestrator side.
+
+        # if the lock was stolen, we will write an exception
+        # so that the orchestrator knows that it failed.
+        # in theory, it could resume waiting for a result, though
+        # currently it does not do this.
+        if lock_error:
+            raise lock_error
+
         with unwrap_use_runner(func):
             return func(*args, **kwargs)

@@ -143,4 +183,3 @@ def run_pickled_invocation(memo_uri: str, *metadata_args: str) -> None:
         invocation_metadata.pipeline_id,
         _extract_invocation_unique_key(memo_uri),
     )
-    stop_lock()  # not critical since we don't _own_ the lock, but keeps things cleaner

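The _manage_lock helper above yields an exception instead of raising it, so the caller can decide when to surface a stolen lock (once it is ready to upload the failure). A standalone sketch of that "yield the error, raise it later" pattern; StolenError, try_maintain, and manage are illustrative stand-ins, not thds.mops APIs.

    import contextlib
    import typing as ty

    class StolenError(Exception):
        pass

    def try_maintain(resource: str) -> ty.Callable[[], None]:
        # Pretend to start background maintenance; raise StolenError here to see the other path.
        print(f"maintaining {resource}")
        return lambda: print(f"stopped maintaining {resource}")

    @contextlib.contextmanager
    def manage(resource: str) -> ty.Iterator[ty.Optional[Exception]]:
        stop: ty.Callable[[], None] = lambda: None
        try:
            stop = try_maintain(resource)
            yield None   # no error: the caller runs its real work here
        except StolenError as err:
            yield err    # hand the error to the caller instead of raising it right now
        stop()           # runs after the caller's block finishes normally

    with manage("some-lock") as maybe_error:
        if maybe_error:
            raise maybe_error  # the caller decides when (and whether) to surface it
        print("doing the real work")
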
thds/mops/pure/runner/local.py
CHANGED
@@ -35,9 +35,11 @@ _BEFORE_INVOCATION_SEMAPHORE = threading.BoundedSemaphore(int(max_concurrent_net
 _DarkBlue = colorized(fg="white", bg="#00008b")
 _GreenYellow = colorized(fg="black", bg="#adff2f")
 _Purple = colorized(fg="white", bg="#800080")
+_Pink = colorized(fg="black", bg="#ff1493")
 logger = log.getLogger(__name__)
 _LogKnownResult = make_colorized_out(_DarkBlue, out=logger.info, fmt_str=" {} ")
 _LogNewInvocation = make_colorized_out(_GreenYellow, out=logger.info, fmt_str=" {} ")
+_LogInvocationAfterSteal = make_colorized_out(_Pink, out=logger.info, fmt_str=" {} ")
 _LogAwaitedResult = make_colorized_out(_Purple, out=logger.info, fmt_str=" {} ")


@@ -141,6 +143,8 @@ def invoke_via_shim_or_return_memoized(  # noqa: C901
         run_summary.extract_source_uris((args, kwargs)),
     )

+    log_invocation = _LogNewInvocation  # this is what we use unless we steal the lock.
+
     # the network ops being grouped by _BEFORE_INVOCATION include one or more
     # download attempts (consider possible Paths) plus
     # one or more uploads (embedded Paths & Sources/refs, and then invocation).
@@ -181,17 +185,23 @@ def invoke_via_shim_or_return_memoized(  # noqa: C901
             return futures.resolved(p_unwrap_value_or_error(memo_uri, result))

         lock_owned = acquire_lock()  # still inside the semaphore, as it's a network op
+        if lock_owned:
+            log_invocation = _LogInvocationAfterSteal
+            logger.info(f"Stole expired lock for {memo_uri} - invoking ourselves.")

     assert lock_owned is not None
     # if/when we acquire the lock, we move forever into 'run this ourselves mode'.
     # If something about our invocation fails,
     # we fail just as we would have previously, without any attempt to go
     # 'back' to waiting for someone else to compute the result.
+    lock.maintain_to_release(lock_owned)
+    # we don't actually need the release_lock here, because it will get
+    # 'recreated' in the PostShimResultGetter below, which is also where it gets called

     future_result_getter = PostShimResultGetter[T](memo_uri, p_unwrap_value_or_error)

     with _BEFORE_INVOCATION_SEMAPHORE:
-
+        log_invocation(f"Invoking {memo_uri}")
         upload_invocation_and_deps()

         # can't hold the semaphore while we block on the shim, though.

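local.py now switches which colorized logger it uses once an expired lock has been stolen. A small standalone sketch of that "pick the log renderer at runtime" idea using raw ANSI codes; make_colored_logger and the memo URI below are made-up stand-ins, not the colorized/make_colorized_out helpers used above.

    import logging

    logging.basicConfig(level=logging.INFO, format="%(message)s")
    logger = logging.getLogger("sketch")

    def make_colored_logger(bg_ansi: int, out=logger.info):
        def _log(msg: str) -> None:
            out(f"\x1b[{bg_ansi}m {msg} \x1b[0m")  # wrap the message in an ANSI background color
        return _log

    log_new_invocation = make_colored_logger(42)          # green background
    log_invocation_after_steal = make_colored_logger(45)  # magenta/"pink" background

    stole_expired_lock = True  # pretend acquire_lock() just succeeded after an expiry
    log_invocation = log_invocation_after_steal if stole_expired_lock else log_new_invocation
    log_invocation("Invoking adls://container/runner/some-memo-uri")
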
{thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thds.mops
-Version: 3.9.
+Version: 3.9.20250805191550
 Summary: ML Ops tools for Trilliant Health
 Author-email: Trilliant Health <info@trillianthealth.com>
 Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo

{thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/RECORD
CHANGED
@@ -66,11 +66,11 @@ thds/mops/pure/core/entry/__init__.py,sha256=kiDcsj16CwjRSexOZW-4h4b4tDCYIS_eLS5
 thds/mops/pure/core/entry/main.py,sha256=b1F5lFDK_hnpvW3bqzt5MWDcpKvCXZpWdEHI8zroC4k,2061
 thds/mops/pure/core/entry/route_result.py,sha256=2LcS9M2mYtu56kso0YcMEZbR1mbTWZm0hFlbE2yaf4k,2741
 thds/mops/pure/core/entry/runner_registry.py,sha256=aPDCML7gM_zP6NfPnqx0_Q1oRHzgdaCa_XzYc5VIw7U,601
-thds/mops/pure/core/lock/__init__.py,sha256=
-thds/mops/pure/core/lock/_acquire.py,sha256=
+thds/mops/pure/core/lock/__init__.py,sha256=4x9NdborLPGktDNs8LDapW17LeuAHLCbO9v-8IWdT2I,268
+thds/mops/pure/core/lock/_acquire.py,sha256=lVxHzDA30VB95Cfb4Fl2m0eatdLXCDv6rOCnERiyMNw,9468
 thds/mops/pure/core/lock/_funcs.py,sha256=j4g8yVWnrAMPDKqLlq8nTnccM1KHSJ3g71L1iWNbV2Q,969
 thds/mops/pure/core/lock/cli.py,sha256=uidtmgHB2y5LDkj7SQTncy_cNe1EfIseuiJPV9kcxBU,2488
-thds/mops/pure/core/lock/maintain.py,sha256=
+thds/mops/pure/core/lock/maintain.py,sha256=UFhXdr9Q6FdPgq3PtELiAKdlRVl1sqF8IzAf3Oun4g4,9718
 thds/mops/pure/core/lock/read.py,sha256=Ct5eYMlkTlEaV5Yhw6HWsDD7VrgdhDZoI6AVIQ0ts-4,1255
 thds/mops/pure/core/lock/types.py,sha256=f32t_e2svMOXUVzcnLkEizw6Q47g3HPQsyAkGT2OKMs,993
 thds/mops/pure/core/lock/write.py,sha256=yuF2zRAzgYOmnet1GXZHwYT7oT1znVB3SPK1_j7orFA,5556
@@ -89,11 +89,11 @@ thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-
 thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
 thds/mops/pure/pickling/mprunner.py,sha256=vabdHIVteddkU5ncOq73wWC7-naChW_3_vvAQArvjqU,8814
 thds/mops/pure/pickling/pickles.py,sha256=CSlnjLssE0Ad8YzqyaKqWCSNyW5LiMFKiXO6hWAZmvU,5097
-thds/mops/pure/pickling/remote.py,sha256=
+thds/mops/pure/pickling/remote.py,sha256=eax4y0FgoH6id-ceptYbs9_bSLA9TIBhw2EEHqBkvHE,7971
 thds/mops/pure/pickling/sha256_b64.py,sha256=HL0cPixHPZYuZDVDBscxsnI-3a2amWEfw-LseOX-PyY,2916
 thds/mops/pure/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/pure/runner/get_results.py,sha256=1K6qf_Vg2YfUPfUuu103WyYsfS3e_ju6W7Z_PV01-pU,4053
-thds/mops/pure/runner/local.py,sha256=
+thds/mops/pure/runner/local.py,sha256=Qw1g6GasYyWWoZ7ACug8Sq0B__-FoqcaFO7HfFq0rys,11024
 thds/mops/pure/runner/shim_builder.py,sha256=obs2-NipAB8w0NR8o90UQX_bmHYS69c-raL2JPw8yM4,821
 thds/mops/pure/runner/simple_shims.py,sha256=r-kLmpSCwzjfzF-Ku43YKvrHMLpZR5jDmweo4Vk07O4,1069
 thds/mops/pure/runner/strings.py,sha256=PYAYMxZ2ehgahKIBXJilENNE6OrdNkueNBel8LPsoh8,26
@@ -109,8 +109,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
 thds/mops/pure/tools/summarize/run_summary.py,sha256=w45qiQr7elrHDiK9Hgs85gtU3gwLuXa447ih1Y23BBY,5776
 thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
+thds_mops-3.9.20250805191550.dist-info/METADATA,sha256=fNmRhdaq6BzlVyZzEZIHeu6nCIgYBgHuGjUZbcSZR9I,2225
+thds_mops-3.9.20250805191550.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thds_mops-3.9.20250805191550.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
+thds_mops-3.9.20250805191550.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
+thds_mops-3.9.20250805191550.dist-info/RECORD,,

{thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/WHEEL
RENAMED
File without changes

{thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/entry_points.txt
RENAMED
File without changes

{thds_mops-3.9.20250730184538.dist-info → thds_mops-3.9.20250805191550.dist-info}/top_level.txt
RENAMED
File without changes