thds.mops 3.9.20250813225451__py3-none-any.whl → 3.9.20250814171126__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.mops might be problematic.
- thds/mops/pure/core/deferred_work.py +5 -8
- thds/mops/pure/core/entry/route_result.py +1 -9
- thds/mops/pure/core/source.py +17 -9
- thds/mops/pure/pickling/remote.py +9 -1
- {thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/METADATA +1 -1
- {thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/RECORD +9 -9
- {thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/WHEEL +0 -0
- {thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/entry_points.txt +0 -0
- {thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/top_level.txt +0 -0
thds/mops/pure/core/deferred_work.py
CHANGED
@@ -23,6 +23,10 @@ _DEFERRED_WORK_THREADPOOL = refcount.Resource[concurrent.futures.ThreadPoolExecu
 logger = core.log.getLogger(__name__)


+class NoDeferredWorkContext(Exception):
+    """Raised when work is pushed with no open context"""
+
+
 @contextmanager
 def open_context() -> ty.Iterator[None]:
     """Enter this context before you begin serializing your invocation. When perform_all()
@@ -44,12 +48,6 @@ def open_context() -> ty.Iterator[None]:
     )


-@contextmanager
-def push_non_context() -> ty.Iterator[None]:
-    with _DEFERRED_INVOCATION_WORK.set(None):
-        yield
-
-
 def add(work_owner: str, work_id: ty.Hashable, work: ty.Callable[[], ty.Any]) -> None:
     """Add some work to an open context. The work will be performed when perform_all() is
     called. If there is no open context, perform the work immediately.
@@ -62,8 +60,7 @@ def add(work_owner: str, work_id: ty.Hashable, work: ty.Callable[[], ty.Any]) ->
     """
     deferred_work = _DEFERRED_INVOCATION_WORK()
     if deferred_work is None:
-
-        work()
+        raise NoDeferredWorkContext("Deferred work can only be added when there is an open context.")
     else:
         logger.debug("Adding work %s to deferred work %s", (work_owner, work_id), id(deferred_work))
         deferred_work[(work_owner, work_id)] = work
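Read together, these hunks tighten the deferred-work contract: add() no longer runs work immediately when no context is open; it raises NoDeferredWorkContext instead, and the push_non_context() escape hatch is gone. A minimal standalone sketch of that contract follows (my illustration only, not the package's implementation; the real module also manages a thread pool via _DEFERRED_WORK_THREADPOOL, which is omitted here):

import typing as ty
from contextlib import contextmanager
from contextvars import ContextVar
from functools import partial


class NoDeferredWorkContext(Exception):
    """Raised when work is pushed with no open context."""


_WORK: ContextVar[ty.Optional[ty.Dict[ty.Hashable, ty.Callable[[], ty.Any]]]] = ContextVar(
    "deferred_work", default=None
)


@contextmanager
def open_context() -> ty.Iterator[None]:
    token = _WORK.set({})  # a fresh map of pending work for this invocation
    try:
        yield
    finally:
        _WORK.reset(token)


def add(work_owner: str, work_id: ty.Hashable, work: ty.Callable[[], ty.Any]) -> None:
    pending = _WORK.get()
    if pending is None:
        raise NoDeferredWorkContext("Deferred work can only be added when there is an open context.")
    pending[(work_owner, work_id)] = work  # the same (owner, id) key registers the work only once


def perform_all() -> None:
    pending = _WORK.get() or {}
    while pending:
        _, work = pending.popitem()
        work()


if __name__ == "__main__":
    performed = []
    with open_context():
        add("example", "upload-1", partial(performed.append, "upload-1"))
        add("example", "upload-1", partial(performed.append, "upload-1"))  # de-duplicated by key
        perform_all()
    assert performed == ["upload-1"]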
thds/mops/pure/core/entry/route_result.py
CHANGED
@@ -38,15 +38,7 @@ def route_return_value_or_exception(
     pipeline_function_and_arguments_unique_key: ty.Optional[ty.Tuple[str, str]] = None,
 ) -> None:
     """The remote side of your runner implementation doesn't have to use this, but it's a reasonable approach."""
-    _routing_scope.enter(deferred_work.push_non_context())
-    # deferred work can be requested during result serialization, but because we don't want
-    # to leave a 'broken' result payload (one that refers to unperformed deferred work,
-    # maybe because of network or other failure), we simply don't open a deferred work
-    # context on the remote side, which forces all the work to be performed as it is
-    # added for deferral instead of actually being deferred.
-    #
-    # pushing this non-context is only necessary in the case of a thread-local
-    # 'remote' invocation - in all true remote invocations, there will be no context open.
+    _routing_scope.enter(deferred_work.open_context())

     _routing_scope.enter(log.logger_context(remote=pipeline_id))
     if pipeline_function_and_arguments_unique_key:
thds/mops/pure/core/source.py
CHANGED
@@ -178,10 +178,6 @@ def prepare_source_argument(source_: Source) -> ty.Union[str, hashing.Hash]:
     return hashing.Hash(algo=sys.intern(source_.hash.algo), bytes=source_.hash.bytes)


-def perform_source_uploads() -> None:  # has been replaced by a general work-deferring mechanism.
-    deferred_work.perform_all()
-
-
 # RETURNING FROM REMOTE
 #
 # when returning a Source from a remote, we cannot avoid the upload. this is because the
@@ -222,6 +218,11 @@ class DuplicateSourceBasenameError(ValueError):
     """


+def _put_file_to_blob_store(local_path: Path, remote_uri: str) -> None:
+    logger.info("Uploading Source to remote URI %s", remote_uri)
+    lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
+
+
 def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = tuple()) -> SourceResult:
     """Call from within the remote side of an invocation, while serializing the function return value.

@@ -237,8 +238,12 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
         # it exists locally - an upload may be necessary.
         file_uri = to_uri(source_.cached_path)
         if source_.uri not in existing_uris:
-
-
+            logger.info("Using existing remote URI on Source %s", source_.uri)
+            deferred_work.add(
+                __name__ + "-chosen-source-result",
+                source_.uri,
+                partial(_put_file_to_blob_store, source_.cached_path, source_.uri),
+            )
         else:
             file_uri = ""
             logger.debug("Creating a SourceResult for a URI that is presumed to already be uploaded.")
@@ -249,7 +254,7 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
     # future caller on a different machine could try to use this memoized result.
     local_path = source.path_from_uri(source_.uri)
     assert local_path.exists(), f"{local_path} does not exist"
-    logger.
+    logger.info("Automatically selecting a remote URI for a Source being returned.")
     remote_uri = invocation_output_uri(name=local_path.name)
     # the line above is a bit of opinionated magic. it uses the 'end' of the filename
     # to automagically assign a meaningful name to the output remote URI.
@@ -266,9 +271,12 @@ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = t
             " with unique basenames, in order to allow retention of the basename for usability and debugging."
         )

-
+    deferred_work.add(
+        __name__ + "-derived-source-result",
+        remote_uri,
+        partial(_put_file_to_blob_store, local_path, remote_uri),
+    )
     # upload must _always_ happen on remotely-returned Sources, as detailed above.
-    # There is no advantage to waiting to upload past this point.
     return SourceResult(remote_uri, source_.hash, source_.uri)

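The new upload paths above register their work via functools.partial, keyed by the remote URI. A small hypothetical illustration of that pattern (the blob store, URI scheme, and helper names are made up; a plain dict stands in for the deferred-work registry):

from functools import partial
from pathlib import Path


def put_file(local_path: Path, remote_uri: str) -> None:
    # stand-in for lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
    print(f"uploading {local_path} -> {remote_uri}")


pending: dict = {}


def register_upload(owner: str, local_path: Path, remote_uri: str) -> None:
    # mirrors deferred_work.add(owner, remote_uri, partial(_put_file_to_blob_store, ...)):
    # partial() binds the arguments now, so the work item is a no-argument callable later,
    # and keying by (owner, remote_uri) collapses repeat registrations into a single upload.
    pending[(owner, remote_uri)] = partial(put_file, local_path, remote_uri)


register_upload("source-result", Path("model.bin"), "adls://container/run-1/model.bin")
register_upload("source-result", Path("model.bin"), "adls://container/run-1/model.bin")
assert len(pending) == 1
for work in pending.values():
    work()  # exactly one upload runs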
thds/mops/pure/pickling/remote.py
CHANGED
@@ -7,7 +7,7 @@ from functools import cached_property
 from thds.core import log, scope

 from ..._utils.once import Once
-from ..core import lock, metadata, pipeline_id, uris
+from ..core import deferred_work, lock, metadata, pipeline_id, uris
 from ..core.entry import route_return_value_or_exception
 from ..core.memo import results
 from ..core.serialize_big_objs import ByIdRegistry, ByIdSerializer
@@ -67,6 +67,14 @@ class _ResultExcWithMetadataChannel:
             self._write_metadata_only("lost-race-after-serialization")
             return

+        # It's important that all deferred work is performed before the return
+        # value is written to the blob store so that result consumers don't read
+        # inconsistent data. For example, one type of deferred work is uploading
+        # result sources. If the invocation result is written with the source
+        # uri before the source is uploaded, then result consumers might try to
+        # download a non-existent file in the meantime.
+        deferred_work.perform_all()
+
         # BUG: there remains a race condition between fs.exists and putbytes.
         # multiple callers could get a False from fs.exists and then proceed to write.
         # the biggest issue here is for functions that are not truly pure, because
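The comment block added here states the ordering requirement: deferred uploads must be flushed before the result payload becomes visible. A toy sketch of that ordering, with an in-memory dict standing in for the blob store (an illustration under those assumptions, not the package's API):

import typing as ty

blob_store: ty.Dict[str, bytes] = {}
deferred: ty.List[ty.Callable[[], None]] = []


def perform_all() -> None:
    # flush every pending upload before anything that references it becomes visible
    while deferred:
        deferred.pop(0)()


# remote side: the serialized result will reference a source blob whose upload was deferred
deferred.append(lambda: blob_store.__setitem__("run-1/model.bin", b"weights"))

perform_all()  # 1. deferred uploads happen first ...
blob_store["run-1/result.pickle"] = b"references run-1/model.bin"  # 2. ... then the result is written

# a consumer that can read the result can therefore also read every blob it points at
assert "run-1/model.bin" in blob_store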
{thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: thds.mops
-Version: 3.9.20250813225451
+Version: 3.9.20250814171126
 Summary: ML Ops tools for Trilliant Health
 Author-email: Trilliant Health <info@trillianthealth.com>
 Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
{thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/RECORD
CHANGED
@@ -48,7 +48,7 @@ thds/mops/pure/adls/blob_store.py,sha256=ZWr7CKKcI-jz1sWZq4Jwq6LYkhFNxp-EFnNh83E
 thds/mops/pure/adls/output_fqn.py,sha256=qnwdubjVwKShzZ5RruD0_85x86DtPwZNSgwADrdhrTs,748
 thds/mops/pure/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/pure/core/content_addressed.py,sha256=RaCPvtM7bf0NnY5lNR5jPcNn2Moh-bmLtC4zOvdWjCU,1202
-thds/mops/pure/core/deferred_work.py,sha256=
+thds/mops/pure/core/deferred_work.py,sha256=lCgjFERyVKhfcy8R7MNA8NsEZAkZgmCQ71c8Bo7Nq8Q,3741
 thds/mops/pure/core/file_blob_store.py,sha256=N4m4LLrBZaqTJFR4D_eYl03a-n6yQBRsv0ID1bOS9TA,4298
 thds/mops/pure/core/metadata.py,sha256=xAL2iz0pXrcKapmYnNrqSZ8nH2GVakA167NSpAfwiCI,8276
 thds/mops/pure/core/output_naming.py,sha256=ntufOVNJiVPiUM-Azl9mFpDFhIxiB-V2je9dv9AUQhg,2283
@@ -58,13 +58,13 @@ thds/mops/pure/core/pipeline_id_mask.py,sha256=Ll2yyQM5nSgzihx8i7fCrrSNlUUnIsbAO
 thds/mops/pure/core/script_support.py,sha256=3j9Z1O5ynSSPmWSghtJgAj-Lt4GwYcA8cWcpUIRM7q0,952
 thds/mops/pure/core/serialize_big_objs.py,sha256=YcOS1ccs82ZWO7nTbeumErMzYVe4hgXCTsfvMggYmd8,2332
 thds/mops/pure/core/serialize_paths.py,sha256=bWI-AKNP_Tf29JGO7DKqshOh7b7gu51lfGryDXo3aMI,5787
-thds/mops/pure/core/source.py,sha256=
+thds/mops/pure/core/source.py,sha256=R36ajrCU1JdWF-8iD8YqAiP-q39ypZqf2DeBsqC9lYo,15105
 thds/mops/pure/core/types.py,sha256=w2g83miGhnjaWr2_4TW2Fc3BdIgoIHFbIr_wX1HC7A0,5452
 thds/mops/pure/core/uris.py,sha256=qO9_f-ro7kax6haNOPTPe81-_aUSRFELeeZH4PMTTU4,2694
 thds/mops/pure/core/use_runner.py,sha256=m1Mu1XDr3xRf_u_VSiHfTG4TH6fnSg0IqwmtbLKG_oc,2103
 thds/mops/pure/core/entry/__init__.py,sha256=kiDcsj16CwjRSexOZW-4h4b4tDCYIS_eLS5wgu2yIlk,151
 thds/mops/pure/core/entry/main.py,sha256=b1F5lFDK_hnpvW3bqzt5MWDcpKvCXZpWdEHI8zroC4k,2061
-thds/mops/pure/core/entry/route_result.py,sha256=
+thds/mops/pure/core/entry/route_result.py,sha256=sifgDWUn0LRUScJur4KfjVJjkinbjFJ5KQdhq3wJwdU,2148
 thds/mops/pure/core/entry/runner_registry.py,sha256=aPDCML7gM_zP6NfPnqx0_Q1oRHzgdaCa_XzYc5VIw7U,601
 thds/mops/pure/core/lock/__init__.py,sha256=4x9NdborLPGktDNs8LDapW17LeuAHLCbO9v-8IWdT2I,268
 thds/mops/pure/core/lock/_acquire.py,sha256=lVxHzDA30VB95Cfb4Fl2m0eatdLXCDv6rOCnERiyMNw,9468
@@ -89,7 +89,7 @@ thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-
 thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
 thds/mops/pure/pickling/mprunner.py,sha256=vabdHIVteddkU5ncOq73wWC7-naChW_3_vvAQArvjqU,8814
 thds/mops/pure/pickling/pickles.py,sha256=CSlnjLssE0Ad8YzqyaKqWCSNyW5LiMFKiXO6hWAZmvU,5097
-thds/mops/pure/pickling/remote.py,sha256=
+thds/mops/pure/pickling/remote.py,sha256=7JXZRGnLI5y5dqElIDrhIlaRv6Q_zQ_78aqNhO7O4KY,8478
 thds/mops/pure/pickling/sha256_b64.py,sha256=HL0cPixHPZYuZDVDBscxsnI-3a2amWEfw-LseOX-PyY,2916
 thds/mops/pure/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/pure/runner/get_results.py,sha256=1K6qf_Vg2YfUPfUuu103WyYsfS3e_ju6W7Z_PV01-pU,4053
@@ -109,8 +109,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
 thds/mops/pure/tools/summarize/run_summary.py,sha256=w45qiQr7elrHDiK9Hgs85gtU3gwLuXa447ih1Y23BBY,5776
 thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
-thds_mops-3.9.
+thds_mops-3.9.20250814171126.dist-info/METADATA,sha256=_uAlVicbPSNmKbIiwRID23fmuWdWulmrFhXcdzZdpBI,2225
+thds_mops-3.9.20250814171126.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+thds_mops-3.9.20250814171126.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
+thds_mops-3.9.20250814171126.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
+thds_mops-3.9.20250814171126.dist-info/RECORD,,
{thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/WHEEL
RENAMED
File without changes
{thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/entry_points.txt
RENAMED
File without changes
{thds_mops-3.9.20250813225451.dist-info → thds_mops-3.9.20250814171126.dist-info}/top_level.txt
RENAMED
File without changes