thds.mops 3.8.20250709231504__py3-none-any.whl → 3.8.20250711201237__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of thds.mops might be problematic. Click here for more details.
- thds/mops/pure/core/source.py +20 -3
- thds/mops/pure/pickling/_pickle.py +5 -20
- {thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/METADATA +1 -1
- {thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/RECORD +7 -7
- {thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/WHEEL +0 -0
- {thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/entry_points.txt +0 -0
- {thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/top_level.txt +0 -0
thds/mops/pure/core/source.py
CHANGED
|
@@ -215,7 +215,14 @@ class SourceResult(ty.NamedTuple):
|
|
|
215
215
|
file_uri: str
|
|
216
216
|
|
|
217
217
|
|
|
218
|
-
|
|
218
|
+
class DuplicateSourceBasenameError(ValueError):
|
|
219
|
+
"""This is not a catchable error - it will be raised inside the mops result-wrapping
|
|
220
|
+
code, and is an indication that user code has attempted to return two file-only Source objects
|
|
221
|
+
without URIs specified, and that those two files have the same basename.
|
|
222
|
+
"""
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = tuple()) -> SourceResult:
|
|
219
226
|
"""Call from within the remote side of an invocation, while serializing the function return value.
|
|
220
227
|
|
|
221
228
|
Forces the Source to be present at a remote URI which will be available once
|
|
@@ -229,8 +236,9 @@ def prepare_source_result(source_: Source) -> SourceResult:
|
|
|
229
236
|
if source_.cached_path and Path(source_.cached_path).exists():
|
|
230
237
|
# it exists locally - an upload may be necessary.
|
|
231
238
|
file_uri = to_uri(source_.cached_path)
|
|
232
|
-
|
|
233
|
-
|
|
239
|
+
if source_.uri not in existing_uris:
|
|
240
|
+
lookup_blob_store(source_.uri).putfile(source_.cached_path, source_.uri)
|
|
241
|
+
logger.info("Uploading Source to chosen URI %s", source_.uri)
|
|
234
242
|
else:
|
|
235
243
|
file_uri = ""
|
|
236
244
|
logger.debug("Creating a SourceResult for a URI that is presumed to already be uploaded.")
|
|
@@ -249,6 +257,15 @@ def prepare_source_result(source_: Source) -> SourceResult:
|
|
|
249
257
|
# If users do not like this automatically assigned remote URI name, they can construct
|
|
250
258
|
# the Source themselves and provide a remote URI (as well as, optionally, a
|
|
251
259
|
# local_path), and we will use their remote URI.
|
|
260
|
+
if remote_uri in existing_uris:
|
|
261
|
+
raise DuplicateSourceBasenameError(
|
|
262
|
+
f"Duplicate blob store URI {remote_uri} found in SourceResultPickler."
|
|
263
|
+
" This is usually an indication that you have two files with the same name in two different directories,"
|
|
264
|
+
" and are trying to convert them into Source objects with automatically-assigned URIs."
|
|
265
|
+
" Per the documentation, all output Source objects without explicitly assigned remote URIs must be provided"
|
|
266
|
+
" with unique basenames, in order to allow retention of the basename for usability and debugging."
|
|
267
|
+
)
|
|
268
|
+
|
|
252
269
|
lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
|
|
253
270
|
# upload must _always_ happen on remotely-returned Sources, as detailed above.
|
|
254
271
|
# There is no advantage to waiting to upload past this point.
|
|
thds/mops/pure/pickling/_pickle.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
data and also functions."""
|
|
3
3
|
|
|
4
4
|
import io
|
|
5
|
-
import os
|
|
6
5
|
import pickle
|
|
7
6
|
import typing as ty
|
|
8
7
|
from functools import partial
|
|
@@ -161,34 +160,20 @@ class SourceArgumentPickler:
|
|
|
161
160
|
return None
|
|
162
161
|
|
|
163
162
|
|
|
164
|
-
class DuplicateSourceBasenameError(ValueError):
|
|
165
|
-
pass
|
|
166
|
-
|
|
167
|
-
|
|
168
163
|
class SourceResultPickler:
|
|
169
164
|
"""Only for use on the remote side, when serializing the result."""
|
|
170
165
|
|
|
171
166
|
def __init__(self) -> None:
|
|
172
167
|
"""There will be one of these per remote function call."""
|
|
173
168
|
self._basenames_seen: set[str] = set()
|
|
169
|
+
# 'basename' is no longer a good name for what is being collected here,
|
|
170
|
+
# but we are not changing it to preserve backwards compatibility with existing results.
|
|
171
|
+
# We use this instead to collect URIs that _may_ have been uploaded by `mops`.
|
|
174
172
|
|
|
175
173
|
def __call__(self, maybe_source: ty.Any) -> ty.Optional[_DeserSource]:
|
|
176
174
|
if isinstance(maybe_source, source.Source):
|
|
177
|
-
src_res = prepare_source_result(maybe_source)
|
|
178
|
-
|
|
179
|
-
# we need to check to make sure that this file_uri is not a duplicate
|
|
180
|
-
# - if it is, this indicates that this single function is attempting to return
|
|
181
|
-
# two Source objects that have not yet been uploaded but will be uploaded to the same name.
|
|
182
|
-
file_basename = os.path.basename(src_res.file_uri)
|
|
183
|
-
if file_basename in self._basenames_seen:
|
|
184
|
-
raise DuplicateSourceBasenameError(
|
|
185
|
-
f"Duplicate basename {os.path.basename(src_res.file_uri)} found in SourceResultPickler."
|
|
186
|
-
" This is usually an indication that you have two files with the same name in two different directories,"
|
|
187
|
-
" and are trying to convert them into Source objects with automatically-assigned URIs."
|
|
188
|
-
" Per the documentation, all output Source objects without explicitly assigned remote URIs must be provided"
|
|
189
|
-
" with unique basenames, in order to allow retention of the basename for usability and debugging."
|
|
190
|
-
)
|
|
191
|
-
self._basenames_seen.add(file_basename)
|
|
175
|
+
src_res = prepare_source_result(maybe_source, self._basenames_seen)
|
|
176
|
+
self._basenames_seen.add(src_res.remote_uri)
|
|
192
177
|
return ty.cast(_DeserSource, UnpickleSourceResult(*src_res))
|
|
193
178
|
|
|
194
179
|
return None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thds.mops
|
|
3
|
-
Version: 3.8.20250709231504
|
|
3
|
+
Version: 3.8.20250711201237
|
|
4
4
|
Summary: ML Ops tools for Trilliant Health
|
|
5
5
|
Author-email: Trilliant Health <info@trillianthealth.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
|
|
@@ -54,7 +54,7 @@ thds/mops/pure/core/pipeline_id_mask.py,sha256=Ll2yyQM5nSgzihx8i7fCrrSNlUUnIsbAO
|
|
|
54
54
|
thds/mops/pure/core/script_support.py,sha256=3j9Z1O5ynSSPmWSghtJgAj-Lt4GwYcA8cWcpUIRM7q0,952
|
|
55
55
|
thds/mops/pure/core/serialize_big_objs.py,sha256=YcOS1ccs82ZWO7nTbeumErMzYVe4hgXCTsfvMggYmd8,2332
|
|
56
56
|
thds/mops/pure/core/serialize_paths.py,sha256=bWI-AKNP_Tf29JGO7DKqshOh7b7gu51lfGryDXo3aMI,5787
|
|
57
|
-
thds/mops/pure/core/source.py,sha256=
|
|
57
|
+
thds/mops/pure/core/source.py,sha256=b0i58gE13e25lIV6ls1yPKH67SQ7aCuZmKDEHNr9Ux4,14682
|
|
58
58
|
thds/mops/pure/core/types.py,sha256=w2g83miGhnjaWr2_4TW2Fc3BdIgoIHFbIr_wX1HC7A0,5452
|
|
59
59
|
thds/mops/pure/core/uris.py,sha256=qO9_f-ro7kax6haNOPTPe81-_aUSRFELeeZH4PMTTU4,2694
|
|
60
60
|
thds/mops/pure/core/use_runner.py,sha256=_YeKEjj6_9uc5UIjxcm-YKLUj4joApOdaTJCMaCLC2c,1547
|
|
@@ -81,7 +81,7 @@ thds/mops/pure/joblib/__init__.py,sha256=-3hSs-GsNzE_eNnwrdZBHAR_eaub5Uyl5GPYqBw
|
|
|
81
81
|
thds/mops/pure/joblib/backend.py,sha256=F__6lrdc1-VcX4n4Pw7Lz1bBgeefShtRy2DQh6Fp-eI,2671
|
|
82
82
|
thds/mops/pure/joblib/batching.py,sha256=tPOATD28-YW7KcWa3IqKm-fhLaILzM792ApvU-_zfnM,2298
|
|
83
83
|
thds/mops/pure/pickling/__init__.py,sha256=WNdG8PdJCk-kYaXkvvPa--hjYGoUlBXG3w2X86yuhGo,156
|
|
84
|
-
thds/mops/pure/pickling/_pickle.py,sha256=
|
|
84
|
+
thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-jEHlA,8081
|
|
85
85
|
thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
|
|
86
86
|
thds/mops/pure/pickling/mprunner.py,sha256=dVbwQA8hzEL7UiwYXmzoGwN3_jbEtGoHDPMkRmo_UtA,8378
|
|
87
87
|
thds/mops/pure/pickling/pickles.py,sha256=nCg7L7CqReNWDF8FAdEmCcuXVC_kLT5zuyW3V8Vvvs4,4704
|
|
@@ -104,8 +104,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
|
|
|
104
104
|
thds/mops/pure/tools/summarize/run_summary.py,sha256=LUtvbankAYbss2NCF_XbNl05jkNgxYz_SLyERJlp4sk,5773
|
|
105
105
|
thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
106
|
thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
|
|
107
|
-
thds_mops-3.8.
|
|
108
|
-
thds_mops-3.8.20250709231504.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
109
|
-
thds_mops-3.8.20250709231504.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
|
|
110
|
-
thds_mops-3.8.20250709231504.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
111
|
-
thds_mops-3.8.20250709231504.dist-info/RECORD,,
|
|
107
|
+
thds_mops-3.8.20250711201237.dist-info/METADATA,sha256=ngCNk84w-g_ste8xFLBDAGvjdJlIc3w13XompxZy1sQ,2225
|
|
108
|
+
thds_mops-3.8.20250711201237.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
109
|
+
thds_mops-3.8.20250711201237.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
|
|
110
|
+
thds_mops-3.8.20250711201237.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
111
|
+
thds_mops-3.8.20250711201237.dist-info/RECORD,,
|
|
File without changes
|
{thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{thds_mops-3.8.20250709231504.dist-info → thds_mops-3.8.20250711201237.dist-info}/top_level.txt
RENAMED
|
File without changes
|