thds.mops 3.8.20250709195412__py3-none-any.whl → 3.8.20250711201237__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of thds.mops might be problematic. Click here for more details.

@@ -215,7 +215,14 @@ class SourceResult(ty.NamedTuple):
215
215
  file_uri: str
216
216
 
217
217
 
218
- def prepare_source_result(source_: Source) -> SourceResult:
218
+ class DuplicateSourceBasenameError(ValueError):
219
+ """This is not a catchable error - it will be raised inside the mops result-wrapping
220
+ code, and is an indication that user code has attempted to return two file-only Source objects
221
+ without URIs specified, and that those two files have the same basename.
222
+ """
223
+
224
+
225
+ def prepare_source_result(source_: Source, existing_uris: ty.Collection[str] = tuple()) -> SourceResult:
219
226
  """Call from within the remote side of an invocation, while serializing the function return value.
220
227
 
221
228
  Forces the Source to be present at a remote URI which will be available once
@@ -229,8 +236,9 @@ def prepare_source_result(source_: Source) -> SourceResult:
229
236
  if source_.cached_path and Path(source_.cached_path).exists():
230
237
  # it exists locally - an upload may be necessary.
231
238
  file_uri = to_uri(source_.cached_path)
232
- lookup_blob_store(source_.uri).putfile(source_.cached_path, source_.uri)
233
- logger.info("Uploading Source to %s", source_.uri)
239
+ if source_.uri not in existing_uris:
240
+ lookup_blob_store(source_.uri).putfile(source_.cached_path, source_.uri)
241
+ logger.info("Uploading Source to chosen URI %s", source_.uri)
234
242
  else:
235
243
  file_uri = ""
236
244
  logger.debug("Creating a SourceResult for a URI that is presumed to already be uploaded.")
@@ -249,6 +257,15 @@ def prepare_source_result(source_: Source) -> SourceResult:
249
257
  # If users do not like this automatically assigned remote URI name, they can construct
250
258
  # the Source themselves and provide a remote URI (as well as, optionally, a
251
259
  # local_path), and we will use their remote URI.
260
+ if remote_uri in existing_uris:
261
+ raise DuplicateSourceBasenameError(
262
+ f"Duplicate blob store URI {remote_uri} found in SourceResultPickler."
263
+ " This is usually an indication that you have two files with the same name in two different directories,"
264
+ " and are trying to convert them into Source objects with automatically-assigned URIs."
265
+ " Per the documentation, all output Source objects without explicitly assigned remote URIs must be provided"
266
+ " with unique basenames, in order to allow retention of the basename for usability and debugging."
267
+ )
268
+
252
269
  lookup_blob_store(remote_uri).putfile(local_path, remote_uri)
253
270
  # upload must _always_ happen on remotely-returned Sources, as detailed above.
254
271
  # There is no advantage to waiting to upload past this point.
@@ -2,7 +2,6 @@
2
2
  data and also functions."""
3
3
 
4
4
  import io
5
- import os
6
5
  import pickle
7
6
  import typing as ty
8
7
  from functools import partial
@@ -161,34 +160,20 @@ class SourceArgumentPickler:
161
160
  return None
162
161
 
163
162
 
164
- class DuplicateSourceBasenameError(ValueError):
165
- pass
166
-
167
-
168
163
  class SourceResultPickler:
169
164
  """Only for use on the remote side, when serializing the result."""
170
165
 
171
166
  def __init__(self) -> None:
172
167
  """There will be one of these per remote function call."""
173
168
  self._basenames_seen: set[str] = set()
169
+ # 'basename' is no longer a good name for what is being collected here,
170
+ # but we are not changing it to preserve backwards compatibility with existing results.
171
+ # We use this instead to collect URIs that _may_ have been uploaded by `mops`.
174
172
 
175
173
  def __call__(self, maybe_source: ty.Any) -> ty.Optional[_DeserSource]:
176
174
  if isinstance(maybe_source, source.Source):
177
- src_res = prepare_source_result(maybe_source)
178
- if src_res.file_uri:
179
- # we need to check to make sure that this file_uri is not a duplicate
180
- # - if it is, this indicates that this single function is attempting to return
181
- # two Source objects that have not yet been uploaded but will be uploaded to the same name.
182
- file_basename = os.path.basename(src_res.file_uri)
183
- if file_basename in self._basenames_seen:
184
- raise DuplicateSourceBasenameError(
185
- f"Duplicate basename {os.path.basename(src_res.file_uri)} found in SourceResultPickler."
186
- " This is usually an indication that you have two files with the same name in two different directories,"
187
- " and are trying to convert them into Source objects with automatically-assigned URIs."
188
- " Per the documentation, all output Source objects without explicitly assigned remote URIs must be provided"
189
- " with unique basenames, in order to allow retention of the basename for usability and debugging."
190
- )
191
- self._basenames_seen.add(file_basename)
175
+ src_res = prepare_source_result(maybe_source, self._basenames_seen)
176
+ self._basenames_seen.add(src_res.remote_uri)
192
177
  return ty.cast(_DeserSource, UnpickleSourceResult(*src_res))
193
178
 
194
179
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.mops
3
- Version: 3.8.20250709195412
3
+ Version: 3.8.20250711201237
4
4
  Summary: ML Ops tools for Trilliant Health
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
@@ -54,7 +54,7 @@ thds/mops/pure/core/pipeline_id_mask.py,sha256=Ll2yyQM5nSgzihx8i7fCrrSNlUUnIsbAO
54
54
  thds/mops/pure/core/script_support.py,sha256=3j9Z1O5ynSSPmWSghtJgAj-Lt4GwYcA8cWcpUIRM7q0,952
55
55
  thds/mops/pure/core/serialize_big_objs.py,sha256=YcOS1ccs82ZWO7nTbeumErMzYVe4hgXCTsfvMggYmd8,2332
56
56
  thds/mops/pure/core/serialize_paths.py,sha256=bWI-AKNP_Tf29JGO7DKqshOh7b7gu51lfGryDXo3aMI,5787
57
- thds/mops/pure/core/source.py,sha256=7E1e4pdwuuwc1DZP0V7vfU1PSZU8j7PDxyjG3hR9ND8,13622
57
+ thds/mops/pure/core/source.py,sha256=b0i58gE13e25lIV6ls1yPKH67SQ7aCuZmKDEHNr9Ux4,14682
58
58
  thds/mops/pure/core/types.py,sha256=w2g83miGhnjaWr2_4TW2Fc3BdIgoIHFbIr_wX1HC7A0,5452
59
59
  thds/mops/pure/core/uris.py,sha256=qO9_f-ro7kax6haNOPTPe81-_aUSRFELeeZH4PMTTU4,2694
60
60
  thds/mops/pure/core/use_runner.py,sha256=_YeKEjj6_9uc5UIjxcm-YKLUj4joApOdaTJCMaCLC2c,1547
@@ -81,7 +81,7 @@ thds/mops/pure/joblib/__init__.py,sha256=-3hSs-GsNzE_eNnwrdZBHAR_eaub5Uyl5GPYqBw
81
81
  thds/mops/pure/joblib/backend.py,sha256=F__6lrdc1-VcX4n4Pw7Lz1bBgeefShtRy2DQh6Fp-eI,2671
82
82
  thds/mops/pure/joblib/batching.py,sha256=tPOATD28-YW7KcWa3IqKm-fhLaILzM792ApvU-_zfnM,2298
83
83
  thds/mops/pure/pickling/__init__.py,sha256=WNdG8PdJCk-kYaXkvvPa--hjYGoUlBXG3w2X86yuhGo,156
84
- thds/mops/pure/pickling/_pickle.py,sha256=kZc70_yvEAmIsTjOJuwW8ZpExTL7icAWgGskZF5GSfI,9000
84
+ thds/mops/pure/pickling/_pickle.py,sha256=YB8xbqDiwdk8ccnVZ2_4kQn98V2JSrFqw2E3J-jEHlA,8081
85
85
  thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
86
86
  thds/mops/pure/pickling/mprunner.py,sha256=dVbwQA8hzEL7UiwYXmzoGwN3_jbEtGoHDPMkRmo_UtA,8378
87
87
  thds/mops/pure/pickling/pickles.py,sha256=nCg7L7CqReNWDF8FAdEmCcuXVC_kLT5zuyW3V8Vvvs4,4704
@@ -104,8 +104,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=7kDtn24ok8oBO3jFjlMmOK3jnZYpMoE_5Y8
104
104
  thds/mops/pure/tools/summarize/run_summary.py,sha256=LUtvbankAYbss2NCF_XbNl05jkNgxYz_SLyERJlp4sk,5773
105
105
  thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
106
  thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
107
- thds_mops-3.8.20250709195412.dist-info/METADATA,sha256=r3ojo-BZukA6nKQLAProV9qEn7oog-Cir9DjTH8B1fg,2225
108
- thds_mops-3.8.20250709195412.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
109
- thds_mops-3.8.20250709195412.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
110
- thds_mops-3.8.20250709195412.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
111
- thds_mops-3.8.20250709195412.dist-info/RECORD,,
107
+ thds_mops-3.8.20250711201237.dist-info/METADATA,sha256=ngCNk84w-g_ste8xFLBDAGvjdJlIc3w13XompxZy1sQ,2225
108
+ thds_mops-3.8.20250711201237.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
109
+ thds_mops-3.8.20250711201237.dist-info/entry_points.txt,sha256=qKvCAaB80syXfxVR3xx6x9J0YJdaQWkIbVSw-NwFgMw,322
110
+ thds_mops-3.8.20250711201237.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
111
+ thds_mops-3.8.20250711201237.dist-info/RECORD,,