thds.mops 3.8.20250425173841__py3-none-any.whl → 3.8.20250425182948__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,8 +48,9 @@ class _MagicApi:
48
48
  *,
49
49
  blob_root: uris.UriResolvable = "",
50
50
  pipeline_id: str = "",
51
+ calls: ty.Collection[ty.Callable] = tuple(),
51
52
  ) -> ty.Callable[[ty.Callable[P, R]], sauce.Magic[P, R]]:
52
- return sauce.make_magic(_get_config(), shim_or_builder, blob_root, pipeline_id)
53
+ return sauce.make_magic(_get_config(), shim_or_builder, blob_root, pipeline_id, calls)
53
54
 
54
55
  @staticmethod
55
56
  def deco(
@@ -68,6 +68,7 @@ class Magic(ty.Generic[P, R]):
68
68
  self,
69
69
  func: ty.Callable[P, R],
70
70
  config: _MagicConfig,
71
+ calls: ty.Collection[ty.Callable] = frozenset(),
71
72
  ):
72
73
  functools.update_wrapper(self, func)
73
74
  self._func_config_path = full_name_and_callable(func)[0].replace("--", ".")
@@ -80,6 +81,7 @@ class Magic(ty.Generic[P, R]):
80
81
  str(func) + "_SHIM", None # none means nothing has been set stack-local
81
82
  )
82
83
  self.runner = MemoizingPicklingRunner(self._shimbuilder, self._get_blob_root)
84
+ self.runner.calls(func, *calls)
83
85
  self._func = use_runner(self.runner, self._is_off)(func)
84
86
  self.__doc__ = f"{func.__doc__}\n\nMagic class info:\n{self.__class__.__doc__}"
85
87
  self.__wrapped__ = func
@@ -138,6 +140,7 @@ def make_magic(
138
140
  shim_or_builder: ty.Union[ShimName, ShimOrBuilder, None],
139
141
  blob_root: uris.UriResolvable,
140
142
  pipeline_id: str,
143
+ calls: ty.Collection[ty.Callable],
141
144
  ) -> ty.Callable[[ty.Callable[P, R]], Magic[P, R]]:
142
145
  def deco(func: ty.Callable[P, R]) -> Magic[P, R]:
143
146
  fully_qualified_name = full_name_and_callable(func)[0].replace("--", ".")
@@ -147,6 +150,6 @@ def make_magic(
147
150
  config.blob_root[fully_qualified_name] = uris.to_lazy_uri(blob_root)
148
151
  if pipeline_id: # could be empty string
149
152
  config.pipeline_id[fully_qualified_name] = pipeline_id
150
- return Magic(func, config)
153
+ return Magic(func, config, calls)
151
154
 
152
155
  return deco
@@ -1,3 +1,4 @@
1
+ from . import calls, unique_name_for_function # noqa: F401
1
2
  from .function_memospace import ( # noqa
2
3
  args_kwargs_content_address,
3
4
  make_function_memospace,
@@ -0,0 +1,91 @@
1
+ """This module currently exists only to serve the use case of tracking function logic keys recursively
2
+ for mops-wrapped functions that call other mops-wrapped functions...
3
+
4
+ which is _often but not always_ an anti-pattern...
5
+ """
6
+
7
+ import typing as ty
8
+
9
+ from .unique_name_for_function import (
10
+ extract_function_logic_key_from_docstr,
11
+ make_unique_name_including_docstring_key,
12
+ )
13
+
14
+
15
def resolve(
    calls_registry: ty.Mapping[ty.Callable, ty.Collection[ty.Callable]],
    origin_callable: ty.Callable,
) -> ty.List[ty.Callable]:
    """Return every callable transitively reachable from ``origin_callable``.

    ``calls_registry`` maps a caller to the callables it is registered as calling
    (the 'edges' of the call graph). The result lists each reachable callable exactly
    once, in depth-first discovery order, and never includes the origin callable
    itself - even when the graph contains a cycle leading back to it, or the origin
    is registered as calling itself.
    """
    # Seeding `seen` with the origin guarantees it is expanded exactly once; without
    # that, an origin registered as its own callee would be re-pushed forever.
    seen = {origin_callable}
    reachable: ty.List[ty.Callable] = []
    pending = list(calls_registry.get(origin_callable, ()))

    while pending:
        current = pending.pop()
        if current in seen:
            continue

        seen.add(current)
        reachable.append(current)
        # .get() rather than [] so that a defaultdict registry is not mutated by lookups.
        pending.extend(calls_registry.get(current, ()))

    return reachable
36
+
37
+
38
+ # the below code is operating under the assumption that putting all function full names
39
+ # (module and name) plus their function logic keys inside the memo uri explicitly is
40
+ # better than hashing them all together, because this gives users the standard amount of
41
+ # 'debuggability' that they've come to expect when it comes to things like function names
42
+ # and function-logic-keys.
43
+ #
44
+ # it _will_ lead to longer memo uris, which is unfortunate in some ways.
45
+ #
46
+ # an alternative would be to store these inside the Invocation and also change the _hash_
47
+ # that we're already computing from being a hash of _only_ the args, kwargs to a hash
48
+ # of those plus these.
49
+ #
50
+ # but there's great utility in being able to see that the hash itself comes only
51
+ # from the args, kwargs - and it is not expected that the set of functions 'called'
52
+ # would change during runtime - it should be static like everything else.
53
+ #
54
+ # A third alternative would be to use a second hash (calls-<the hash>) and then embed the
55
+ # actual names in some other place - either the invocation itself, or possibly as
56
+ # metadata. But I also think there's utility in being able to see those function names as
57
+ # part of the overall memo uri, rather than having to go look at metadata to see it. It
58
+ # also makes it much more obvious that we're doing nesting - which is frowned upon for
59
+ # most use cases, and good to make visible even if the use case is valuable.
60
+
61
+
62
+ CALLS_PREFIX = "calls-"
63
+
64
+
65
def combine_function_logic_keys(functions: ty.Iterable[ty.Callable]) -> tuple[str, ...]:
    """Build the sorted 'calls-' memo components for the given functions.

    Each function that has a function-logic-key in its docstring contributes one
    string: CALLS_PREFIX followed by its unique name including that key. A function
    without a function-logic-key can't really 'invalidate' anything, so it is left out.
    """
    prefixed_names = (
        CALLS_PREFIX + make_unique_name_including_docstring_key(called)
        for called in functions
        if extract_function_logic_key_from_docstr(called)
    )
    return tuple(sorted(prefixed_names))
74
+
75
+
76
class CallsPieces(ty.NamedTuple):
    """The pieces of a memo string (minus args/kwargs hash) as returned by split_off_calls_strings."""

    # everything to the left of the function-name component (separator not included)
    remaining_prefix: str
    # the right-most component that does not start with CALLS_PREFIX
    full_function_name: str
    # the trailing 'calls-' components with CALLS_PREFIX stripped, collected right-to-left
    calls_functions: list[str]
80
+
81
+
82
def split_off_calls_strings(memo_str_not_including_args_kwargs: str, separator: str) -> CallsPieces:
    """Peel trailing 'calls-' components off a memo string.

    Components are consumed from the right, one separator at a time. Every component
    starting with CALLS_PREFIX is collected (prefix removed, in right-to-left order);
    the first component that does not start with it is taken as the full function
    name, and whatever lies to its left is the remaining prefix.

    Raises ValueError (from tuple unpacking) if the string runs out of separators
    before a non-'calls-' component has been split off.
    """
    prefix_len = len(CALLS_PREFIX)
    called: list[str] = []

    remaining, component = memo_str_not_including_args_kwargs.rsplit(separator, 1)
    while component.startswith(CALLS_PREFIX):
        called.append(component[prefix_len:])
        remaining, component = remaining.rsplit(separator, 1)

    return CallsPieces(remaining, component, called)
@@ -147,6 +147,7 @@ from ..pipeline_id_mask import (
147
147
  pipeline_id_mask,
148
148
  )
149
149
  from ..uris import lookup_blob_store
150
+ from . import calls
150
151
  from .unique_name_for_function import make_unique_name_including_docstring_key, parse_unique_name
151
152
 
152
153
 
@@ -227,6 +228,7 @@ class MemoUriComponents(ty.NamedTuple):
227
228
  function_module: str
228
229
  function_name: str
229
230
  function_logic_key: str
231
+ calls_functions: ty.List[str]
230
232
  args_hash: str
231
233
 
232
234
 
@@ -247,8 +249,12 @@ def parse_memo_uri(
247
249
 
248
250
  runner_prefix = runner_prefix.rstrip(separator)
249
251
  rest, args_hash = memo_uri.rsplit(separator, 1) # args hash is last component
250
- rest, full_function_name = rest.rsplit(separator, 1)
251
- pipeline_id = rest[len(runner_prefix) :]
252
+
253
+ remaining_prefix, full_function_name, calls_functions = calls.split_off_calls_strings(
254
+ rest, separator
255
+ )
256
+
257
+ pipeline_id = remaining_prefix[len(runner_prefix) :]
252
258
  pipeline_id = pipeline_id.strip(separator)
253
259
 
254
260
  function_parts = parse_unique_name(full_function_name)
@@ -259,6 +265,7 @@ def parse_memo_uri(
259
265
  function_parts.module,
260
266
  function_parts.name,
261
267
  function_parts.function_logic_key,
268
+ calls_functions,
262
269
  args_hash,
263
270
  )
264
271
 
@@ -88,7 +88,7 @@ def read_partial_pickle(full_bytes: bytes) -> ty.Tuple[bytes, ty.Any]:
88
88
  # non-pickle metadata and embedding it at the beginning of the file.
89
89
  first_pickle_pos = full_bytes.find(b"\x80")
90
90
  if first_pickle_pos == -1:
91
- raise ValueError("Unable to find a pickle in the bytes")
91
+ raise ValueError(f"Unable to find a pickle in bytes of length {len(full_bytes)}")
92
92
  return (
93
93
  full_bytes[:first_pickle_pos],
94
94
  CallableUnpickler(io.BytesIO(full_bytes[first_pickle_pos:])).load(),
@@ -102,7 +102,10 @@ def make_read_header_and_object(
102
102
  type_hint: str, xf_header: ty.Optional[ty.Callable[[bytes], H]] = None
103
103
  ) -> ty.Callable[[str], ty.Tuple[H, ty.Any]]:
104
104
  def read_object(uri: str) -> ty.Tuple[H, ty.Any]:
105
- header, unpickled = read_partial_pickle(get_bytes(uri, type_hint=type_hint))
105
+ uri_bytes = get_bytes(uri, type_hint=type_hint)
106
+ if not uri_bytes:
107
+ raise ValueError(f"{uri} exists but is empty - something is very wrong.")
108
+ header, unpickled = read_partial_pickle(uri_bytes)
106
109
  return (xf_header or (lambda h: h))(header), unpickled # type: ignore
107
110
 
108
111
  return read_object
@@ -6,8 +6,11 @@ See runner.local.py for the core runner implementation.
6
6
  """
7
7
 
8
8
  import typing as ty
9
+ from collections import defaultdict
9
10
  from functools import partial
10
11
 
12
+ from typing_extensions import Self
13
+
11
14
  from thds.core import cache, log
12
15
  from thds.core.stack_context import StackContext
13
16
 
@@ -94,6 +97,23 @@ class MemoizingPicklingRunner:
94
97
 
95
98
  self._run_directory = run_summary.create_mops_run_directory()
96
99
 
100
+ self._calls_registry: dict[ty.Callable, list[ty.Callable]] = defaultdict(list)
101
+
102
    def calls(self, caller: ty.Callable, *callees: ty.Callable) -> Self:
        """Register that the first Callable calls the provided Callable(s).

        This is (currently) used to ensure that function-logic-keys on the callees affect
        the memoization of the caller. Callees that do not have a function-logic-key will
        be ignored for this purpose; however there are no known reasons why your
        underlying Callable should not have a function-logic-key, unless it has never been
        modified since its creation.

        The interface is more general and could in theory be used for other purposes in
        the future.
        """
        # _calls_registry is a defaultdict(list): the first registration for a caller
        # creates its entry, and later registrations append to the same list.
        self._calls_registry[caller].extend(callees)
        return self  # returns self mainly to facilitate use with use_runner.
116
+
97
117
  def shared(self, *objs: ty.Any, **named_objs: ty.Any) -> None:
98
118
  """Set up memoizing pickle serialization for these objects.
99
119
 
@@ -162,6 +182,7 @@ class MemoizingPicklingRunner:
162
182
  self._wrap_shim_builder,
163
183
  _pickle.read_metadata_and_object,
164
184
  self._run_directory,
185
+ self._calls_registry,
165
186
  )(
166
187
  self._rerun_exceptions,
167
188
  memo.make_function_memospace(
@@ -43,6 +43,7 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
43
43
  shim_builder: types.ShimBuilder,
44
44
  get_meta_and_result: types.GetMetaAndResult,
45
45
  run_directory: ty.Optional[Path] = None,
46
+ calls_registry: ty.Mapping[ty.Callable, ty.Collection[ty.Callable]] = dict(), # noqa: B006
46
47
  ) -> ty.Callable[[bool, str, ty.Callable[..., T], Args, Kwargs], T]:
47
48
  @scope.bound
48
49
  def create_invocation__check_result__wait_shim(
@@ -80,7 +81,12 @@ def invoke_via_shim_or_return_memoized( # noqa: C901
80
81
  scope.enter(deferred_work.open_context()) # optimize Source objects during serialization
81
82
 
82
83
  args_kwargs_bytes = serialize_args_kwargs(storage_root, func, args, kwargs)
83
- memo_uri = fs.join(function_memospace, memo.args_kwargs_content_address(args_kwargs_bytes))
84
+ memo_uri = fs.join(
85
+ function_memospace,
86
+ *memo.calls.combine_function_logic_keys(memo.calls.resolve(calls_registry, func)),
87
+ # ^ these will be embedded as extra nesting.
88
+ memo.args_kwargs_content_address(args_kwargs_bytes),
89
+ )
84
90
 
85
91
  # Define some important and reusable 'chunks of work'
86
92
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.mops
3
- Version: 3.8.20250425173841
3
+ Version: 3.8.20250425182948
4
4
  Summary: ML Ops tools for Trilliant Health
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
@@ -36,8 +36,8 @@ thds/mops/k8s/tools/krsync.py,sha256=us7pXX0-bRMwD2oAno7Z6BJcPs6FgaUabHW0STyQJYg
36
36
  thds/mops/k8s/tools/krsync.sh,sha256=lskw4COt51Bv1yy2IAYUc8u8uQV-coSyUiOT8rADKkQ,546
37
37
  thds/mops/pure/__init__.py,sha256=kbG0lMvXRBS3LGbb2gPPE9-qjYMXrypyb2tJX2__aZc,1533
38
38
  thds/mops/pure/_magic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- thds/mops/pure/_magic/api.py,sha256=7FxlBVLmh5kHp2Jro8gw_I1rmNaaNGitJtOnjega1Bs,5034
40
- thds/mops/pure/_magic/sauce.py,sha256=AdFNqldbaZ41k8o8CSug75boR2FBEdenOwZTnNBxaK8,6173
39
+ thds/mops/pure/_magic/api.py,sha256=kSlediIZQYsmeHB8plP6osjvUuSEVW4NWdY9ADia12Y,5094
40
+ thds/mops/pure/_magic/sauce.py,sha256=LDiv4YnJ4yQ7YW8t4Qr8bB6p3l5KcteSxMuO--BRJ3A,6316
41
41
  thds/mops/pure/_magic/shims.py,sha256=JI49ddv6lEUmNVsEl-XkGlsx2RpOMQoIOSSSfootYE8,1188
42
42
  thds/mops/pure/adls/__init__.py,sha256=fw67xxwnizBurScMa-_zWb94lo5gamEVRt27V4bR0jc,54
43
43
  thds/mops/pure/adls/_files.py,sha256=9m35Y4elWF0DjgAXVp4oi5CaY6fXWt8n67PilWxWJns,821
@@ -71,8 +71,9 @@ thds/mops/pure/core/lock/maintain.py,sha256=2VkqKxyp0bZkfM5wZV4Lz7zmZl7t5TOcCGNY
71
71
  thds/mops/pure/core/lock/read.py,sha256=Ct5eYMlkTlEaV5Yhw6HWsDD7VrgdhDZoI6AVIQ0ts-4,1255
72
72
  thds/mops/pure/core/lock/types.py,sha256=f32t_e2svMOXUVzcnLkEizw6Q47g3HPQsyAkGT2OKMs,993
73
73
  thds/mops/pure/core/lock/write.py,sha256=4z3W9rsRIs5ZI-_g2Q6ZplQdez6DxCGJ-HZikQI3dHo,5614
74
- thds/mops/pure/core/memo/__init__.py,sha256=7h0BHoHqCj-u8lNtPMgrkocvUe7WVq5gIM3djYIz848,217
75
- thds/mops/pure/core/memo/function_memospace.py,sha256=36mOPybkj8vlGzo6pxDQCS8ZxccHWoNAcBjhBR8wrRA,11463
74
+ thds/mops/pure/core/memo/__init__.py,sha256=OAgSWsup07EKxITr3yjwJ8eXbhU6-P1DVeZaYIgylgc,277
75
+ thds/mops/pure/core/memo/calls.py,sha256=kvm6kn-CbOLxZuo86BvzEJw69p7VlEJ8_mCiWd6uz-g,3631
76
+ thds/mops/pure/core/memo/function_memospace.py,sha256=ooUoqqCDwsqE_Ni25SUW0iTx57kGiWMcewn1F6vYTEI,11620
76
77
  thds/mops/pure/core/memo/keyfunc.py,sha256=FAOEDzMcQ-0JvW4j1eaUzixnemo_373V-16kWZl7_i0,2053
77
78
  thds/mops/pure/core/memo/overwrite_params.py,sha256=ltuFxhr8gNo2iBoBz2eFPayjSV23gMdBuoLZD42lIAg,2425
78
79
  thds/mops/pure/core/memo/results.py,sha256=272pbb5jLqP0KZ5YL5fNTxl2sP2zALFHQs8du8jCfFo,3143
@@ -81,14 +82,14 @@ thds/mops/pure/joblib/__init__.py,sha256=-3hSs-GsNzE_eNnwrdZBHAR_eaub5Uyl5GPYqBw
81
82
  thds/mops/pure/joblib/backend.py,sha256=F__6lrdc1-VcX4n4Pw7Lz1bBgeefShtRy2DQh6Fp-eI,2671
82
83
  thds/mops/pure/joblib/batching.py,sha256=tPOATD28-YW7KcWa3IqKm-fhLaILzM792ApvU-_zfnM,2298
83
84
  thds/mops/pure/pickling/__init__.py,sha256=WNdG8PdJCk-kYaXkvvPa--hjYGoUlBXG3w2X86yuhGo,156
84
- thds/mops/pure/pickling/_pickle.py,sha256=91ACcVRK3_DwOuZhqeqjphBFvYdn0iGm2thgrRgpXvg,7347
85
+ thds/mops/pure/pickling/_pickle.py,sha256=oBt2LX3_Bm33lFmQiOdQq0zIdPjDmPY8je2ICWgUQbo,7514
85
86
  thds/mops/pure/pickling/memoize_only.py,sha256=oI5CMy6IEJc46Gb_BGWNUuAe3fysS7HxRSTajN0WssI,837
86
- thds/mops/pure/pickling/mprunner.py,sha256=D-nChnp7ebRfwtdM8oZytbsCLfwOFp9skDrHWFAqaAs,7388
87
+ thds/mops/pure/pickling/mprunner.py,sha256=dVbwQA8hzEL7UiwYXmzoGwN3_jbEtGoHDPMkRmo_UtA,8378
87
88
  thds/mops/pure/pickling/pickles.py,sha256=nCg7L7CqReNWDF8FAdEmCcuXVC_kLT5zuyW3V8Vvvs4,4704
88
89
  thds/mops/pure/pickling/remote.py,sha256=XMR4-DdhxhBgbskmXCC2G9BEXJ15j6i_Rcg_-JQi_W8,6025
89
90
  thds/mops/pure/pickling/sha256_b64.py,sha256=HL0cPixHPZYuZDVDBscxsnI-3a2amWEfw-LseOX-PyY,2916
90
91
  thds/mops/pure/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
- thds/mops/pure/runner/local.py,sha256=eq2Z99oT--fm0e6toL7gFKvmzHUsQy9oFtiW1w38o0U,11678
92
+ thds/mops/pure/runner/local.py,sha256=qnA8d4j6cVfSqPvc_LwaPTWX-GucezGpULlE-ft0nOA,11958
92
93
  thds/mops/pure/runner/shim_builder.py,sha256=DkOXbPaOWPj2uUsJhjlWmh8ijG9OQc4ciHqa-vHPfXw,709
93
94
  thds/mops/pure/runner/simple_shims.py,sha256=oJ8sC5EVD-JFZx8CYE3_QwaQTuFa5F3IYH5PJ9mdMtY,702
94
95
  thds/mops/pure/runner/strings.py,sha256=PYAYMxZ2ehgahKIBXJilENNE6OrdNkueNBel8LPsoh8,26
@@ -103,8 +104,8 @@ thds/mops/pure/tools/summarize/cli.py,sha256=gaechsJhRZsOxGJGG1dQsW5dMBlgSv2sUmE
103
104
  thds/mops/pure/tools/summarize/run_summary.py,sha256=ujJC24J0XsF5W5P-eHiIq-4gmedmFXk2g1uljuvqOvc,5373
104
105
  thds/mops/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
106
  thds/mops/testing/deferred_imports.py,sha256=f0ezCgQAtzTqW1yAOb0OWgsB9ZrlztLB894LtpWDaVw,3780
106
- thds_mops-3.8.20250425173841.dist-info/METADATA,sha256=OAgEX4VS03W5fmJe2vqoIaiIbHqZOgr5g7asZ1NrsB8,2158
107
- thds_mops-3.8.20250425173841.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
108
- thds_mops-3.8.20250425173841.dist-info/entry_points.txt,sha256=GShNqjcjbq0TAJuwpyeCI5XCltiwdZxnNHkBpmYbNkU,329
109
- thds_mops-3.8.20250425173841.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
110
- thds_mops-3.8.20250425173841.dist-info/RECORD,,
107
+ thds_mops-3.8.20250425182948.dist-info/METADATA,sha256=YXR7rice7lax6IE10hBe1NY8qQoCS_aqe3G4vgYDCKM,2158
108
+ thds_mops-3.8.20250425182948.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
109
+ thds_mops-3.8.20250425182948.dist-info/entry_points.txt,sha256=GShNqjcjbq0TAJuwpyeCI5XCltiwdZxnNHkBpmYbNkU,329
110
+ thds_mops-3.8.20250425182948.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
111
+ thds_mops-3.8.20250425182948.dist-info/RECORD,,