ygg 0.1.44__py3-none-any.whl → 0.1.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/METADATA +1 -1
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/RECORD +14 -13
- yggdrasil/databricks/compute/cluster.py +20 -16
- yggdrasil/databricks/compute/execution_context.py +35 -50
- yggdrasil/databricks/sql/engine.py +5 -2
- yggdrasil/databricks/sql/warehouse.py +355 -0
- yggdrasil/databricks/workspaces/workspace.py +19 -6
- yggdrasil/pyutils/callable_serde.py +183 -281
- yggdrasil/pyutils/expiring_dict.py +114 -25
- yggdrasil/version.py +1 -1
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/WHEEL +0 -0
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.44.dist-info → ygg-0.1.45.dist-info}/top_level.txt +0 -0
|
@@ -3,10 +3,11 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import base64
|
|
6
|
+
import binascii
|
|
6
7
|
import dis
|
|
7
8
|
import importlib
|
|
8
9
|
import inspect
|
|
9
|
-
import
|
|
10
|
+
import lzma
|
|
10
11
|
import os
|
|
11
12
|
import struct
|
|
12
13
|
import sys
|
|
@@ -21,23 +22,29 @@ __all__ = ["CallableSerde"]
|
|
|
21
22
|
|
|
22
23
|
T = TypeVar("T", bound="CallableSerde")
|
|
23
24
|
|
|
24
|
-
|
|
25
25
|
# ---------- internal helpers ----------
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
_MAGIC_V1 = b"CS1" # legacy framing v1: zlib only (FLAG_COMPRESSED)
|
|
28
|
+
_MAGIC_V2 = b"CS2" # new framing v2: codec-aware
|
|
29
29
|
|
|
30
|
+
_FLAG_COMPRESSED = 1 # legacy CS1 meaning
|
|
30
31
|
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
# CS2 codecs (u8)
|
|
33
|
+
_CODEC_RAW = 0
|
|
34
|
+
_CODEC_ZLIB = 1
|
|
35
|
+
_CODEC_LZMA = 2
|
|
36
|
+
_CODEC_ZSTD = 3
|
|
33
37
|
|
|
34
|
-
Args:
|
|
35
|
-
mod: Module to traverse.
|
|
36
|
-
qualname: Dotted qualified name.
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
def _try_import_zstd():
|
|
40
|
+
try:
|
|
41
|
+
import zstandard as zstd # type: ignore
|
|
42
|
+
return zstd
|
|
43
|
+
except Exception:
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _resolve_attr_chain(mod: Any, qualname: str) -> Any:
|
|
41
48
|
obj = mod
|
|
42
49
|
for part in qualname.split("."):
|
|
43
50
|
obj = getattr(obj, part)
|
|
@@ -45,10 +52,6 @@ def _resolve_attr_chain(mod: Any, qualname: str) -> Any:
|
|
|
45
52
|
|
|
46
53
|
|
|
47
54
|
def _find_pkg_root_from_file(file_path: Path) -> Optional[Path]:
|
|
48
|
-
"""
|
|
49
|
-
Walk up parents while __init__.py exists.
|
|
50
|
-
Return the directory that should be on sys.path (parent of top package dir).
|
|
51
|
-
"""
|
|
52
55
|
file_path = file_path.resolve()
|
|
53
56
|
d = file_path.parent
|
|
54
57
|
|
|
@@ -61,14 +64,6 @@ def _find_pkg_root_from_file(file_path: Path) -> Optional[Path]:
|
|
|
61
64
|
|
|
62
65
|
|
|
63
66
|
def _callable_file_line(fn: Callable[..., Any]) -> Tuple[Optional[str], Optional[int]]:
|
|
64
|
-
"""Return the source file path and line number for a callable.
|
|
65
|
-
|
|
66
|
-
Args:
|
|
67
|
-
fn: Callable to inspect.
|
|
68
|
-
|
|
69
|
-
Returns:
|
|
70
|
-
Tuple of (file path, line number).
|
|
71
|
-
"""
|
|
72
67
|
file = None
|
|
73
68
|
line = None
|
|
74
69
|
try:
|
|
@@ -84,17 +79,12 @@ def _callable_file_line(fn: Callable[..., Any]) -> Tuple[Optional[str], Optional
|
|
|
84
79
|
|
|
85
80
|
|
|
86
81
|
def _referenced_global_names(fn: Callable[..., Any]) -> Set[str]:
|
|
87
|
-
"""
|
|
88
|
-
Names that the function *actually* resolves from globals/namespaces at runtime.
|
|
89
|
-
Uses bytecode to avoid shipping random junk.
|
|
90
|
-
"""
|
|
91
82
|
names: Set[str] = set()
|
|
92
83
|
try:
|
|
93
84
|
for ins in dis.get_instructions(fn):
|
|
94
85
|
if ins.opname in ("LOAD_GLOBAL", "LOAD_NAME") and isinstance(ins.argval, str):
|
|
95
86
|
names.add(ins.argval)
|
|
96
87
|
except Exception:
|
|
97
|
-
# fallback: less precise
|
|
98
88
|
try:
|
|
99
89
|
names.update(getattr(fn.__code__, "co_names", ()) or ())
|
|
100
90
|
except Exception:
|
|
@@ -105,14 +95,6 @@ def _referenced_global_names(fn: Callable[..., Any]) -> Set[str]:
|
|
|
105
95
|
|
|
106
96
|
|
|
107
97
|
def _is_importable_reference(fn: Callable[..., Any]) -> bool:
|
|
108
|
-
"""Return True when a callable can be imported by module and qualname.
|
|
109
|
-
|
|
110
|
-
Args:
|
|
111
|
-
fn: Callable to inspect.
|
|
112
|
-
|
|
113
|
-
Returns:
|
|
114
|
-
True if importable by module/qualname.
|
|
115
|
-
"""
|
|
116
98
|
mod_name = getattr(fn, "__module__", None)
|
|
117
99
|
qualname = getattr(fn, "__qualname__", None)
|
|
118
100
|
if not mod_name or not qualname:
|
|
@@ -128,59 +110,145 @@ def _is_importable_reference(fn: Callable[..., Any]) -> bool:
|
|
|
128
110
|
|
|
129
111
|
|
|
130
112
|
def _pick_zlib_level(n: int, limit: int) -> int:
|
|
131
|
-
"""
|
|
132
|
-
Ramp compression level 1..9 based on how much payload exceeds byte_limit.
|
|
133
|
-
ratio=1 -> level=1
|
|
134
|
-
ratio=4 -> level=9
|
|
135
|
-
clamp beyond.
|
|
136
|
-
"""
|
|
137
113
|
ratio = n / max(1, limit)
|
|
138
114
|
x = min(1.0, max(0.0, (ratio - 1.0) / 3.0))
|
|
139
115
|
return max(1, min(9, int(round(1 + 8 * x))))
|
|
140
116
|
|
|
141
117
|
|
|
118
|
+
def _frame_v2(codec: int, orig_len: int, param: int, payload: bytes) -> bytes:
    """Prefix ``payload`` with a CS2 header.

    Layout: MAGIC (3 bytes) + codec (u8) + orig_len (u32, big-endian)
    + param (u8) + payload.

    Args:
        codec: Codec identifier; only the low 8 bits are stored.
        orig_len: Length of the uncompressed data; must fit in an unsigned
            32-bit field, otherwise ``struct.pack`` raises ``struct.error``.
        param: Codec parameter (e.g. level/preset); only the low 8 bits are stored.
        payload: Encoded bytes to append after the header.

    Returns:
        The framed bytes.
    """
    header = struct.pack(">BIB", int(codec) & 0xFF, int(orig_len), int(param) & 0xFF)
    return _MAGIC_V2 + header + payload
|
|
121
|
+
|
|
122
|
+
|
|
142
123
|
def _encode_result_blob(raw: bytes, byte_limit: int) -> bytes:
    """Encode a result payload, compressing it only when that makes it smaller.

    Payloads of at most ``byte_limit`` bytes are returned untouched. Larger
    ones are compressed with every available codec -- zstd at levels 6/10/15
    (only if ``zstandard`` imports), lzma at presets 6/9, and zlib at a
    size-dependent level -- and each attempt is wrapped in a CS2 frame.
    The shortest frame is returned, unless no attempt succeeded or none is
    shorter than ``raw``, in which case ``raw`` itself is returned.

    Args:
        raw: Serialized payload bytes.
        byte_limit: Size at or below which no compression is attempted.

    Returns:
        Either ``raw`` unchanged or a CS2-framed compressed payload.
    """
    size = len(raw)
    if size <= byte_limit:
        return raw

    framed: list[bytes] = []

    def _attempt(codec, param, compress):
        # Best effort: a failing codec is simply skipped.
        try:
            framed.append(_frame_v2(codec, size, param, compress()))
        except Exception:
            pass

    # zstd (optional dependency)
    zstd = _try_import_zstd()
    if zstd is not None:
        for level in (6, 10, 15):
            _attempt(_CODEC_ZSTD, level, lambda: zstd.ZstdCompressor(level=level).compress(raw))

    # lzma (stdlib)
    for preset in (6, 9):
        _attempt(_CODEC_LZMA, preset, lambda: lzma.compress(raw, preset=preset))

    # zlib (stdlib)
    zlib_level = _pick_zlib_level(size, byte_limit)
    _attempt(_CODEC_ZLIB, zlib_level, lambda: zlib.compress(raw, zlib_level))

    if not framed:
        return raw
    # min() keeps the earliest candidate on ties, matching the attempt order above.
    smallest = min(framed, key=len)
    return smallest if len(smallest) < size else raw
|
|
161
167
|
|
|
162
168
|
|
|
163
|
-
def
|
|
169
|
+
def _encode_wire_blob_stdlib(raw: bytes, byte_limit: int) -> bytes:
    """Encode a payload using only standard-library codecs.

    Payloads of at most ``byte_limit`` bytes are returned untouched. Larger
    ones are compressed with lzma (presets 6 and 9) and zlib (size-dependent
    level); each successful attempt is wrapped in a CS2 frame and the
    shortest frame wins. If nothing succeeded, or nothing is shorter than
    ``raw``, ``raw`` is returned as-is.

    Args:
        raw: Serialized payload bytes.
        byte_limit: Size at or below which no compression is attempted.

    Returns:
        Either ``raw`` unchanged or a CS2-framed lzma/zlib payload.
    """
    total = len(raw)
    if total <= byte_limit:
        return raw

    zlib_level = _pick_zlib_level(total, byte_limit)
    # (codec id, header parameter, compressor) in the order they are tried.
    plan = (
        (_CODEC_LZMA, 6, lambda: lzma.compress(raw, preset=6)),
        (_CODEC_LZMA, 9, lambda: lzma.compress(raw, preset=9)),
        (_CODEC_ZLIB, zlib_level, lambda: zlib.compress(raw, zlib_level)),
    )

    frames: list[bytes] = []
    for codec, param, run in plan:
        # Best effort: a codec that fails is skipped rather than aborting.
        try:
            frames.append(_frame_v2(codec, total, param, run()))
        except Exception:
            pass

    best = min(frames, key=len, default=b"")
    if best and len(best) < total:
        return best
    return raw
|
|
170
199
|
|
|
171
|
-
if len(blob) < 3 + 1 + 4 + 1:
|
|
172
|
-
raise ValueError("Framed result too short / corrupted.")
|
|
173
200
|
|
|
174
|
-
|
|
175
|
-
|
|
201
|
+
def _decode_result_blob(blob: bytes) -> bytes:
    """Undo the optional CS1/CS2 framing of a payload.

    Handling by prefix:
      - not bytes/bytearray, shorter than 3 bytes, or no known magic:
        returned unchanged;
      - ``CS1`` (legacy): zlib-decompressed when the compressed flag is set,
        otherwise the bytes after the header are returned;
      - ``CS2``: decoded according to the codec byte (raw, zlib, lzma, zstd).

    Args:
        blob: Possibly framed payload.

    Returns:
        The decoded payload bytes.

    Raises:
        ValueError: Header truncated, unknown CS2 codec, or decoded length
            differs from the (non-zero) length recorded in the header.
        RuntimeError: CS2 frame uses zstd but ``zstandard`` is not importable.
    """
    if not isinstance(blob, (bytes, bytearray)) or len(blob) < 3:
        return blob  # type: ignore[return-value]

    header_end = 3 + 6  # magic + struct ">BIB"

    # ---- legacy CS1 ----
    if blob.startswith(_MAGIC_V1):
        if len(blob) < header_end:
            raise ValueError("Framed result too short / corrupted (CS1).")
        flags, orig_len, _level = struct.unpack(">BIB", blob[3:header_end])
        body = blob[header_end:]
        if not flags & _FLAG_COMPRESSED:
            return body
        inflated = zlib.decompress(body)
        if orig_len and len(inflated) != orig_len:
            raise ValueError(f"Decompressed length mismatch: got {len(inflated)}, expected {orig_len}")
        return inflated

    # ---- anything that is not CS2 is treated as unframed ----
    if not blob.startswith(_MAGIC_V2):
        return blob

    # ---- CS2 ----
    if len(blob) < header_end:
        raise ValueError("Framed result too short / corrupted (CS2).")
    codec, orig_len, param = struct.unpack(">BIB", blob[3:header_end])
    body = blob[header_end:]

    if codec == _CODEC_ZSTD:
        zstd = _try_import_zstd()
        if zstd is None:
            raise RuntimeError("CS2 payload uses zstd, but 'zstandard' is not installed.")
        size_hint = int(orig_len) if orig_len else 0
        decoded = zstd.ZstdDecompressor().decompress(body, max_output_size=size_hint)
    elif codec == _CODEC_LZMA:
        decoded = lzma.decompress(body)
    elif codec == _CODEC_ZLIB:
        decoded = zlib.decompress(body)
    elif codec == _CODEC_RAW:
        decoded = body
    else:
        raise ValueError(f"Unknown CS2 codec: {codec}")

    # A zero length in the header disables the check.
    if orig_len and len(decoded) != orig_len:
        raise ValueError(f"Decoded length mismatch: got {len(decoded)}, expected {orig_len}")
    return decoded
|
|
184
252
|
|
|
185
253
|
|
|
186
254
|
def _dump_env(
|
|
@@ -190,12 +258,6 @@ def _dump_env(
|
|
|
190
258
|
include_closure: bool,
|
|
191
259
|
filter_used_globals: bool,
|
|
192
260
|
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
|
193
|
-
"""
|
|
194
|
-
Returns (env, meta).
|
|
195
|
-
env is dill-able and contains:
|
|
196
|
-
- "globals": {name: value} (filtered to used names if enabled)
|
|
197
|
-
- "closure": {freevar: value} (capture only; injection not generally safe)
|
|
198
|
-
"""
|
|
199
261
|
env: Dict[str, Any] = {}
|
|
200
262
|
meta: Dict[str, Any] = {
|
|
201
263
|
"missing_globals": [],
|
|
@@ -246,18 +308,6 @@ def _dump_env(
|
|
|
246
308
|
|
|
247
309
|
@dataclass
|
|
248
310
|
class CallableSerde:
|
|
249
|
-
"""
|
|
250
|
-
Core field: `fn`
|
|
251
|
-
Serialized/backing fields used when fn isn't present yet.
|
|
252
|
-
|
|
253
|
-
kind:
|
|
254
|
-
- "auto": resolve import if possible else dill
|
|
255
|
-
- "import": module + qualname
|
|
256
|
-
- "dill": dill_b64
|
|
257
|
-
|
|
258
|
-
Optional env payload:
|
|
259
|
-
- env_b64: dill(base64) of {"globals": {...}, "closure": {...}}
|
|
260
|
-
"""
|
|
261
311
|
fn: Optional[Callable[..., Any]] = None
|
|
262
312
|
|
|
263
313
|
_kind: str = "auto" # "auto" | "import" | "dill"
|
|
@@ -269,52 +319,23 @@ class CallableSerde:
|
|
|
269
319
|
_env_b64: Optional[str] = None
|
|
270
320
|
_env_meta: Optional[Dict[str, Any]] = None
|
|
271
321
|
|
|
272
|
-
# ----- construction -----
|
|
273
|
-
|
|
274
322
|
@classmethod
|
|
275
323
|
def from_callable(cls: type[T], x: Union[Callable[..., Any], T]) -> T:
|
|
276
|
-
"""Create a CallableSerde from a callable or existing instance.
|
|
277
|
-
|
|
278
|
-
Args:
|
|
279
|
-
x: Callable or CallableSerde instance.
|
|
280
|
-
|
|
281
|
-
Returns:
|
|
282
|
-
CallableSerde instance.
|
|
283
|
-
"""
|
|
284
324
|
if isinstance(x, cls):
|
|
285
325
|
return x
|
|
286
|
-
|
|
287
326
|
obj = cls(fn=x) # type: ignore[return-value]
|
|
288
|
-
|
|
289
327
|
return obj
|
|
290
328
|
|
|
291
|
-
# ----- lazy-ish properties (computed on access) -----
|
|
292
|
-
|
|
293
329
|
@property
|
|
294
330
|
def module(self) -> Optional[str]:
|
|
295
|
-
"""Return the callable's module name if available.
|
|
296
|
-
|
|
297
|
-
Returns:
|
|
298
|
-
Module name or None.
|
|
299
|
-
"""
|
|
300
331
|
return self._module or (getattr(self.fn, "__module__", None) if self.fn else None)
|
|
301
332
|
|
|
302
333
|
@property
|
|
303
334
|
def qualname(self) -> Optional[str]:
|
|
304
|
-
"""Return the callable's qualified name if available.
|
|
305
|
-
|
|
306
|
-
Returns:
|
|
307
|
-
Qualified name or None.
|
|
308
|
-
"""
|
|
309
335
|
return self._qualname or (getattr(self.fn, "__qualname__", None) if self.fn else None)
|
|
310
336
|
|
|
311
337
|
@property
|
|
312
338
|
def file(self) -> Optional[str]:
|
|
313
|
-
"""Return the filesystem path of the callable's source file.
|
|
314
|
-
|
|
315
|
-
Returns:
|
|
316
|
-
File path or None.
|
|
317
|
-
"""
|
|
318
339
|
if not self.fn:
|
|
319
340
|
return None
|
|
320
341
|
f, _ = _callable_file_line(self.fn)
|
|
@@ -322,11 +343,6 @@ class CallableSerde:
|
|
|
322
343
|
|
|
323
344
|
@property
|
|
324
345
|
def line(self) -> Optional[int]:
|
|
325
|
-
"""Return the line number where the callable is defined.
|
|
326
|
-
|
|
327
|
-
Returns:
|
|
328
|
-
Line number or None.
|
|
329
|
-
"""
|
|
330
346
|
if not self.fn:
|
|
331
347
|
return None
|
|
332
348
|
_, ln = _callable_file_line(self.fn)
|
|
@@ -334,11 +350,6 @@ class CallableSerde:
|
|
|
334
350
|
|
|
335
351
|
@property
|
|
336
352
|
def pkg_root(self) -> Optional[str]:
|
|
337
|
-
"""Return the inferred package root for the callable, if known.
|
|
338
|
-
|
|
339
|
-
Returns:
|
|
340
|
-
Package root path or None.
|
|
341
|
-
"""
|
|
342
353
|
if self._pkg_root:
|
|
343
354
|
return self._pkg_root
|
|
344
355
|
if not self.file:
|
|
@@ -348,11 +359,6 @@ class CallableSerde:
|
|
|
348
359
|
|
|
349
360
|
@property
|
|
350
361
|
def relpath_from_pkg_root(self) -> Optional[str]:
|
|
351
|
-
"""Return the callable's path relative to the package root.
|
|
352
|
-
|
|
353
|
-
Returns:
|
|
354
|
-
Relative path or None.
|
|
355
|
-
"""
|
|
356
362
|
if not self.file or not self.pkg_root:
|
|
357
363
|
return None
|
|
358
364
|
try:
|
|
@@ -362,38 +368,19 @@ class CallableSerde:
|
|
|
362
368
|
|
|
363
369
|
@property
|
|
364
370
|
def importable(self) -> bool:
|
|
365
|
-
"""Return True when the callable can be imported by reference.
|
|
366
|
-
|
|
367
|
-
Returns:
|
|
368
|
-
True if importable by module/qualname.
|
|
369
|
-
"""
|
|
370
371
|
if self.fn is None:
|
|
371
372
|
return bool(self.module and self.qualname and "<locals>" not in (self.qualname or ""))
|
|
372
373
|
return _is_importable_reference(self.fn)
|
|
373
374
|
|
|
374
|
-
# ----- serde API -----
|
|
375
|
-
|
|
376
375
|
def dump(
|
|
377
376
|
self,
|
|
378
377
|
*,
|
|
379
|
-
prefer: str = "import",
|
|
380
|
-
dump_env: str = "none",
|
|
378
|
+
prefer: str = "import",
|
|
379
|
+
dump_env: str = "none",
|
|
381
380
|
filter_used_globals: bool = True,
|
|
382
381
|
env_keys: Optional[Iterable[str]] = None,
|
|
383
382
|
env_variables: Optional[Dict[str, str]] = None,
|
|
384
383
|
) -> Dict[str, Any]:
|
|
385
|
-
"""Serialize the callable into a dict for transport.
|
|
386
|
-
|
|
387
|
-
Args:
|
|
388
|
-
prefer: Preferred serialization kind.
|
|
389
|
-
dump_env: Environment payload selection.
|
|
390
|
-
filter_used_globals: Filter globals to referenced names.
|
|
391
|
-
env_keys: environment keys
|
|
392
|
-
env_variables: environment key values
|
|
393
|
-
|
|
394
|
-
Returns:
|
|
395
|
-
Serialized payload dict.
|
|
396
|
-
"""
|
|
397
384
|
kind = prefer
|
|
398
385
|
if kind == "import" and not self.importable:
|
|
399
386
|
kind = "dill"
|
|
@@ -420,7 +407,6 @@ class CallableSerde:
|
|
|
420
407
|
if env_keys:
|
|
421
408
|
for env_key in env_keys:
|
|
422
409
|
existing = os.getenv(env_key)
|
|
423
|
-
|
|
424
410
|
if existing:
|
|
425
411
|
env_variables[env_key] = existing
|
|
426
412
|
|
|
@@ -448,15 +434,6 @@ class CallableSerde:
|
|
|
448
434
|
|
|
449
435
|
@classmethod
|
|
450
436
|
def load(cls: type[T], d: Dict[str, Any], *, add_pkg_root_to_syspath: bool = True) -> T:
|
|
451
|
-
"""Construct a CallableSerde from a serialized dict payload.
|
|
452
|
-
|
|
453
|
-
Args:
|
|
454
|
-
d: Serialized payload dict.
|
|
455
|
-
add_pkg_root_to_syspath: Add package root to sys.path if True.
|
|
456
|
-
|
|
457
|
-
Returns:
|
|
458
|
-
CallableSerde instance.
|
|
459
|
-
"""
|
|
460
437
|
obj = cls(
|
|
461
438
|
fn=None,
|
|
462
439
|
_kind=d.get("kind", "auto"),
|
|
@@ -474,14 +451,6 @@ class CallableSerde:
|
|
|
474
451
|
return obj # type: ignore[return-value]
|
|
475
452
|
|
|
476
453
|
def materialize(self, *, add_pkg_root_to_syspath: bool = True) -> Callable[..., Any]:
|
|
477
|
-
"""Resolve and return the underlying callable.
|
|
478
|
-
|
|
479
|
-
Args:
|
|
480
|
-
add_pkg_root_to_syspath: Add package root to sys.path if True.
|
|
481
|
-
|
|
482
|
-
Returns:
|
|
483
|
-
Resolved callable.
|
|
484
|
-
"""
|
|
485
454
|
if self.fn is not None:
|
|
486
455
|
return self.fn
|
|
487
456
|
|
|
@@ -515,15 +484,6 @@ class CallableSerde:
|
|
|
515
484
|
raise ValueError(f"Unknown kind: {kind}")
|
|
516
485
|
|
|
517
486
|
def __call__(self, *args: Any, **kwargs: Any) -> Any:
|
|
518
|
-
"""Invoke the materialized callable with the provided arguments.
|
|
519
|
-
|
|
520
|
-
Args:
|
|
521
|
-
*args: Positional args for the callable.
|
|
522
|
-
**kwargs: Keyword args for the callable.
|
|
523
|
-
|
|
524
|
-
Returns:
|
|
525
|
-
Callable return value.
|
|
526
|
-
"""
|
|
527
487
|
fn = self.materialize()
|
|
528
488
|
return fn(*args, **kwargs)
|
|
529
489
|
|
|
@@ -536,7 +496,7 @@ class CallableSerde:
|
|
|
536
496
|
*,
|
|
537
497
|
result_tag: str = "__CALLABLE_SERDE_RESULT__",
|
|
538
498
|
prefer: str = "dill",
|
|
539
|
-
byte_limit: int =
|
|
499
|
+
byte_limit: int = 4 * 1024,
|
|
540
500
|
dump_env: str = "none", # "none" | "globals" | "closure" | "both"
|
|
541
501
|
filter_used_globals: bool = True,
|
|
542
502
|
env_keys: Optional[Iterable[str]] = None,
|
|
@@ -545,15 +505,9 @@ class CallableSerde:
|
|
|
545
505
|
"""
|
|
546
506
|
Returns Python code string to execute in another interpreter.
|
|
547
507
|
Prints one line: "{result_tag}:{base64(blob)}"
|
|
548
|
-
where blob is raw dill bytes or framed
|
|
549
|
-
|
|
550
|
-
Also compresses the input call payload (args/kwargs) using the same framing
|
|
551
|
-
scheme when it exceeds byte_limit.
|
|
508
|
+
where blob is raw dill bytes or framed (CS1/CS2).
|
|
552
509
|
"""
|
|
553
|
-
import base64
|
|
554
510
|
import json
|
|
555
|
-
import struct
|
|
556
|
-
import zlib
|
|
557
511
|
|
|
558
512
|
args = args or ()
|
|
559
513
|
kwargs = kwargs or {}
|
|
@@ -567,75 +521,30 @@ class CallableSerde:
|
|
|
567
521
|
)
|
|
568
522
|
serde_json = json.dumps(serde_dict, ensure_ascii=False)
|
|
569
523
|
|
|
570
|
-
#
|
|
571
|
-
MAGIC = b"CS1"
|
|
572
|
-
FLAG_COMPRESSED = 1
|
|
573
|
-
|
|
574
|
-
def _pick_level(n: int, limit: int) -> int:
|
|
575
|
-
ratio = n / max(1, limit)
|
|
576
|
-
x = min(1.0, max(0.0, (ratio - 1.0) / 3.0))
|
|
577
|
-
return max(1, min(9, int(round(1 + 8 * x))))
|
|
578
|
-
|
|
579
|
-
def _encode_blob(raw: bytes, limit: int) -> bytes:
|
|
580
|
-
if len(raw) <= limit:
|
|
581
|
-
return raw
|
|
582
|
-
level = _pick_level(len(raw), limit)
|
|
583
|
-
compressed = zlib.compress(raw, level)
|
|
584
|
-
if len(compressed) >= len(raw):
|
|
585
|
-
return raw
|
|
586
|
-
header = MAGIC + struct.pack(">BIB", FLAG_COMPRESSED, len(raw), level)
|
|
587
|
-
return header + compressed
|
|
588
|
-
|
|
524
|
+
# Encode (args, kwargs) with stdlib-only strategy so remote can always decode.
|
|
589
525
|
call_raw = dill.dumps((args, kwargs), recurse=True)
|
|
590
|
-
|
|
526
|
+
|
|
527
|
+
# Use your local encoder for wire payload (stdlib only)
|
|
528
|
+
call_blob = _encode_wire_blob_stdlib(call_raw, int(byte_limit))
|
|
591
529
|
call_payload_b64 = base64.b64encode(call_blob).decode("ascii")
|
|
592
530
|
|
|
593
|
-
# NOTE: plain string template + replace. No f-string. No brace escaping.
|
|
594
531
|
template = r"""
|
|
595
|
-
import base64, json,
|
|
532
|
+
import base64, json, os, sys
|
|
596
533
|
import dill
|
|
597
534
|
|
|
535
|
+
# thin import from your real module
|
|
536
|
+
from yggdrasil.pyutils.callable_serde import (
|
|
537
|
+
CallableSerde,
|
|
538
|
+
_decode_result_blob, # decodes raw/CS1/CS2
|
|
539
|
+
_encode_result_blob, # encodes result with strongest available
|
|
540
|
+
)
|
|
541
|
+
|
|
598
542
|
RESULT_TAG = __RESULT_TAG__
|
|
599
543
|
BYTE_LIMIT = __BYTE_LIMIT__
|
|
600
544
|
|
|
601
|
-
MAGIC = b"CS1"
|
|
602
|
-
FLAG_COMPRESSED = 1
|
|
603
|
-
|
|
604
|
-
def _resolve_attr_chain(mod, qualname: str):
|
|
605
|
-
obj = mod
|
|
606
|
-
for part in qualname.split("."):
|
|
607
|
-
obj = getattr(obj, part)
|
|
608
|
-
return obj
|
|
609
|
-
|
|
610
|
-
def _pick_level(n: int, limit: int) -> int:
|
|
611
|
-
ratio = n / max(1, limit)
|
|
612
|
-
x = min(1.0, max(0.0, (ratio - 1.0) / 3.0))
|
|
613
|
-
return max(1, min(9, int(round(1 + 8 * x))))
|
|
614
|
-
|
|
615
|
-
def _encode_result(raw: bytes, byte_limit: int) -> bytes:
|
|
616
|
-
if len(raw) <= byte_limit:
|
|
617
|
-
return raw
|
|
618
|
-
level = _pick_level(len(raw), byte_limit)
|
|
619
|
-
compressed = zlib.compress(raw, level)
|
|
620
|
-
if len(compressed) >= len(raw):
|
|
621
|
-
return raw
|
|
622
|
-
header = MAGIC + struct.pack(">BIB", FLAG_COMPRESSED, len(raw), level)
|
|
623
|
-
return header + compressed
|
|
624
|
-
|
|
625
|
-
def _decode_blob(blob: bytes) -> bytes:
|
|
626
|
-
# If it's framed (MAGIC + header), decompress; else return as-is.
|
|
627
|
-
if isinstance(blob, (bytes, bytearray)) and len(blob) >= 3 and blob[:3] == MAGIC:
|
|
628
|
-
if len(blob) >= 3 + 6:
|
|
629
|
-
flag, orig_len, level = struct.unpack(">BIB", blob[3:3+6])
|
|
630
|
-
if flag & FLAG_COMPRESSED:
|
|
631
|
-
raw = zlib.decompress(blob[3+6:])
|
|
632
|
-
# best-effort sanity check; don't hard-fail on mismatch
|
|
633
|
-
if isinstance(orig_len, int) and orig_len > 0 and len(raw) != orig_len:
|
|
634
|
-
return raw
|
|
635
|
-
return raw
|
|
636
|
-
return blob
|
|
637
|
-
|
|
638
545
|
def _needed_globals(fn) -> set[str]:
|
|
546
|
+
# keep this tiny + local; doesn’t need full module internals
|
|
547
|
+
import dis
|
|
639
548
|
names = set()
|
|
640
549
|
try:
|
|
641
550
|
for ins in dis.get_instructions(fn):
|
|
@@ -669,47 +578,38 @@ def _apply_env(fn, env: dict, filter_used: bool):
|
|
|
669
578
|
|
|
670
579
|
serde = json.loads(__SERDE_JSON__)
|
|
671
580
|
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
kind = serde.get("kind")
|
|
677
|
-
if kind == "import":
|
|
678
|
-
mod = importlib.import_module(serde["module"])
|
|
679
|
-
fn = _resolve_attr_chain(mod, serde["qualname"])
|
|
680
|
-
elif kind == "dill":
|
|
681
|
-
fn = dill.loads(base64.b64decode(serde["dill_b64"]))
|
|
682
|
-
else:
|
|
683
|
-
if serde.get("module") and serde.get("qualname") and "<locals>" not in serde.get("qualname", ""):
|
|
684
|
-
mod = importlib.import_module(serde["module"])
|
|
685
|
-
fn = _resolve_attr_chain(mod, serde["qualname"])
|
|
686
|
-
else:
|
|
687
|
-
fn = dill.loads(base64.b64decode(serde["dill_b64"]))
|
|
581
|
+
# materialize callable
|
|
582
|
+
cs = CallableSerde.load(serde, add_pkg_root_to_syspath=True)
|
|
583
|
+
fn = cs.materialize(add_pkg_root_to_syspath=True)
|
|
688
584
|
|
|
585
|
+
# apply os env vars (if present)
|
|
689
586
|
osenv = serde.get("osenv")
|
|
690
587
|
if osenv:
|
|
691
588
|
for k, v in osenv.items():
|
|
692
589
|
os.environ[k] = v
|
|
693
590
|
|
|
591
|
+
# apply dill'd env payload (if present)
|
|
694
592
|
env_b64 = serde.get("env_b64")
|
|
695
593
|
if env_b64:
|
|
696
594
|
env = dill.loads(base64.b64decode(env_b64))
|
|
697
595
|
meta = serde.get("env_meta") or {}
|
|
698
596
|
_apply_env(fn, env, bool(meta.get("filter_used_globals", True)))
|
|
699
597
|
|
|
598
|
+
# decode call payload
|
|
700
599
|
call_blob = base64.b64decode(__CALL_PAYLOAD_B64__)
|
|
701
|
-
call_raw =
|
|
600
|
+
call_raw = _decode_result_blob(call_blob)
|
|
702
601
|
args, kwargs = dill.loads(call_raw)
|
|
703
602
|
|
|
603
|
+
# execute
|
|
704
604
|
res = fn(*args, **kwargs)
|
|
705
|
-
raw = dill.dumps(res, recurse=True)
|
|
706
|
-
blob = _encode_result(raw, BYTE_LIMIT)
|
|
707
605
|
|
|
708
|
-
#
|
|
709
|
-
|
|
710
|
-
|
|
606
|
+
# encode + print result
|
|
607
|
+
raw = dill.dumps(res)
|
|
608
|
+
blob = _encode_result_blob(raw, BYTE_LIMIT)
|
|
609
|
+
print(f"{RESULT_TAG}:{base64.b64encode(blob).decode('ascii')}")
|
|
610
|
+
"""
|
|
711
611
|
|
|
712
|
-
|
|
612
|
+
return (
|
|
713
613
|
template
|
|
714
614
|
.replace("__RESULT_TAG__", repr(result_tag))
|
|
715
615
|
.replace("__BYTE_LIMIT__", str(int(byte_limit)))
|
|
@@ -717,23 +617,25 @@ sys.stdout.write(str(RESULT_TAG) + ":" + base64.b64encode(blob).decode("ascii")
|
|
|
717
617
|
.replace("__CALL_PAYLOAD_B64__", repr(call_payload_b64))
|
|
718
618
|
)
|
|
719
619
|
|
|
720
|
-
return code
|
|
721
|
-
|
|
722
620
|
@staticmethod
def parse_command_result(output: str, *, result_tag: str = "__CALLABLE_SERDE_RESULT__") -> Any:
    """Extract and deserialize the result printed as ``"{result_tag}:{base64}"``.

    The last occurrence of the tag in ``output`` is used, and only the rest
    of that line is taken as the payload. Text printed after the result line
    is ignored instead of being fed to the base64 decoder.

    Args:
        output: Captured text that contains the tagged result line.
        result_tag: Tag that precedes the base64 payload.

    Returns:
        The object obtained by un-framing the payload and loading it with dill.

    Raises:
        ValueError: Tag missing, payload empty, payload not valid base64,
            or the decoded bytes cannot be loaded by dill.
    """
    prefix = f"{result_tag}:"
    if prefix not in output:
        raise ValueError(f"Result tag not found in output: {result_tag}")

    # Everything after the last tag; the payload is only the remainder of
    # that line, so cut at the first line break and drop stray whitespace.
    tail = output.rsplit(prefix, 1)[1]
    b64 = tail.splitlines()[0].strip() if tail else ""

    if not b64:
        raise ValueError(f"Found result tag {result_tag} but payload is empty")

    try:
        blob = base64.b64decode(b64.encode("ascii"))
    except (UnicodeEncodeError, binascii.Error) as e:
        raise ValueError("Invalid base64 payload after result tag") from e

    raw = _decode_result_blob(blob)
    # NOTE: dill.loads executes arbitrary code embedded in the payload; only
    # parse output produced by executions you trust.
    try:
        return dill.loads(raw)
    except Exception as e:
        raise ValueError("Failed to dill.loads decoded payload") from e
|