ygg 0.1.19__py3-none-any.whl → 0.1.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.19.dist-info → ygg-0.1.20.dist-info}/METADATA +1 -1
- {ygg-0.1.19.dist-info → ygg-0.1.20.dist-info}/RECORD +6 -6
- yggdrasil/databricks/workspaces/databricks_path.py +2 -2
- yggdrasil/ser/callable_serde.py +39 -23
- {ygg-0.1.19.dist-info → ygg-0.1.20.dist-info}/WHEEL +0 -0
- {ygg-0.1.19.dist-info → ygg-0.1.20.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ygg
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.20
|
|
4
4
|
Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
|
|
5
5
|
Author: Yggdrasil contributors
|
|
6
6
|
Project-URL: Homepage, https://github.com/Platob/Yggdrasil
|
|
@@ -12,7 +12,7 @@ yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
|
|
|
12
12
|
yggdrasil/databricks/sql/statement_result.py,sha256=L-hrK5MVnH3XG57BpGmaETtRzYjAtYGQthgAPGVj618,12610
|
|
13
13
|
yggdrasil/databricks/sql/types.py,sha256=YgasSyq8sygk1h6ZOTcRwXAZWNKSuk-9g9VqlR8kJl4,5324
|
|
14
14
|
yggdrasil/databricks/workspaces/__init__.py,sha256=tNNS3A_Pl9FYkQ8nGERhr4VF-hwKrvh8k1W8vTaR0uo,58
|
|
15
|
-
yggdrasil/databricks/workspaces/databricks_path.py,sha256=
|
|
15
|
+
yggdrasil/databricks/workspaces/databricks_path.py,sha256=ieaAEPo2lBmgqrdFwtXdy9uIDFyhFT8hjbO_KP5ayec,29547
|
|
16
16
|
yggdrasil/databricks/workspaces/workspace.py,sha256=8T-d0DTq-s2zTkmMbaEeS_7AiZwcUDxorch9IbZACko,33333
|
|
17
17
|
yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
|
|
18
18
|
yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
|
|
@@ -33,7 +33,7 @@ yggdrasil/requests/__init__.py,sha256=THJz1IoZYQccwmXcQR3N8D-uWxCkfMtgeXDhONdERR
|
|
|
33
33
|
yggdrasil/requests/msal.py,sha256=ucnN45iZZpbXkByw212PX4shH4g0EeyrW8JEmfimWtY,5861
|
|
34
34
|
yggdrasil/requests/session.py,sha256=YomLcDf8O_mc8BUnf9fr5wrupDnxEzaGw-guhV91NsE,830
|
|
35
35
|
yggdrasil/ser/__init__.py,sha256=sS66Bxu8aiLb-8N2aNayquamfi7FobEH51JyV5ULDFI,31
|
|
36
|
-
yggdrasil/ser/callable_serde.py,sha256=
|
|
36
|
+
yggdrasil/ser/callable_serde.py,sha256=hgJilSWYcAivFCfwRJJ-eKzGBtDfUE1eNppX4ac7Sd0,21473
|
|
37
37
|
yggdrasil/types/__init__.py,sha256=p0Qu_69RkePPgQGM9nSue_bcbEIAM2u9eo3zsEplHJ8,82
|
|
38
38
|
yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
|
|
39
39
|
yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
|
|
@@ -48,7 +48,7 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
|
|
|
48
48
|
yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
|
|
49
49
|
yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
|
|
50
50
|
yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
|
|
51
|
-
ygg-0.1.
|
|
52
|
-
ygg-0.1.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
53
|
-
ygg-0.1.19.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
|
|
54
|
-
ygg-0.1.19.dist-info/RECORD,,
|
|
51
|
+
ygg-0.1.20.dist-info/METADATA,sha256=MlGCV96KVsKcbu92-pX4CKQjA9KlOE1lafq2KhG4DdQ,5981
|
|
52
|
+
ygg-0.1.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
53
|
+
ygg-0.1.20.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
|
|
54
|
+
ygg-0.1.20.dist-info/RECORD,,
|
|
@@ -44,6 +44,8 @@ class DatabricksPathKind(str, Enum):
|
|
|
44
44
|
|
|
45
45
|
@classmethod
|
|
46
46
|
def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
|
|
47
|
+
from .workspace import Workspace
|
|
48
|
+
|
|
47
49
|
if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
|
|
48
50
|
if path.startswith("/Users/me"):
|
|
49
51
|
workspace = Workspace() if workspace is None else workspace
|
|
@@ -54,8 +56,6 @@ class DatabricksPathKind(str, Enum):
|
|
|
54
56
|
return cls.VOLUME, workspace, path
|
|
55
57
|
|
|
56
58
|
if path.startswith("dbfs://"):
|
|
57
|
-
from .workspace import Workspace
|
|
58
|
-
|
|
59
59
|
parsed = urlparse.urlparse(path)
|
|
60
60
|
kind, _, inner_path = cls.parse(parsed.path)
|
|
61
61
|
workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
|
yggdrasil/ser/callable_serde.py
CHANGED
|
@@ -174,6 +174,17 @@ def _capture_module_imports(fn: Callable[..., Any]) -> str:
|
|
|
174
174
|
return ""
|
|
175
175
|
|
|
176
176
|
|
|
177
|
+
def _pack_payload(payload: Dict[str, Any], *, level: int = 6) -> str:
|
|
178
|
+
lvl = int(level)
|
|
179
|
+
if lvl < 1:
|
|
180
|
+
lvl = 1
|
|
181
|
+
elif lvl > 9:
|
|
182
|
+
lvl = 9
|
|
183
|
+
raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
|
|
184
|
+
comp = zlib.compress(raw, level=lvl)
|
|
185
|
+
return _b64e(comp)
|
|
186
|
+
|
|
187
|
+
|
|
177
188
|
def parse_tagged_result(stdout_text: str, result_tag: str) -> Dict[str, Any]:
|
|
178
189
|
"""
|
|
179
190
|
Extract the last tagged JSON payload printed by a cluster command.
|
|
@@ -359,6 +370,8 @@ class CallableSerdeMixin:
|
|
|
359
370
|
use_dill: bool,
|
|
360
371
|
byte_limit: int = 0,
|
|
361
372
|
result_tag: str = "<<<RESULT>>>",
|
|
373
|
+
compress_input_payload: bool = True, # NEW
|
|
374
|
+
payload_compression_level: int = 6, # NEW (1..9)
|
|
362
375
|
) -> str:
|
|
363
376
|
if kwargs is None:
|
|
364
377
|
kwargs = {}
|
|
@@ -390,7 +403,7 @@ class CallableSerdeMixin:
|
|
|
390
403
|
"name": getattr(self.fn, "__name__", None),
|
|
391
404
|
"qualname": getattr(self.fn, "__qualname__", None),
|
|
392
405
|
"module": getattr(self.fn, "__module__", None),
|
|
393
|
-
"imports": imports,
|
|
406
|
+
"imports": imports,
|
|
394
407
|
"source": src,
|
|
395
408
|
"dill_b64": None,
|
|
396
409
|
"env": _capture_exec_env(self.fn),
|
|
@@ -418,16 +431,32 @@ class CallableSerdeMixin:
|
|
|
418
431
|
"kwargs": kwargs_pack,
|
|
419
432
|
"env": client_env,
|
|
420
433
|
"result_tag": result_tag,
|
|
421
|
-
"byte_limit": byte_limit
|
|
434
|
+
"byte_limit": byte_limit,
|
|
422
435
|
}
|
|
423
436
|
|
|
437
|
+
# NEW: compress the embedded input payload to keep command size small
|
|
438
|
+
if compress_input_payload:
|
|
439
|
+
payload_b64 = _pack_payload(payload, level=payload_compression_level)
|
|
440
|
+
payload_bootstrap = f"""
|
|
441
|
+
_payload_b64 = {payload_b64!r}
|
|
442
|
+
|
|
443
|
+
def _load_payload(b64: str) -> dict:
|
|
444
|
+
raw = base64.b64decode(b64.encode("ascii"))
|
|
445
|
+
raw = zlib.decompress(raw)
|
|
446
|
+
return json.loads(raw.decode("utf-8"))
|
|
447
|
+
|
|
448
|
+
_payload = _load_payload(_payload_b64)
|
|
449
|
+
""".rstrip()
|
|
450
|
+
else:
|
|
451
|
+
payload_bootstrap = f"_payload = {payload!r}"
|
|
452
|
+
|
|
424
453
|
return f"""
|
|
425
454
|
# --- generated by yggdrasil.ser.CallableSerdeMixin.to_command ---
|
|
426
455
|
import base64, json, os, traceback, zlib
|
|
427
456
|
import dill
|
|
428
457
|
from yggdrasil.databricks import *
|
|
429
458
|
|
|
430
|
-
|
|
459
|
+
{payload_bootstrap}
|
|
431
460
|
|
|
432
461
|
def _b64d(s: str) -> bytes:
|
|
433
462
|
return base64.b64decode(s.encode("ascii"))
|
|
@@ -472,7 +501,6 @@ def _load_callable(cpack, use_dill: bool):
|
|
|
472
501
|
ns = {{}}
|
|
473
502
|
ns.update(_hydrate_env(cpack.get("env") or {{}}))
|
|
474
503
|
|
|
475
|
-
# NEW: exec module imports first
|
|
476
504
|
if imports.strip():
|
|
477
505
|
exec(imports, ns, ns)
|
|
478
506
|
|
|
@@ -490,34 +518,22 @@ def _load_callable(cpack, use_dill: bool):
|
|
|
490
518
|
def _emit(tag: str, obj: dict):
|
|
491
519
|
print(tag + json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
|
|
492
520
|
|
|
493
|
-
|
|
494
521
|
def _zlib_level(n: int, limit: int) -> int:
|
|
495
|
-
# ratio of size to limit
|
|
496
522
|
r = n / max(1, limit)
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
if r >=
|
|
500
|
-
|
|
501
|
-
if r >=
|
|
502
|
-
|
|
503
|
-
if r >= 8:
|
|
504
|
-
return 3
|
|
505
|
-
if r >= 4:
|
|
506
|
-
return 4
|
|
507
|
-
if r >= 2:
|
|
508
|
-
return 5
|
|
509
|
-
if r >= 1.25:
|
|
510
|
-
return 6
|
|
511
|
-
# barely over: squeeze a bit more
|
|
523
|
+
if r >= 32: return 1
|
|
524
|
+
if r >= 16: return 2
|
|
525
|
+
if r >= 8: return 3
|
|
526
|
+
if r >= 4: return 4
|
|
527
|
+
if r >= 2: return 5
|
|
528
|
+
if r >= 1.25: return 6
|
|
512
529
|
return 7
|
|
513
530
|
|
|
514
|
-
# apply env vars
|
|
515
531
|
for k, v in (_payload.get("env") or {{}}).items():
|
|
516
532
|
if v is not None:
|
|
517
533
|
os.environ[str(k)] = str(v)
|
|
518
534
|
|
|
519
535
|
tag = _payload.get("result_tag", "<<<RESULT>>>")
|
|
520
|
-
byte_limit = int((_payload.get("byte_limit") or 2_000_000))
|
|
536
|
+
byte_limit = int((_payload.get("byte_limit") or 2_000_000))
|
|
521
537
|
|
|
522
538
|
try:
|
|
523
539
|
use_dill = bool(_payload.get("use_dill", False))
|
|
File without changes
|
|
File without changes
|