ygg 0.1.19__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.19
3
+ Version: 0.1.20
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
@@ -12,7 +12,7 @@ yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJ
12
12
  yggdrasil/databricks/sql/statement_result.py,sha256=L-hrK5MVnH3XG57BpGmaETtRzYjAtYGQthgAPGVj618,12610
13
13
  yggdrasil/databricks/sql/types.py,sha256=YgasSyq8sygk1h6ZOTcRwXAZWNKSuk-9g9VqlR8kJl4,5324
14
14
  yggdrasil/databricks/workspaces/__init__.py,sha256=tNNS3A_Pl9FYkQ8nGERhr4VF-hwKrvh8k1W8vTaR0uo,58
15
- yggdrasil/databricks/workspaces/databricks_path.py,sha256=1jGLqAxSUvhBiSs78W2rS31ENEz4HcAQ0U_HqgqlJMM,29551
15
+ yggdrasil/databricks/workspaces/databricks_path.py,sha256=ieaAEPo2lBmgqrdFwtXdy9uIDFyhFT8hjbO_KP5ayec,29547
16
16
  yggdrasil/databricks/workspaces/workspace.py,sha256=8T-d0DTq-s2zTkmMbaEeS_7AiZwcUDxorch9IbZACko,33333
17
17
  yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
18
18
  yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
@@ -33,7 +33,7 @@ yggdrasil/requests/__init__.py,sha256=THJz1IoZYQccwmXcQR3N8D-uWxCkfMtgeXDhONdERR
33
33
  yggdrasil/requests/msal.py,sha256=ucnN45iZZpbXkByw212PX4shH4g0EeyrW8JEmfimWtY,5861
34
34
  yggdrasil/requests/session.py,sha256=YomLcDf8O_mc8BUnf9fr5wrupDnxEzaGw-guhV91NsE,830
35
35
  yggdrasil/ser/__init__.py,sha256=sS66Bxu8aiLb-8N2aNayquamfi7FobEH51JyV5ULDFI,31
36
- yggdrasil/ser/callable_serde.py,sha256=GlCHYayy1MbrtRL9A4JQ4H91HhPpBDSZPEeXTGcimB0,20751
36
+ yggdrasil/ser/callable_serde.py,sha256=hgJilSWYcAivFCfwRJJ-eKzGBtDfUE1eNppX4ac7Sd0,21473
37
37
  yggdrasil/types/__init__.py,sha256=p0Qu_69RkePPgQGM9nSue_bcbEIAM2u9eo3zsEplHJ8,82
38
38
  yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
39
39
  yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
@@ -48,7 +48,7 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
48
48
  yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
49
49
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
50
50
  yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
51
- ygg-0.1.19.dist-info/METADATA,sha256=__YFsy3OqdkKedGn-zOh6Y1VFpTWVl_rZMS4Aa8AtZY,5981
52
- ygg-0.1.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
- ygg-0.1.19.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
- ygg-0.1.19.dist-info/RECORD,,
51
+ ygg-0.1.20.dist-info/METADATA,sha256=MlGCV96KVsKcbu92-pX4CKQjA9KlOE1lafq2KhG4DdQ,5981
52
+ ygg-0.1.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ ygg-0.1.20.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
+ ygg-0.1.20.dist-info/RECORD,,
@@ -44,6 +44,8 @@ class DatabricksPathKind(str, Enum):
44
44
 
45
45
  @classmethod
46
46
  def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
47
+ from .workspace import Workspace
48
+
47
49
  if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
48
50
  if path.startswith("/Users/me"):
49
51
  workspace = Workspace() if workspace is None else workspace
@@ -54,8 +56,6 @@ class DatabricksPathKind(str, Enum):
54
56
  return cls.VOLUME, workspace, path
55
57
 
56
58
  if path.startswith("dbfs://"):
57
- from .workspace import Workspace
58
-
59
59
  parsed = urlparse.urlparse(path)
60
60
  kind, _, inner_path = cls.parse(parsed.path)
61
61
  workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
@@ -174,6 +174,17 @@ def _capture_module_imports(fn: Callable[..., Any]) -> str:
174
174
  return ""
175
175
 
176
176
 
177
+ def _pack_payload(payload: Dict[str, Any], *, level: int = 6) -> str:
178
+ lvl = int(level)
179
+ if lvl < 1:
180
+ lvl = 1
181
+ elif lvl > 9:
182
+ lvl = 9
183
+ raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
184
+ comp = zlib.compress(raw, level=lvl)
185
+ return _b64e(comp)
186
+
187
+
177
188
  def parse_tagged_result(stdout_text: str, result_tag: str) -> Dict[str, Any]:
178
189
  """
179
190
  Extract the last tagged JSON payload printed by a cluster command.
@@ -359,6 +370,8 @@ class CallableSerdeMixin:
359
370
  use_dill: bool,
360
371
  byte_limit: int = 0,
361
372
  result_tag: str = "<<<RESULT>>>",
373
+ compress_input_payload: bool = True, # NEW
374
+ payload_compression_level: int = 6, # NEW (1..9)
362
375
  ) -> str:
363
376
  if kwargs is None:
364
377
  kwargs = {}
@@ -390,7 +403,7 @@ class CallableSerdeMixin:
390
403
  "name": getattr(self.fn, "__name__", None),
391
404
  "qualname": getattr(self.fn, "__qualname__", None),
392
405
  "module": getattr(self.fn, "__module__", None),
393
- "imports": imports, # NEW
406
+ "imports": imports,
394
407
  "source": src,
395
408
  "dill_b64": None,
396
409
  "env": _capture_exec_env(self.fn),
@@ -418,16 +431,32 @@ class CallableSerdeMixin:
418
431
  "kwargs": kwargs_pack,
419
432
  "env": client_env,
420
433
  "result_tag": result_tag,
421
- "byte_limit": byte_limit
434
+ "byte_limit": byte_limit,
422
435
  }
423
436
 
437
+ # NEW: compress the embedded input payload to keep command size small
438
+ if compress_input_payload:
439
+ payload_b64 = _pack_payload(payload, level=payload_compression_level)
440
+ payload_bootstrap = f"""
441
+ _payload_b64 = {payload_b64!r}
442
+
443
+ def _load_payload(b64: str) -> dict:
444
+ raw = base64.b64decode(b64.encode("ascii"))
445
+ raw = zlib.decompress(raw)
446
+ return json.loads(raw.decode("utf-8"))
447
+
448
+ _payload = _load_payload(_payload_b64)
449
+ """.rstrip()
450
+ else:
451
+ payload_bootstrap = f"_payload = {payload!r}"
452
+
424
453
  return f"""
425
454
  # --- generated by yggdrasil.ser.CallableSerdeMixin.to_command ---
426
455
  import base64, json, os, traceback, zlib
427
456
  import dill
428
457
  from yggdrasil.databricks import *
429
458
 
430
- _payload = {payload!r}
459
+ {payload_bootstrap}
431
460
 
432
461
  def _b64d(s: str) -> bytes:
433
462
  return base64.b64decode(s.encode("ascii"))
@@ -472,7 +501,6 @@ def _load_callable(cpack, use_dill: bool):
472
501
  ns = {{}}
473
502
  ns.update(_hydrate_env(cpack.get("env") or {{}}))
474
503
 
475
- # NEW: exec module imports first
476
504
  if imports.strip():
477
505
  exec(imports, ns, ns)
478
506
 
@@ -490,34 +518,22 @@ def _load_callable(cpack, use_dill: bool):
490
518
  def _emit(tag: str, obj: dict):
491
519
  print(tag + json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
492
520
 
493
-
494
521
  def _zlib_level(n: int, limit: int) -> int:
495
- # ratio of size to limit
496
522
  r = n / max(1, limit)
497
-
498
- # fast for huge payloads, stronger only when slightly over
499
- if r >= 32:
500
- return 1
501
- if r >= 16:
502
- return 2
503
- if r >= 8:
504
- return 3
505
- if r >= 4:
506
- return 4
507
- if r >= 2:
508
- return 5
509
- if r >= 1.25:
510
- return 6
511
- # barely over: squeeze a bit more
523
+ if r >= 32: return 1
524
+ if r >= 16: return 2
525
+ if r >= 8: return 3
526
+ if r >= 4: return 4
527
+ if r >= 2: return 5
528
+ if r >= 1.25: return 6
512
529
  return 7
513
530
 
514
- # apply env vars
515
531
  for k, v in (_payload.get("env") or {{}}).items():
516
532
  if v is not None:
517
533
  os.environ[str(k)] = str(v)
518
534
 
519
535
  tag = _payload.get("result_tag", "<<<RESULT>>>")
520
- byte_limit = int((_payload.get("byte_limit") or 2_000_000)) # ~2MB serialized bytes
536
+ byte_limit = int((_payload.get("byte_limit") or 2_000_000))
521
537
 
522
538
  try:
523
539
  use_dill = bool(_payload.get("use_dill", False))
File without changes