ygg 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.18
3
+ Version: 0.1.20
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
@@ -2,7 +2,7 @@ yggdrasil/__init__.py,sha256=6OPibApplA5TF4TeixkQO_qewpaAidYX-fSDvvKYcTI,91
2
2
  yggdrasil/databricks/__init__.py,sha256=aGVve5mpoQtxSK2nfzrexjRPoutCIyaOnKZijkG4_QE,92
3
3
  yggdrasil/databricks/compute/__init__.py,sha256=TVDwPmW2SOmHmnhzZhsvrWbrxZ_lEcgqe3l9BeB-oxM,218
4
4
  yggdrasil/databricks/compute/cluster.py,sha256=xElDioObG6exkUS08K-Ccs_EFNbWD69Z15fjvnHwOx8,26958
5
- yggdrasil/databricks/compute/execution_context.py,sha256=pnzA_itZiYW4LpjgWnKZlovABEjHToDAp2ahVMnZmRQ,18625
5
+ yggdrasil/databricks/compute/execution_context.py,sha256=0W8GiOJH8iWsonXxKy7KLsyX7d8SMLvV5IfM5s8KTGs,18481
6
6
  yggdrasil/databricks/compute/remote.py,sha256=DzPVPk-4bt5bOL52Onur3xLNh3UzS2K70DU5HglhGSg,1216
7
7
  yggdrasil/databricks/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  yggdrasil/databricks/jobs/config.py,sha256=8Slfw4Wl7vu0kIlaUUqVqjjOgPwuULoo0rroENCbC20,11494
@@ -11,8 +11,8 @@ yggdrasil/databricks/sql/engine.py,sha256=WRDrY-FpXMegF3qNmMkCsfIitJB9rY_lW8Pnk7
11
11
  yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  yggdrasil/databricks/sql/statement_result.py,sha256=L-hrK5MVnH3XG57BpGmaETtRzYjAtYGQthgAPGVj618,12610
13
13
  yggdrasil/databricks/sql/types.py,sha256=YgasSyq8sygk1h6ZOTcRwXAZWNKSuk-9g9VqlR8kJl4,5324
14
- yggdrasil/databricks/workspaces/__init__.py,sha256=ffKl4XrLPjBicXa7Ygda_VcbhSGw8IPJVm5NqrMzHks,26
15
- yggdrasil/databricks/workspaces/databricks_path.py,sha256=1jGLqAxSUvhBiSs78W2rS31ENEz4HcAQ0U_HqgqlJMM,29551
14
+ yggdrasil/databricks/workspaces/__init__.py,sha256=tNNS3A_Pl9FYkQ8nGERhr4VF-hwKrvh8k1W8vTaR0uo,58
15
+ yggdrasil/databricks/workspaces/databricks_path.py,sha256=ieaAEPo2lBmgqrdFwtXdy9uIDFyhFT8hjbO_KP5ayec,29547
16
16
  yggdrasil/databricks/workspaces/workspace.py,sha256=8T-d0DTq-s2zTkmMbaEeS_7AiZwcUDxorch9IbZACko,33333
17
17
  yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
18
18
  yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
@@ -33,7 +33,7 @@ yggdrasil/requests/__init__.py,sha256=THJz1IoZYQccwmXcQR3N8D-uWxCkfMtgeXDhONdERR
33
33
  yggdrasil/requests/msal.py,sha256=ucnN45iZZpbXkByw212PX4shH4g0EeyrW8JEmfimWtY,5861
34
34
  yggdrasil/requests/session.py,sha256=YomLcDf8O_mc8BUnf9fr5wrupDnxEzaGw-guhV91NsE,830
35
35
  yggdrasil/ser/__init__.py,sha256=sS66Bxu8aiLb-8N2aNayquamfi7FobEH51JyV5ULDFI,31
36
- yggdrasil/ser/callable_serde.py,sha256=1pDgrzAceoFQ7JS7qIeSxC4hCz2oAcQOkeWyYYeT7iY,21206
36
+ yggdrasil/ser/callable_serde.py,sha256=hgJilSWYcAivFCfwRJJ-eKzGBtDfUE1eNppX4ac7Sd0,21473
37
37
  yggdrasil/types/__init__.py,sha256=p0Qu_69RkePPgQGM9nSue_bcbEIAM2u9eo3zsEplHJ8,82
38
38
  yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
39
39
  yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
@@ -48,7 +48,7 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
48
48
  yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
49
49
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
50
50
  yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
51
- ygg-0.1.18.dist-info/METADATA,sha256=d_adw4j5tP0rLrz-9sgI6fL5vWp5s6KgICs7lyLctgI,5981
52
- ygg-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
- ygg-0.1.18.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
- ygg-0.1.18.dist-info/RECORD,,
51
+ ygg-0.1.20.dist-info/METADATA,sha256=MlGCV96KVsKcbu92-pX4CKQjA9KlOE1lafq2KhG4DdQ,5981
52
+ ygg-0.1.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
53
+ ygg-0.1.20.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
54
+ ygg-0.1.20.dist-info/RECORD,,
@@ -294,8 +294,6 @@ print(json.dumps(meta))
294
294
  if use_dill is None:
295
295
  if current_version == self.cluster.python_version:
296
296
  use_dill = True
297
- elif current_version in ((3, 12), (3, 13)) and self.cluster.python_version in ((3, 12), (3, 13)):
298
- use_dill = True
299
297
  else:
300
298
  use_dill = False
301
299
 
@@ -1 +1,2 @@
1
1
  from .workspace import *
2
+ from .databricks_path import *
@@ -44,6 +44,8 @@ class DatabricksPathKind(str, Enum):
44
44
 
45
45
  @classmethod
46
46
  def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
47
+ from .workspace import Workspace
48
+
47
49
  if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
48
50
  if path.startswith("/Users/me"):
49
51
  workspace = Workspace() if workspace is None else workspace
@@ -54,8 +56,6 @@ class DatabricksPathKind(str, Enum):
54
56
  return cls.VOLUME, workspace, path
55
57
 
56
58
  if path.startswith("dbfs://"):
57
- from .workspace import Workspace
58
-
59
59
  parsed = urlparse.urlparse(path)
60
60
  kind, _, inner_path = cls.parse(parsed.path)
61
61
  workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
@@ -174,6 +174,17 @@ def _capture_module_imports(fn: Callable[..., Any]) -> str:
174
174
  return ""
175
175
 
176
176
 
177
+ def _pack_payload(payload: Dict[str, Any], *, level: int = 6) -> str:
178
+ lvl = int(level)
179
+ if lvl < 1:
180
+ lvl = 1
181
+ elif lvl > 9:
182
+ lvl = 9
183
+ raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
184
+ comp = zlib.compress(raw, level=lvl)
185
+ return _b64e(comp)
186
+
187
+
177
188
  def parse_tagged_result(stdout_text: str, result_tag: str) -> Dict[str, Any]:
178
189
  """
179
190
  Extract the last tagged JSON payload printed by a cluster command.
@@ -359,6 +370,8 @@ class CallableSerdeMixin:
359
370
  use_dill: bool,
360
371
  byte_limit: int = 0,
361
372
  result_tag: str = "<<<RESULT>>>",
373
+ compress_input_payload: bool = True, # NEW
374
+ payload_compression_level: int = 6, # NEW (1..9)
362
375
  ) -> str:
363
376
  if kwargs is None:
364
377
  kwargs = {}
@@ -390,7 +403,7 @@ class CallableSerdeMixin:
390
403
  "name": getattr(self.fn, "__name__", None),
391
404
  "qualname": getattr(self.fn, "__qualname__", None),
392
405
  "module": getattr(self.fn, "__module__", None),
393
- "imports": imports, # NEW
406
+ "imports": imports,
394
407
  "source": src,
395
408
  "dill_b64": None,
396
409
  "env": _capture_exec_env(self.fn),
@@ -400,22 +413,13 @@ class CallableSerdeMixin:
400
413
  callable_payload["dill_b64"] = _b64e(dumped_fn)
401
414
 
402
415
  # args/kwargs transport
403
- if use_dill:
404
- dumped_args = _safe_dill_dumps(args)
405
- dumped_kwargs = _safe_dill_dumps(kwargs)
406
- if dumped_args is None or dumped_kwargs is None:
407
- raise ValueError("Failed to dill-serialize args/kwargs")
408
- args_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_args)}
409
- kwargs_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_kwargs)}
410
- else:
411
- try:
412
- args_pack = {"kind": "json", "text": json.dumps(args)}
413
- kwargs_pack = {"kind": "json", "text": json.dumps(kwargs)}
414
- except TypeError as e:
415
- raise ValueError(
416
- "use_dill=False requires JSON-serializable args/kwargs. "
417
- "If you need complex args, run same Python minor and use_dill=True."
418
- ) from e
416
+ dumped_args = _safe_dill_dumps(args)
417
+ dumped_kwargs = _safe_dill_dumps(kwargs)
418
+ if dumped_args is None or dumped_kwargs is None:
419
+ raise ValueError("Failed to dill-serialize args/kwargs")
420
+
421
+ args_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_args)}
422
+ kwargs_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_kwargs)}
419
423
 
420
424
  if not byte_limit:
421
425
  byte_limit = 512 * 1024
@@ -427,15 +431,32 @@ class CallableSerdeMixin:
427
431
  "kwargs": kwargs_pack,
428
432
  "env": client_env,
429
433
  "result_tag": result_tag,
430
- "byte_limit": byte_limit
434
+ "byte_limit": byte_limit,
431
435
  }
432
436
 
437
+ # NEW: compress the embedded input payload to keep command size small
438
+ if compress_input_payload:
439
+ payload_b64 = _pack_payload(payload, level=payload_compression_level)
440
+ payload_bootstrap = f"""
441
+ _payload_b64 = {payload_b64!r}
442
+
443
+ def _load_payload(b64: str) -> dict:
444
+ raw = base64.b64decode(b64.encode("ascii"))
445
+ raw = zlib.decompress(raw)
446
+ return json.loads(raw.decode("utf-8"))
447
+
448
+ _payload = _load_payload(_payload_b64)
449
+ """.rstrip()
450
+ else:
451
+ payload_bootstrap = f"_payload = {payload!r}"
452
+
433
453
  return f"""
434
454
  # --- generated by yggdrasil.ser.CallableSerdeMixin.to_command ---
435
455
  import base64, json, os, traceback, zlib
436
456
  import dill
457
+ from yggdrasil.databricks import *
437
458
 
438
- _payload = {payload!r}
459
+ {payload_bootstrap}
439
460
 
440
461
  def _b64d(s: str) -> bytes:
441
462
  return base64.b64decode(s.encode("ascii"))
@@ -480,7 +501,6 @@ def _load_callable(cpack, use_dill: bool):
480
501
  ns = {{}}
481
502
  ns.update(_hydrate_env(cpack.get("env") or {{}}))
482
503
 
483
- # NEW: exec module imports first
484
504
  if imports.strip():
485
505
  exec(imports, ns, ns)
486
506
 
@@ -498,34 +518,22 @@ def _load_callable(cpack, use_dill: bool):
498
518
  def _emit(tag: str, obj: dict):
499
519
  print(tag + json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
500
520
 
501
-
502
521
  def _zlib_level(n: int, limit: int) -> int:
503
- # ratio of size to limit
504
522
  r = n / max(1, limit)
505
-
506
- # fast for huge payloads, stronger only when slightly over
507
- if r >= 32:
508
- return 1
509
- if r >= 16:
510
- return 2
511
- if r >= 8:
512
- return 3
513
- if r >= 4:
514
- return 4
515
- if r >= 2:
516
- return 5
517
- if r >= 1.25:
518
- return 6
519
- # barely over: squeeze a bit more
523
+ if r >= 32: return 1
524
+ if r >= 16: return 2
525
+ if r >= 8: return 3
526
+ if r >= 4: return 4
527
+ if r >= 2: return 5
528
+ if r >= 1.25: return 6
520
529
  return 7
521
530
 
522
- # apply env vars
523
531
  for k, v in (_payload.get("env") or {{}}).items():
524
532
  if v is not None:
525
533
  os.environ[str(k)] = str(v)
526
534
 
527
535
  tag = _payload.get("result_tag", "<<<RESULT>>>")
528
- byte_limit = int((_payload.get("byte_limit") or 2_000_000)) # ~2MB serialized bytes
536
+ byte_limit = int((_payload.get("byte_limit") or 2_000_000))
529
537
 
530
538
  try:
531
539
  use_dill = bool(_payload.get("use_dill", False))
File without changes