ygg 0.1.18__tar.gz → 0.1.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.18 → ygg-0.1.20}/PKG-INFO +1 -1
- {ygg-0.1.18 → ygg-0.1.20}/pyproject.toml +1 -1
- {ygg-0.1.18 → ygg-0.1.20}/src/ygg.egg-info/PKG-INFO +1 -1
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/compute/execution_context.py +0 -2
- ygg-0.1.20/src/yggdrasil/databricks/workspaces/__init__.py +2 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/workspaces/databricks_path.py +2 -2
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/ser/callable_serde.py +47 -39
- ygg-0.1.18/src/yggdrasil/databricks/workspaces/__init__.py +0 -1
- {ygg-0.1.18 → ygg-0.1.20}/README.md +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/setup.cfg +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/ygg.egg-info/SOURCES.txt +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/ygg.egg-info/requires.txt +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/compute/cluster.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/compute/remote.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/jobs/config.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/sql/engine.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/sql/types.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/databricks/workspaces/workspace.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/pandaslib.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/polarslib.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/pyutils/modules.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/ser/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/polars_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/libs.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.18 → ygg-0.1.20}/src/yggdrasil/types/python_defaults.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ygg
|
|
3
|
-
Version: 0.1.18
|
|
3
|
+
Version: 0.1.20
|
|
4
4
|
Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
|
|
5
5
|
Author: Yggdrasil contributors
|
|
6
6
|
Project-URL: Homepage, https://github.com/Platob/Yggdrasil
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ygg"
|
|
7
|
-
version = "0.1.18"
|
|
7
|
+
version = "0.1.20"
|
|
8
8
|
description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ygg
|
|
3
|
-
Version: 0.1.18
|
|
3
|
+
Version: 0.1.20
|
|
4
4
|
Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
|
|
5
5
|
Author: Yggdrasil contributors
|
|
6
6
|
Project-URL: Homepage, https://github.com/Platob/Yggdrasil
|
|
@@ -294,8 +294,6 @@ print(json.dumps(meta))
|
|
|
294
294
|
if use_dill is None:
|
|
295
295
|
if current_version == self.cluster.python_version:
|
|
296
296
|
use_dill = True
|
|
297
|
-
elif current_version in ((3, 12), (3, 13)) and self.cluster.python_version in ((3, 12), (3, 13)):
|
|
298
|
-
use_dill = True
|
|
299
297
|
else:
|
|
300
298
|
use_dill = False
|
|
301
299
|
|
|
@@ -44,6 +44,8 @@ class DatabricksPathKind(str, Enum):
|
|
|
44
44
|
|
|
45
45
|
@classmethod
|
|
46
46
|
def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
|
|
47
|
+
from .workspace import Workspace
|
|
48
|
+
|
|
47
49
|
if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
|
|
48
50
|
if path.startswith("/Users/me"):
|
|
49
51
|
workspace = Workspace() if workspace is None else workspace
|
|
@@ -54,8 +56,6 @@ class DatabricksPathKind(str, Enum):
|
|
|
54
56
|
return cls.VOLUME, workspace, path
|
|
55
57
|
|
|
56
58
|
if path.startswith("dbfs://"):
|
|
57
|
-
from .workspace import Workspace
|
|
58
|
-
|
|
59
59
|
parsed = urlparse.urlparse(path)
|
|
60
60
|
kind, _, inner_path = cls.parse(parsed.path)
|
|
61
61
|
workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
|
|
@@ -174,6 +174,17 @@ def _capture_module_imports(fn: Callable[..., Any]) -> str:
|
|
|
174
174
|
return ""
|
|
175
175
|
|
|
176
176
|
|
|
177
|
+
def _pack_payload(payload: Dict[str, Any], *, level: int = 6) -> str:
|
|
178
|
+
lvl = int(level)
|
|
179
|
+
if lvl < 1:
|
|
180
|
+
lvl = 1
|
|
181
|
+
elif lvl > 9:
|
|
182
|
+
lvl = 9
|
|
183
|
+
raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
|
|
184
|
+
comp = zlib.compress(raw, level=lvl)
|
|
185
|
+
return _b64e(comp)
|
|
186
|
+
|
|
187
|
+
|
|
177
188
|
def parse_tagged_result(stdout_text: str, result_tag: str) -> Dict[str, Any]:
|
|
178
189
|
"""
|
|
179
190
|
Extract the last tagged JSON payload printed by a cluster command.
|
|
@@ -359,6 +370,8 @@ class CallableSerdeMixin:
|
|
|
359
370
|
use_dill: bool,
|
|
360
371
|
byte_limit: int = 0,
|
|
361
372
|
result_tag: str = "<<<RESULT>>>",
|
|
373
|
+
compress_input_payload: bool = True, # NEW
|
|
374
|
+
payload_compression_level: int = 6, # NEW (1..9)
|
|
362
375
|
) -> str:
|
|
363
376
|
if kwargs is None:
|
|
364
377
|
kwargs = {}
|
|
@@ -390,7 +403,7 @@ class CallableSerdeMixin:
|
|
|
390
403
|
"name": getattr(self.fn, "__name__", None),
|
|
391
404
|
"qualname": getattr(self.fn, "__qualname__", None),
|
|
392
405
|
"module": getattr(self.fn, "__module__", None),
|
|
393
|
-
"imports": imports,
|
|
406
|
+
"imports": imports,
|
|
394
407
|
"source": src,
|
|
395
408
|
"dill_b64": None,
|
|
396
409
|
"env": _capture_exec_env(self.fn),
|
|
@@ -400,22 +413,13 @@ class CallableSerdeMixin:
|
|
|
400
413
|
callable_payload["dill_b64"] = _b64e(dumped_fn)
|
|
401
414
|
|
|
402
415
|
# args/kwargs transport
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
else:
|
|
411
|
-
try:
|
|
412
|
-
args_pack = {"kind": "json", "text": json.dumps(args)}
|
|
413
|
-
kwargs_pack = {"kind": "json", "text": json.dumps(kwargs)}
|
|
414
|
-
except TypeError as e:
|
|
415
|
-
raise ValueError(
|
|
416
|
-
"use_dill=False requires JSON-serializable args/kwargs. "
|
|
417
|
-
"If you need complex args, run same Python minor and use_dill=True."
|
|
418
|
-
) from e
|
|
416
|
+
dumped_args = _safe_dill_dumps(args)
|
|
417
|
+
dumped_kwargs = _safe_dill_dumps(kwargs)
|
|
418
|
+
if dumped_args is None or dumped_kwargs is None:
|
|
419
|
+
raise ValueError("Failed to dill-serialize args/kwargs")
|
|
420
|
+
|
|
421
|
+
args_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_args)}
|
|
422
|
+
kwargs_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_kwargs)}
|
|
419
423
|
|
|
420
424
|
if not byte_limit:
|
|
421
425
|
byte_limit = 512 * 1024
|
|
@@ -427,15 +431,32 @@ class CallableSerdeMixin:
|
|
|
427
431
|
"kwargs": kwargs_pack,
|
|
428
432
|
"env": client_env,
|
|
429
433
|
"result_tag": result_tag,
|
|
430
|
-
"byte_limit": byte_limit
|
|
434
|
+
"byte_limit": byte_limit,
|
|
431
435
|
}
|
|
432
436
|
|
|
437
|
+
# NEW: compress the embedded input payload to keep command size small
|
|
438
|
+
if compress_input_payload:
|
|
439
|
+
payload_b64 = _pack_payload(payload, level=payload_compression_level)
|
|
440
|
+
payload_bootstrap = f"""
|
|
441
|
+
_payload_b64 = {payload_b64!r}
|
|
442
|
+
|
|
443
|
+
def _load_payload(b64: str) -> dict:
|
|
444
|
+
raw = base64.b64decode(b64.encode("ascii"))
|
|
445
|
+
raw = zlib.decompress(raw)
|
|
446
|
+
return json.loads(raw.decode("utf-8"))
|
|
447
|
+
|
|
448
|
+
_payload = _load_payload(_payload_b64)
|
|
449
|
+
""".rstrip()
|
|
450
|
+
else:
|
|
451
|
+
payload_bootstrap = f"_payload = {payload!r}"
|
|
452
|
+
|
|
433
453
|
return f"""
|
|
434
454
|
# --- generated by yggdrasil.ser.CallableSerdeMixin.to_command ---
|
|
435
455
|
import base64, json, os, traceback, zlib
|
|
436
456
|
import dill
|
|
457
|
+
from yggdrasil.databricks import *
|
|
437
458
|
|
|
438
|
-
|
|
459
|
+
{payload_bootstrap}
|
|
439
460
|
|
|
440
461
|
def _b64d(s: str) -> bytes:
|
|
441
462
|
return base64.b64decode(s.encode("ascii"))
|
|
@@ -480,7 +501,6 @@ def _load_callable(cpack, use_dill: bool):
|
|
|
480
501
|
ns = {{}}
|
|
481
502
|
ns.update(_hydrate_env(cpack.get("env") or {{}}))
|
|
482
503
|
|
|
483
|
-
# NEW: exec module imports first
|
|
484
504
|
if imports.strip():
|
|
485
505
|
exec(imports, ns, ns)
|
|
486
506
|
|
|
@@ -498,34 +518,22 @@ def _load_callable(cpack, use_dill: bool):
|
|
|
498
518
|
def _emit(tag: str, obj: dict):
|
|
499
519
|
print(tag + json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
|
|
500
520
|
|
|
501
|
-
|
|
502
521
|
def _zlib_level(n: int, limit: int) -> int:
|
|
503
|
-
# ratio of size to limit
|
|
504
522
|
r = n / max(1, limit)
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
if r >= 32:
|
|
508
|
-
|
|
509
|
-
if r >= 16:
|
|
510
|
-
|
|
511
|
-
if r >= 8:
|
|
512
|
-
return 3
|
|
513
|
-
if r >= 4:
|
|
514
|
-
return 4
|
|
515
|
-
if r >= 2:
|
|
516
|
-
return 5
|
|
517
|
-
if r >= 1.25:
|
|
518
|
-
return 6
|
|
519
|
-
# barely over: squeeze a bit more
|
|
523
|
+
if r >= 32: return 1
|
|
524
|
+
if r >= 16: return 2
|
|
525
|
+
if r >= 8: return 3
|
|
526
|
+
if r >= 4: return 4
|
|
527
|
+
if r >= 2: return 5
|
|
528
|
+
if r >= 1.25: return 6
|
|
520
529
|
return 7
|
|
521
530
|
|
|
522
|
-
# apply env vars
|
|
523
531
|
for k, v in (_payload.get("env") or {{}}).items():
|
|
524
532
|
if v is not None:
|
|
525
533
|
os.environ[str(k)] = str(v)
|
|
526
534
|
|
|
527
535
|
tag = _payload.get("result_tag", "<<<RESULT>>>")
|
|
528
|
-
byte_limit = int((_payload.get("byte_limit") or 2_000_000))
|
|
536
|
+
byte_limit = int((_payload.get("byte_limit") or 2_000_000))
|
|
529
537
|
|
|
530
538
|
try:
|
|
531
539
|
use_dill = bool(_payload.get("use_dill", False))
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .workspace import *
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|