PyPI - ygg - Versions diffs - 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl - Mend

ygg 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

ygg-0.1.21.dist-info/METADATA +367 -0
{ygg-0.1.19.dist-info → ygg-0.1.21.dist-info}/RECORD +13 -11
ygg-0.1.21.dist-info/entry_points.txt +2 -0
ygg-0.1.21.dist-info/licenses/LICENSE +201 -0
yggdrasil/databricks/compute/cluster.py +52 -14
yggdrasil/databricks/compute/execution_context.py +22 -20
yggdrasil/databricks/compute/remote.py +0 -2
yggdrasil/databricks/workspaces/databricks_path.py +2 -2
yggdrasil/pyutils/__init__.py +2 -0
yggdrasil/pyutils/callable_serde.py +563 -0
yggdrasil/pyutils/python_env.py +1342 -0
ygg-0.1.19.dist-info/METADATA +0 -163
yggdrasil/ser/__init__.py +0 -1
yggdrasil/ser/callable_serde.py +0 -645
{ygg-0.1.19.dist-info → ygg-0.1.21.dist-info}/WHEEL +0 -0
{ygg-0.1.19.dist-info → ygg-0.1.21.dist-info}/top_level.txt +0 -0

yggdrasil/ser/callable_serde.py DELETED Viewed

@@ -1,645 +0,0 @@
-# yggdrasil/ser.py
-from __future__ import annotations
-import ast
-import base64
-import builtins
-import inspect
-import json
-import os
-import sys
-import textwrap
-import zlib
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Callable, Dict, Iterable, Optional, Tuple
-import dill
-PyVer = Tuple[int, int]
-def _pyver() -> PyVer:
-    v = sys.version_info
-    return v.major, v.minor
-def _b64e(b: bytes) -> str:
-    return base64.b64encode(b).decode("ascii")
-def _b64d(s: str) -> bytes:
-    return base64.b64decode(s.encode("ascii"))
-def _is_python_function(obj: Any) -> bool:
-    return inspect.isfunction(obj) or inspect.ismethod(obj) or isinstance(obj, type(lambda: 0))
-def _safe_dill_dumps(obj: Any) -> Optional[bytes]:
-    try:
-        return dill.dumps(obj, recurse=True)
-    except Exception:
-        return None
-def _safe_dill_loads(b: Optional[bytes]) -> Any:
-    if b is None:
-        raise ValueError("No dill bytes to load")
-    return dill.loads(b)
-def _infer_package_root(fn: Callable[..., Any]) -> Tuple[str, ...]:
-    """
-    Return the *topmost* package directory for the module defining `fn`.
-    Walk upwards from the module file directory while an __init__.py exists.
-    Example:
-      /repo/my_pkg/sub/mod.py  -> returns /repo/my_pkg
-    If no __init__.py is found, falls back to the module's parent directory.
-    """
-    try:
-        mod = inspect.getmodule(fn)
-        file = getattr(mod, "__file__", None)
-        if not file:
-            return tuple()
-        p = Path(file).resolve()
-        if p.suffix.lower() != ".py":
-            return tuple()
-        cur = p.parent  # start at directory containing the module file
-        # If this directory isn't a package at all, just return it
-        if not (cur / "__init__.py").exists():
-            return (str(cur),)
-        # Climb while parent is still a package (has __init__.py)
-        # Stop at the highest directory that is still a package.
-        top = cur
-        while True:
-            parent = top.parent
-            if parent == top:
-                break
-            if (parent / "__init__.py").exists():
-                top = parent
-                continue
-            break
-        return (str(top),)
-    except Exception:
-        return tuple()
-def _capture_exec_env(fn: Callable[..., Any]) -> Dict[str, Dict[str, str]]:
-    """
-    Capture a minimal environment for exec(source) reconstruction.
-    - globals: names referenced by fn.__code__.co_names that exist in fn.__globals__ (excluding builtins)
-    - freevars: closure cell contents for fn.__code__.co_freevars
-    Values are stored as base64(dill.dumps(value)) (best-effort).
-    """
-    code = fn.__code__
-    fn_globals = getattr(fn, "__globals__", {}) or {}
-    referenced = set(code.co_names or ())
-    builtins_set = set(dir(builtins))
-    g_payload: Dict[str, str] = {}
-    for name in referenced:
-        if name in builtins_set:
-            continue
-        if name in fn_globals:
-            dumped = _safe_dill_dumps(fn_globals[name])
-            if dumped is not None:
-                g_payload[name] = _b64e(dumped)
-    fv_payload: Dict[str, str] = {}
-    freevars = code.co_freevars or ()
-    closure = fn.__closure__ or ()
-    if freevars and closure and len(freevars) == len(closure):
-        for name, cell in zip(freevars, closure):
-            try:
-                val = cell.cell_contents
-            except ValueError:
-                continue
-            dumped = _safe_dill_dumps(val)
-            if dumped is not None:
-                fv_payload[name] = _b64e(dumped)
-    return {"globals": g_payload, "freevars": fv_payload}
-def _capture_module_imports(fn: Callable[..., Any]) -> str:
-    """
-    Capture top-level imports from the module file where `fn` is defined.
-    We extract only:
-      - `import x`
-      - `import x as y`
-      - `from a.b import c`
-      - `from a.b import c as d`
-    Returned as a single string block (may be empty).
-    """
-    try:
-        src_file = inspect.getsourcefile(fn) or inspect.getfile(fn)
-        if not src_file:
-            return ""
-        p = Path(src_file)
-        if not p.exists() or p.suffix.lower() != ".py":
-            return ""
-        text = p.read_text(encoding="utf-8")
-        tree = ast.parse(text)
-        imports: list[str] = []
-        for node in tree.body:
-            if isinstance(node, (ast.Import, ast.ImportFrom)):
-                seg = ast.get_source_segment(text, node)
-                if seg:
-                    imports.append(seg.strip())
-        # de-dupe while preserving order
-        seen = set()
-        uniq: list[str] = []
-        for line in imports:
-            if line not in seen:
-                seen.add(line)
-                uniq.append(line)
-        return "\n".join(uniq).strip() + ("\n" if uniq else "")
-    except Exception:
-        return ""
-def parse_tagged_result(stdout_text: str, result_tag: str) -> Dict[str, Any]:
-    """
-    Extract the last tagged JSON payload printed by a cluster command.
-    Expects lines like:  <<<RESULT>>>{...json...}
-    """
-    last = None
-    for line in stdout_text.splitlines():
-        if line.startswith(result_tag):
-            last = line[len(result_tag) :]
-    if last is None:
-        raise ValueError(f"Result tag {result_tag!r} not found in output")
-    try:
-        return json.loads(last)
-    except Exception as e:
-        raise ValueError("Tagged result was not valid JSON") from e
-class CommandError(RuntimeError):
-    """
-    Raised when the remote Databricks command returns ok=false.
-    Attributes
-    ----------
-    error: str
-        Short error message from remote.
-    traceback: str
-        Remote traceback (string) if provided.
-    raw: dict
-        Full parsed JSON payload from the tagged output line.
-    """
-    def __init__(self, error: str, traceback: str = "", raw: Optional[Dict[str, Any]] = None):
-        self.error = error
-        self.traceback = traceback
-        self.raw = raw or {}
-        msg = error if not traceback else f"{error}\n{traceback}"
-        super().__init__(msg.rstrip())
-class CommandResultParseError(ValueError):
-    """
-    Raised when the command output cannot be parsed or decoded.
-    """
-    def __init__(self, message: str, stdout_text: Optional[str] = None):
-        self.stdout_text = stdout_text
-        super().__init__(message)
-@dataclass
-class CallableSerdeMixin:
-    """
-    Encapsulates a callable + serialization strategy + Databricks command generation.
-    - Same Python (major, minor): prefer dill
-    - Different Python (major, minor): exec(imports + source) with captured globals/freevars
-    """
-    fn: Callable[..., Any]
-    package_root: Tuple[str, ...] = field(default_factory=tuple)
-    ALLOW_EXEC_SOURCE: bool = True
-    @classmethod
-    def from_callable(cls, fn: Callable[..., Any]) -> "CallableSerdeMixin":
-        if isinstance(fn, CallableSerdeMixin):
-            return fn
-        return cls(fn=fn, package_root=_infer_package_root(fn))
-    def __call__(self, *args: Any, **kwargs: Any) -> Any:
-        return self.fn(*args, **kwargs)
-    # ---------- pickle protocol ----------
-    def __getstate__(self) -> Dict[str, Any]:
-        if _is_python_function(self.fn):
-            src = None
-            try:
-                src = textwrap.dedent(inspect.getsource(self.fn))
-            except Exception:
-                src = None
-            payload: Dict[str, Any] = {
-                "__callable__": True,
-                "pyver": list(_pyver()),
-                "name": getattr(self.fn, "__name__", None),
-                "qualname": getattr(self.fn, "__qualname__", None),
-                "module": getattr(self.fn, "__module__", None),
-                "imports": _capture_module_imports(self.fn),  # NEW
-                "source": src,
-                "dill_b64": None,
-                "env": _capture_exec_env(self.fn),
-            }
-            dumped = _safe_dill_dumps(self.fn)
-            if dumped is not None:
-                payload["dill_b64"] = _b64e(dumped)
-            return {
-                "fn": payload,
-                "package_root": tuple(self.package_root),
-                "ALLOW_EXEC_SOURCE": bool(self.ALLOW_EXEC_SOURCE),
-            }
-        dumped = _safe_dill_dumps(self.fn)
-        if dumped is None:
-            raise ValueError("Callable object could not be dill-serialized")
-        return {
-            "fn": {"__callable__": False, "dill_b64": _b64e(dumped)},
-            "package_root": tuple(self.package_root),
-            "ALLOW_EXEC_SOURCE": bool(self.ALLOW_EXEC_SOURCE),
-        }
-    def __setstate__(self, state: Dict[str, Any]) -> None:
-        self.package_root = tuple(state.get("package_root") or ())
-        self.ALLOW_EXEC_SOURCE = bool(state.get("ALLOW_EXEC_SOURCE", True))
-        fn_payload = state["fn"]
-        if isinstance(fn_payload, dict) and fn_payload.get("__callable__") is False:
-            self.fn = _safe_dill_loads(_b64d(fn_payload["dill_b64"]))
-            return
-        if not isinstance(fn_payload, dict) or fn_payload.get("__callable__") is not True:
-            raise ValueError("Invalid callable payload")
-        src_pyver = tuple(fn_payload.get("pyver") or ())
-        cur_pyver = _pyver()
-        if src_pyver == cur_pyver and fn_payload.get("dill_b64"):
-            try:
-                self.fn = _safe_dill_loads(_b64d(fn_payload["dill_b64"]))
-                if callable(self.fn):
-                    return
-            except Exception:
-                pass
-        if not self.ALLOW_EXEC_SOURCE:
-            raise ValueError("Exec-based restore disabled and dill path unavailable/failed")
-        imports = fn_payload.get("imports") or ""
-        source = fn_payload.get("source")
-        name = fn_payload.get("name")
-        env = fn_payload.get("env") or {}
-        if not source:
-            raise ValueError("No source available for exec-based restore")
-        ns: Dict[str, Any] = {}
-        # preload captured names
-        for bucket in ("globals", "freevars"):
-            items = (env.get(bucket) or {})
-            for k, b64 in items.items():
-                try:
-                    ns[k] = _safe_dill_loads(_b64d(b64))
-                except Exception:
-                    pass
-        # NEW: run module imports first
-        if imports.strip():
-            exec(imports, ns, ns)
-        exec(source, ns, ns)
-        if name and name in ns and callable(ns[name]):
-            self.fn = ns[name]
-            return
-        cands = [v for v in ns.values() if callable(v)]
-        if cands:
-            self.fn = cands[-1]
-            return
-        raise ValueError("exec(source) succeeded but no callable could be recovered")
-    # ---------- Databricks command generation ----------
-    def to_command(
-        self,
-        *,
-        args: Tuple[Any, ...] = (),
-        kwargs: Optional[Dict[str, Any]] = None,
-        env_keys: Iterable[str] = (),
-        env_variables: Optional[Dict[str, str]] = None,
-        use_dill: bool,
-        byte_limit: int = 0,
-        result_tag: str = "<<<RESULT>>>",
-    ) -> str:
-        if kwargs is None:
-            kwargs = {}
-        if env_variables is None:
-            env_variables = {}
-        # capture env vars from client process
-        client_env: Dict[str, str] = {}
-        for k in env_keys:
-            v = os.environ.get(k)
-            if v is not None:
-                client_env[k] = v
-        client_env.update(env_variables)
-        if not _is_python_function(self.fn):
-            raise ValueError("to_command supports Python functions/methods/lambdas only")
-        src = None
-        try:
-            src = textwrap.dedent(inspect.getsource(self.fn))
-        except Exception:
-            src = None
-        imports = _capture_module_imports(self.fn)
-        callable_payload: Dict[str, Any] = {
-            "__callable__": True,
-            "pyver": list(_pyver()),
-            "name": getattr(self.fn, "__name__", None),
-            "qualname": getattr(self.fn, "__qualname__", None),
-            "module": getattr(self.fn, "__module__", None),
-            "imports": imports,  # NEW
-            "source": src,
-            "dill_b64": None,
-            "env": _capture_exec_env(self.fn),
-        }
-        dumped_fn = _safe_dill_dumps(self.fn)
-        if dumped_fn is not None:
-            callable_payload["dill_b64"] = _b64e(dumped_fn)
-        # args/kwargs transport
-        dumped_args = _safe_dill_dumps(args)
-        dumped_kwargs = _safe_dill_dumps(kwargs)
-        if dumped_args is None or dumped_kwargs is None:
-            raise ValueError("Failed to dill-serialize args/kwargs")
-        args_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_args)}
-        kwargs_pack: Dict[str, Any] = {"kind": "dill", "b64": _b64e(dumped_kwargs)}
-        if not byte_limit:
-            byte_limit = 512 * 1024
-        payload = {
-            "callable": callable_payload,
-            "use_dill": bool(use_dill),
-            "args": args_pack,
-            "kwargs": kwargs_pack,
-            "env": client_env,
-            "result_tag": result_tag,
-            "byte_limit": byte_limit
-        }
-        return f"""
-# --- generated by yggdrasil.ser.CallableSerdeMixin.to_command ---
-import base64, json, os, traceback, zlib
-import dill
-from yggdrasil.databricks import *
-_payload = {payload!r}
-def _b64d(s: str) -> bytes:
-    return base64.b64decode(s.encode("ascii"))
-def _try_dill_load(b64: str):
-    return dill.loads(_b64d(b64))
-def _hydrate_env(env_dict):
-    ns = {{}}
-    for bucket in ("globals", "freevars"):
-        items = (env_dict or {{}}).get(bucket, {{}}) or {{}}
-        for name, b64 in items.items():
-            try:
-                ns[name] = _try_dill_load(b64)
-            except Exception:
-                pass
-    return ns
-def _load_args(pack):
-    if pack["kind"] == "dill":
-        return dill.loads(_b64d(pack["b64"]))
-    return tuple(json.loads(pack["text"]))
-def _load_kwargs(pack):
-    if pack["kind"] == "dill":
-        return dill.loads(_b64d(pack["b64"]))
-    return dict(json.loads(pack["text"]))
-def _load_callable(cpack, use_dill: bool):
-    # Prefer dill when requested/available
-    if use_dill and cpack.get("dill_b64"):
-        fn = _try_dill_load(cpack["dill_b64"])
-        if callable(fn):
-            return fn
-    src = cpack.get("source")
-    name = cpack.get("name")
-    imports = cpack.get("imports") or ""
-    if not src:
-        raise ValueError("No source available for exec-based restore")
-    ns = {{}}
-    ns.update(_hydrate_env(cpack.get("env") or {{}}))
-    # NEW: exec module imports first
-    if imports.strip():
-        exec(imports, ns, ns)
-    exec(src, ns, ns)
-    if name and name in ns and callable(ns[name]):
-        return ns[name]
-    cands = [v for v in ns.values() if callable(v)]
-    if cands:
-        return cands[-1]
-    raise ValueError("exec(source) ran but no callable was recovered")
-def _emit(tag: str, obj: dict):
-    print(tag + json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
-def _zlib_level(n: int, limit: int) -> int:
-    # ratio of size to limit
-    r = n / max(1, limit)
-    # fast for huge payloads, stronger only when slightly over
-    if r >= 32:
-        return 1
-    if r >= 16:
-        return 2
-    if r >= 8:
-        return 3
-    if r >= 4:
-        return 4
-    if r >= 2:
-        return 5
-    if r >= 1.25:
-        return 6
-    # barely over: squeeze a bit more
-    return 7
-# apply env vars
-for k, v in (_payload.get("env") or {{}}).items():
-    if v is not None:
-        os.environ[str(k)] = str(v)
-tag = _payload.get("result_tag", "<<<RESULT>>>")
-byte_limit = int((_payload.get("byte_limit") or 2_000_000))  # ~2MB serialized bytes
-try:
-    use_dill = bool(_payload.get("use_dill", False))
-    fn = _load_callable(_payload["callable"], use_dill=use_dill)
-    args = _load_args(_payload["args"])
-    kwargs = _load_kwargs(_payload["kwargs"])
-    out = fn(*args, **kwargs)
-    out_raw = dill.dumps(out, recurse=True)
-    if len(out_raw) > byte_limit:
-        lvl = _zlib_level(len(out_raw), byte_limit)
-        out_comp = zlib.compress(out_raw, level=lvl)
-        out_b64 = base64.b64encode(out_comp).decode("ascii")
-        _emit(tag, {{"ok": True, "encoding": f"dill+zlib{{lvl}}+b64", "payload": out_b64}})
-    else:
-        out_b64 = base64.b64encode(out_raw).decode("ascii")
-        _emit(tag, {{"ok": True, "encoding": "dill+b64", "payload": out_b64}})
-except Exception as e:
-    tb = traceback.format_exc()
-    err = {{"cls": e.__class__.__name__, "msg": str(e), "tb": tb}}
-    _emit(tag, {{"ok": False, "err": err}})
-# --- end generated command ---
-""".lstrip()
-    @staticmethod
-    def parse_command_result(
-        stdout_text: str,
-        *,
-        result_tag: str = "<<<RESULT>>>",
-        decode: bool = True,
-    ) -> Any:
-        """
-        Parse Databricks command stdout and return decoded result or raise.
-        Protocol:
-          prints lines like:  <<<RESULT>>>{...json...}
-          last tagged line wins
-        If ok=false -> raise DatabricksCommandError
-        If ok=true and decode=True:
-          - encoding == "dill+b64": dill.loads(base64(payload))
-          - encoding == "repr": returns payload as-is
-        If decode=False: returns the raw parsed dict.
-        """
-        last: Optional[str] = None
-        for line in stdout_text.splitlines():
-            if line.startswith(result_tag):
-                last = line[len(result_tag):]
-            else:
-                print(line)
-        if last is None:
-            raise CommandResultParseError(
-                f"Result tag {result_tag!r} not found in command output",
-                stdout_text=stdout_text,
-            )
-        try:
-            msg: Dict[str, Any] = json.loads(last)
-        except Exception as e:
-            raise CommandResultParseError(
-                "Tagged result is not valid JSON",
-                stdout_text=stdout_text,
-            ) from e
-        if not decode:
-            return msg
-        ok = bool(msg.get("ok", False))
-        if not ok:
-            error = msg.get("err", {})
-            error_class = str(error.get("cls", "RuntimeError"))
-            error_message = str(error.get("msg", "Remote execution failed"))
-            error_traceback = str(error.get("tb", ""))
-            base = CommandError(
-                error=error_message,
-                traceback=error_traceback,
-                raw=msg,
-            )
-            if error_class == "ModuleNotFound":
-                raise ModuleNotFoundError(error_message) from base
-            raise base
-        encoding = msg.get("encoding")
-        payload = msg.get("payload")
-        if encoding == "dill+b64":
-            if not isinstance(payload, str):
-                raise CommandResultParseError(
-                    "Expected base64 string payload for dill+b64 encoding",
-                    stdout_text=stdout_text,
-                )
-            try:
-                data = base64.b64decode(payload.encode("ascii"))
-                return dill.loads(data)
-            except Exception as e:
-                raise CommandResultParseError(
-                    "Failed to decode dill+b64 payload",
-                    stdout_text=stdout_text,
-                ) from e
-        if encoding == "repr":
-            return payload
-        if encoding.startswith("dill+zlib") and encoding.endswith("+b64"):
-            if not isinstance(payload, str):
-                raise CommandResultParseError("Expected base64 string payload for zlib encoding",
-                                              stdout_text=stdout_text)
-            try:
-                data = base64.b64decode(payload.encode("ascii"))
-                raw = zlib.decompress(data)
-                return dill.loads(raw)
-            except Exception as e:
-                raise CommandResultParseError("Failed to decode dill+zlib+b64 payload", stdout_text=stdout_text) from e
-        raise CommandResultParseError(
-            f"Unknown result encoding: {encoding!r}",
-            stdout_text=stdout_text,
-        )

{ygg-0.1.19.dist-info → ygg-0.1.21.dist-info}/WHEEL RENAMED Viewed

File without changes

{ygg-0.1.19.dist-info → ygg-0.1.21.dist-info}/top_level.txt RENAMED Viewed

File without changes

ygg 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl

ygg 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl