ygg 0.1.31__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/libs/extensions/spark_extensions.py

@@ -1,3 +1,5 @@
+ """Spark DataFrame extension helpers for aliases and resampling."""
+
  import datetime
  import inspect
  import re
@@ -30,6 +32,15 @@ _COL_RE = re.compile(r"Column<\s*['\"]?`?(.+?)`?['\"]?\s*>")


  def _require_pyspark(fn_name: str) -> None:
+ """Raise when PySpark is unavailable for a requested helper."""
+ """Raise when PySpark is unavailable for a requested helper.
+
+ Args:
+ fn_name: Name of the calling function.
+
+ Returns:
+ None.
+ """
  if pyspark is None or F is None or T is None:
  raise RuntimeError(
  f"{fn_name} requires PySpark to be available. "
@@ -41,6 +52,15 @@ def getAliases(
  obj: Union[SparkDataFrame, SparkColumn, str, Iterable[Union[SparkDataFrame, SparkColumn, str]]],
  full: bool = True,
  ) -> list[str]:
+ """Return aliases for Spark columns/dataframes or collections.
+
+ Args:
+ obj: Spark DataFrame/Column, string, or iterable of these.
+ full: Whether to return full qualified names.
+
+ Returns:
+ List of alias strings.
+ """
  if obj is None:
  return []

@@ -92,6 +112,16 @@ def latest(
  partitionBy: List[Union[str, SparkColumn]],
  orderBy: List[Union[str, SparkColumn]],
  ) -> SparkDataFrame:
+ """Return the latest rows per partition based on ordering.
+
+ Args:
+ df: Spark DataFrame.
+ partitionBy: Columns to partition by.
+ orderBy: Columns to order by.
+
+ Returns:
+ Spark DataFrame with latest rows per partition.
+ """
  _require_pyspark("latest")

  partition_col_names = getAliases(partitionBy)
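
The two helpers documented above are the public entry points of this hunk. A minimal usage sketch, assuming an active SparkSession and the signatures shown in the diff; the sample data and column names are invented:

```python
from pyspark.sql import SparkSession, functions as F
from yggdrasil.libs.extensions.spark_extensions import getAliases, latest

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("a", 1, "2024-01-01"), ("a", 2, "2024-02-01"), ("b", 1, "2024-01-15")],
    ["id", "version", "updated_at"],
)

# getAliases accepts columns, strings, or iterables of either and returns the
# alias names, so ["key", "updated_at"] would be the expected result here.
getAliases([F.col("id").alias("key"), "updated_at"])

# latest keeps the most recent row per partition according to the ordering
# columns, i.e. one row per id ordered by updated_at.
latest(df, partitionBy=["id"], orderBy=["updated_at"])
```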
@@ -123,12 +153,30 @@ def _infer_time_col_spark(df: "pyspark.sql.DataFrame") -> str:


  def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
+ """Filter kwargs to only those accepted by the callable.
+
+ Args:
+ fn: Callable to inspect.
+ kwargs: Candidate keyword arguments.
+
+ Returns:
+ Filtered keyword arguments.
+ """
  sig = inspect.signature(fn) # type: ignore[arg-type]
  allowed = set(sig.parameters.keys())
  return {k: v for k, v in kwargs.items() if (k in allowed and v is not None)}


  def _append_drop_col_to_spark_schema(schema: "T.StructType", drop_col: str) -> "T.StructType":
+ """Ensure the drop column exists in the Spark schema.
+
+ Args:
+ schema: Spark schema to augment.
+ drop_col: Column name to add if missing.
+
+ Returns:
+ Updated Spark schema.
+ """
  _require_pyspark("_append_drop_col_to_spark_schema")
  if drop_col in schema.fieldNames():
  return schema
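
`_filter_kwargs_for_callable` is a small pattern worth spelling out: inspect a callable's signature and drop any keyword argument it would not accept (or whose value is None). A standalone sketch of the same idea using only the standard library:

```python
import inspect
from typing import Any


def filter_kwargs(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
    # Keep only keyword arguments that appear in fn's signature and are not None,
    # mirroring the private helper added in this diff.
    allowed = set(inspect.signature(fn).parameters)
    return {k: v for k, v in kwargs.items() if k in allowed and v is not None}


def resample(df, every: str, closed: str = "left"):
    ...


filter_kwargs(resample, {"every": "1h", "closed": None, "unknown": 1})
# -> {"every": "1h"}
```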
@@ -169,6 +217,14 @@ def upsample(
  spark_schema = arrow_field_to_spark_field(options.target_field)

  def within_group(tb: pa.Table) -> pa.Table:
+ """Apply upsample logic to a grouped Arrow table.
+
+ Args:
+ tb: Arrow table for a grouped partition.
+
+ Returns:
+ Arrow table with upsampled data.
+ """
  res = (
  arrow_table_to_polars_dataframe(tb, options)
  .sort(time_col_name)
@@ -277,6 +333,14 @@ def resample(
  out_options = CastOptions.check_arg(out_arrow_field)

  def within_group(tb: pa.Table) -> pa.Table:
+ """Apply resample logic to a grouped Arrow table.
+
+ Args:
+ tb: Arrow table for a grouped partition.
+
+ Returns:
+ Arrow table with resampled data.
+ """
  from .polars_extensions import resample

  pdf = arrow_table_to_polars_dataframe(tb, in_options)
@@ -329,6 +393,18 @@ def checkJoin(
  *args,
  **kwargs,
  ):
+ """Join two DataFrames with schema-aware column casting.
+
+ Args:
+ df: Left Spark DataFrame.
+ other: Right Spark DataFrame.
+ on: Join keys or mapping.
+ *args: Positional args passed to join.
+ **kwargs: Keyword args passed to join.
+
+ Returns:
+ Joined Spark DataFrame.
+ """
  _require_pyspark("checkJoin")

  other = convert(other, SparkDataFrame)
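
A hedged usage sketch for `checkJoin`. The diff does not show how the `on` mapping is interpreted, so the mapping form below (left column name to right column name) is an assumption, and both DataFrames are placeholders:

```python
from yggdrasil.libs.extensions.spark_extensions import checkJoin

# orders.customer_id is a string while customers.id is a bigint; per the docstring
# the helper casts the join columns to compatible types before delegating to join().
joined = checkJoin(orders, customers, on={"customer_id": "id"}, how="left")
```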
@@ -371,12 +447,32 @@ def checkMapInArrow(
  *args,
  **kwargs,
  ):
+ """Wrap mapInArrow to enforce output schema conversion.
+
+ Args:
+ df: Spark DataFrame.
+ func: Generator function yielding RecordBatches.
+ schema: Output schema (Spark StructType or DDL string).
+ *args: Positional args passed to mapInArrow.
+ **kwargs: Keyword args passed to mapInArrow.
+
+ Returns:
+ Spark DataFrame with enforced schema.
+ """
  _require_pyspark("mapInArrow")

  spark_schema = convert(schema, T.StructType)
  arrow_schema = convert(schema, pa.Field)

  def patched(batches: Iterable[pa.RecordBatch]):
+ """Convert batches yielded by user function to the target schema.
+
+ Args:
+ batches: Input RecordBatch iterable.
+
+ Yields:
+ RecordBatch instances conforming to the output schema.
+ """
  for src in func(batches):
  yield convert(src, pa.RecordBatch, arrow_schema)
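
A usage sketch for the wrapper, assuming `df` is an existing Spark DataFrame with `id` and `value` columns; the batch function and output schema are illustrative, and the conversion of each yielded batch is handled by the `patched` generator shown above:

```python
from typing import Iterable, Iterator

import pyarrow as pa

from yggdrasil.libs.extensions.spark_extensions import checkMapInArrow


def add_flag(batches: Iterable[pa.RecordBatch]) -> Iterator[pa.RecordBatch]:
    for batch in batches:
        # Append a column; the yielded batches do not need to match the declared
        # output schema exactly, since checkMapInArrow converts each one.
        table = pa.Table.from_batches([batch]).append_column(
            "flag", pa.array([True] * batch.num_rows)
        )
        yield from table.to_batches()


out = checkMapInArrow(df, add_flag, "id long, value double, flag boolean")
```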
@@ -395,6 +491,18 @@ def checkMapInPandas(
  *args,
  **kwargs,
  ):
+ """Wrap mapInPandas to enforce output schema conversion.
+
+ Args:
+ df: Spark DataFrame.
+ func: Generator function yielding pandas DataFrames.
+ schema: Output schema (Spark StructType or DDL string).
+ *args: Positional args passed to mapInPandas.
+ **kwargs: Keyword args passed to mapInPandas.
+
+ Returns:
+ Spark DataFrame with enforced schema.
+ """
  _require_pyspark("mapInPandas")

  import pandas as _pd # local import so we don't shadow the ..pandas module
@@ -403,6 +511,14 @@ def checkMapInPandas(
  arrow_schema = convert(schema, pa.Field)

  def patched(batches: Iterable[_pd.DataFrame]):
+ """Convert pandas batches yielded by user function to the target schema.
+
+ Args:
+ batches: Input pandas DataFrame iterable.
+
+ Yields:
+ pandas DataFrames conforming to the output schema.
+ """
  for src in func(batches):
  yield convert(src, _pd.DataFrame, arrow_schema)
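
Both `checkMapInArrow` and `checkMapInPandas` rely on the same trick: wrap the user's generator in a `patched` generator that normalizes every yielded item before Spark sees it. A self-contained sketch of that pattern, with a plain cast function standing in for yggdrasil's `convert`:

```python
from typing import Callable, Iterable, Iterator, TypeVar

T = TypeVar("T")


def enforce_output(
    func: Callable[[Iterable[T]], Iterable[T]],
    cast: Callable[[T], T],
) -> Callable[[Iterable[T]], Iterator[T]]:
    # Return a generator function with the same shape as `func`, but every item
    # the user yields is passed through `cast` before being re-yielded.
    def patched(items: Iterable[T]) -> Iterator[T]:
        for src in func(items):
            yield cast(src)

    return patched


doubled = enforce_output(lambda xs: (x + x for x in xs), cast=float)
list(doubled([1, 2, 3]))  # [2.0, 4.0, 6.0]
```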
yggdrasil/libs/pandaslib.py

@@ -1,3 +1,5 @@
+ """Optional pandas dependency helpers."""
+
  try:
  import pandas # type: ignore
  pandas = pandas
@@ -6,6 +8,11 @@ except ImportError:


  def require_pandas():
+ """Ensure pandas is available before using pandas helpers.
+
+ Returns:
+ None.
+ """
  if pandas is None:
  raise ImportError(
  "pandas is required to use this function. "
yggdrasil/libs/polarslib.py

@@ -1,3 +1,5 @@
+ """Optional Polars dependency helpers."""
+
  try:
  import polars # type: ignore

@@ -13,6 +15,11 @@ __all__ = [


  def require_polars():
+ """Ensure polars is available before using polars helpers.
+
+ Returns:
+ None.
+ """
  if polars is None:
  raise ImportError(
  "polars is required to use this function. "

yggdrasil/libs/sparklib.py

@@ -1,3 +1,5 @@
+ """Optional Spark dependency helpers and Arrow/Spark type conversions."""
+
  from typing import Any

  import pyarrow as pa
@@ -51,18 +53,23 @@ except ImportError: # pragma: no cover - Spark not available
  pyspark = None

  class SparkSession:
+ """Fallback SparkSession placeholder when pyspark is unavailable."""

  @classmethod
  def getActiveSession(cls):
+ """Return None to indicate no active session is available."""
  return None

  class SparkDataFrame:
+ """Fallback DataFrame placeholder when pyspark is unavailable."""
  pass

  class SparkColumn:
+ """Fallback Column placeholder when pyspark is unavailable."""
  pass

  class SparkDataType:
+ """Fallback DataType placeholder when pyspark is unavailable."""
  pass

  ARROW_TO_SPARK = {}
@@ -91,6 +98,12 @@ __all__ = [
  def require_pyspark(active_session: bool = False):
  """
  Optionally enforce that pyspark (and an active SparkSession) exists.
+
+ Args:
+ active_session: Require an active SparkSession if True.
+
+ Returns:
+ None.
  """
  if pyspark is None:
  raise ImportError(
@@ -116,6 +129,13 @@ def arrow_type_to_spark_type(
  ) -> "T.DataType":
  """
  Convert a pyarrow.DataType to a pyspark.sql.types.DataType.
+
+ Args:
+ arrow_type: Arrow data type to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Spark SQL data type.
  """
  require_pyspark()

@@ -191,6 +211,13 @@ def arrow_field_to_spark_field(
  ) -> "T.StructField":
  """
  Convert a pyarrow.Field to a pyspark StructField.
+
+ Args:
+ field: Arrow field to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Spark StructField representation.
  """
  spark_type = arrow_type_to_spark_type(field.type, cast_options)

@@ -208,6 +235,13 @@ def spark_type_to_arrow_type(
  ) -> pa.DataType:
  """
  Convert a pyspark.sql.types.DataType to a pyarrow.DataType.
+
+ Args:
+ spark_type: Spark SQL data type to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Arrow data type.
  """
  require_pyspark()
  from pyspark.sql.types import (
@@ -287,6 +321,13 @@ def spark_field_to_arrow_field(
  ) -> pa.Field:
  """
  Convert a pyspark StructField to a pyarrow.Field.
+
+ Args:
+ field: Spark StructField to convert.
+ cast_options: Optional casting options.
+
+ Returns:
+ Arrow field.
  """
  arrow_type = spark_type_to_arrow_type(field.dataType, cast_options)
yggdrasil/pyutils/__init__.py

@@ -1,4 +1,8 @@
+ """Python utility helpers for retries, parallelism, and environment management."""
+
  from .retry import retry
  from .parallel import parallelize
  from .python_env import PythonEnv
  from .callable_serde import CallableSerde
+
+ __all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]
yggdrasil/pyutils/callable_serde.py

@@ -1,3 +1,5 @@
+ """Callable serialization helpers for cross-process execution."""
+
  from __future__ import annotations

  import base64
@@ -26,6 +28,15 @@ _FLAG_COMPRESSED = 1


  def _resolve_attr_chain(mod: Any, qualname: str) -> Any:
+ """Resolve a dotted attribute path from a module.
+
+ Args:
+ mod: Module to traverse.
+ qualname: Dotted qualified name.
+
+ Returns:
+ Resolved attribute.
+ """
  obj = mod
  for part in qualname.split("."):
  obj = getattr(obj, part)
@@ -49,6 +60,14 @@ def _find_pkg_root_from_file(file_path: Path) -> Optional[Path]:


  def _callable_file_line(fn: Callable[..., Any]) -> Tuple[Optional[str], Optional[int]]:
+ """Return the source file path and line number for a callable.
+
+ Args:
+ fn: Callable to inspect.
+
+ Returns:
+ Tuple of (file path, line number).
+ """
  file = None
  line = None
  try:
@@ -85,6 +104,14 @@ def _referenced_global_names(fn: Callable[..., Any]) -> Set[str]:


  def _is_importable_reference(fn: Callable[..., Any]) -> bool:
+ """Return True when a callable can be imported by module and qualname.
+
+ Args:
+ fn: Callable to inspect.
+
+ Returns:
+ True if importable by module/qualname.
+ """
  mod_name = getattr(fn, "__module__", None)
  qualname = getattr(fn, "__qualname__", None)
  if not mod_name or not qualname:
@@ -245,6 +272,14 @@ class CallableSerde:

  @classmethod
  def from_callable(cls: type[T], x: Union[Callable[..., Any], T]) -> T:
+ """Create a CallableSerde from a callable or existing instance.
+
+ Args:
+ x: Callable or CallableSerde instance.
+
+ Returns:
+ CallableSerde instance.
+ """
  if isinstance(x, cls):
  return x

@@ -256,14 +291,29 @@ class CallableSerde:

  @property
  def module(self) -> Optional[str]:
+ """Return the callable's module name if available.
+
+ Returns:
+ Module name or None.
+ """
  return self._module or (getattr(self.fn, "__module__", None) if self.fn else None)

  @property
  def qualname(self) -> Optional[str]:
+ """Return the callable's qualified name if available.
+
+ Returns:
+ Qualified name or None.
+ """
  return self._qualname or (getattr(self.fn, "__qualname__", None) if self.fn else None)

  @property
  def file(self) -> Optional[str]:
+ """Return the filesystem path of the callable's source file.
+
+ Returns:
+ File path or None.
+ """
  if not self.fn:
  return None
  f, _ = _callable_file_line(self.fn)
@@ -271,6 +321,11 @@ class CallableSerde:

  @property
  def line(self) -> Optional[int]:
+ """Return the line number where the callable is defined.
+
+ Returns:
+ Line number or None.
+ """
  if not self.fn:
  return None
  _, ln = _callable_file_line(self.fn)
@@ -278,6 +333,11 @@ class CallableSerde:

  @property
  def pkg_root(self) -> Optional[str]:
+ """Return the inferred package root for the callable, if known.
+
+ Returns:
+ Package root path or None.
+ """
  if self._pkg_root:
  return self._pkg_root
  if not self.file:
@@ -287,6 +347,11 @@ class CallableSerde:

  @property
  def relpath_from_pkg_root(self) -> Optional[str]:
+ """Return the callable's path relative to the package root.
+
+ Returns:
+ Relative path or None.
+ """
  if not self.file or not self.pkg_root:
  return None
  try:
@@ -296,6 +361,11 @@ class CallableSerde:

  @property
  def importable(self) -> bool:
+ """Return True when the callable can be imported by reference.
+
+ Returns:
+ True if importable by module/qualname.
+ """
  if self.fn is None:
  return bool(self.module and self.qualname and "<locals>" not in (self.qualname or ""))
  return _is_importable_reference(self.fn)
@@ -309,6 +379,16 @@ class CallableSerde:
  dump_env: str = "none", # "none" | "globals" | "closure" | "both"
  filter_used_globals: bool = True,
  ) -> Dict[str, Any]:
+ """Serialize the callable into a dict for transport.
+
+ Args:
+ prefer: Preferred serialization kind.
+ dump_env: Environment payload selection.
+ filter_used_globals: Filter globals to referenced names.
+
+ Returns:
+ Serialized payload dict.
+ """
  kind = prefer
  if kind == "import" and not self.importable:
  kind = "dill"
@@ -352,6 +432,15 @@ class CallableSerde:

  @classmethod
  def load(cls: type[T], d: Dict[str, Any], *, add_pkg_root_to_syspath: bool = True) -> T:
+ """Construct a CallableSerde from a serialized dict payload.
+
+ Args:
+ d: Serialized payload dict.
+ add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+ Returns:
+ CallableSerde instance.
+ """
  obj = cls(
  fn=None,
  _kind=d.get("kind", "auto"),
@@ -369,6 +458,14 @@ class CallableSerde:
  return obj # type: ignore[return-value]

  def materialize(self, *, add_pkg_root_to_syspath: bool = True) -> Callable[..., Any]:
+ """Resolve and return the underlying callable.
+
+ Args:
+ add_pkg_root_to_syspath: Add package root to sys.path if True.
+
+ Returns:
+ Resolved callable.
+ """
  if self.fn is not None:
  return self.fn

@@ -402,6 +499,15 @@ class CallableSerde:
  raise ValueError(f"Unknown kind: {kind}")

  def __call__(self, *args: Any, **kwargs: Any) -> Any:
+ """Invoke the materialized callable with the provided arguments.
+
+ Args:
+ *args: Positional args for the callable.
+ **kwargs: Keyword args for the callable.
+
+ Returns:
+ Callable return value.
+ """
  fn = self.materialize()
  return fn(*args, **kwargs)
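
Taken together, the documented methods describe a dump/load round trip: serialize a callable (by import reference when possible, otherwise via dill), ship the resulting dict elsewhere, and materialize it there. A sketch under that reading; the function being shipped is illustrative:

```python
from yggdrasil.pyutils import CallableSerde


def transform(x: int) -> int:
    return x * 2


# Sender side: prefer an import reference; dump() falls back to dill when the
# callable is not importable (e.g. lambdas or locally defined functions).
payload = CallableSerde.from_callable(transform).dump(prefer="import")

# Receiver side: rebuild the wrapper (optionally adding the package root to
# sys.path) and call it; __call__ materializes the callable lazily.
remote = CallableSerde.load(payload)
assert remote(21) == 42
```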
yggdrasil/pyutils/exceptions.py

@@ -1,3 +1,5 @@
+ """Utilities for parsing and re-raising exceptions from traceback strings."""
+
  import builtins
  import dataclasses as dc
  import re
@@ -26,6 +28,7 @@ _BARE_EXC_RE = re.compile(r"(?m)^\s*([A-Za-z_]\w*(?:Error|Exception|Warning|Inte

  @dc.dataclass(frozen=True)
  class ParsedException:
+ """Structured representation of a parsed exception type and message."""
  exc_type: Type[BaseException]
  message: str
  raw_type_name: str
@@ -34,10 +37,23 @@ class ParsedException:
  class RemoteTraceback(Exception):
  """Holds a traceback *string* and prints it as the chained cause."""
  def __init__(self, traceback_text: str):
+ """Store the traceback text for later display.
+
+ Args:
+ traceback_text: Traceback string to store.
+
+ Returns:
+ None.
+ """
  super().__init__("Remote traceback (text)")
  self.traceback_text = traceback_text

  def __str__(self) -> str:
+ """Render the exception with its stored traceback text.
+
+ Returns:
+ Rendered exception string with traceback text.
+ """
  return f"{self.args[0]}\n\n{self.traceback_text}"
yggdrasil/pyutils/modules.py

@@ -1,3 +1,5 @@
+ """Module dependency and pip index inspection utilities."""
+
  # modules.py
  from __future__ import annotations

@@ -43,6 +45,14 @@ MODULE_PROJECT_NAMES_ALIASES = {


  def module_name_to_project_name(module_name: str) -> str:
+ """Map module import names to PyPI project names when they differ.
+
+ Args:
+ module_name: Importable module name.
+
+ Returns:
+ PyPI project name.
+ """
  return MODULE_PROJECT_NAMES_ALIASES.get(module_name, module_name)


@@ -104,6 +114,7 @@ _REQ_NAME_RE = re.compile(r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)")

  @dc.dataclass(frozen=True)
  class DependencyMetadata:
+ """Metadata describing an installed or missing dependency."""
  project: str
  requirement: str
  installed: bool
@@ -136,6 +147,14 @@ def _req_project_name(req_line: str) -> Optional[str]:


  def _distribution_for_module(mod: Union[str, ModuleType]):
+ """Resolve the importlib.metadata distribution that provides a module.
+
+ Args:
+ mod: Module name or module object.
+
+ Returns:
+ importlib.metadata.Distribution instance.
+ """
  if ilm is None:
  raise RuntimeError("importlib.metadata is not available")

@@ -213,6 +232,14 @@ def module_dependencies(lib: Union[str, ModuleType]) -> List[DependencyMetadata]


  def _run_pip(*args: str) -> Tuple[int, str, str]:
+ """Run pip with arguments and return (returncode, stdout, stderr).
+
+ Args:
+ *args: Pip arguments.
+
+ Returns:
+ Tuple of (returncode, stdout, stderr).
+ """
  p = subprocess.run(
  [sys.executable, "-m", "pip", *args],
  text=True,
@@ -225,21 +252,37 @@ def _run_pip(*args: str) -> Tuple[int, str, str]:

  @dc.dataclass(frozen=True)
  class PipIndexSettings:
+ """Resolved pip index configuration from env and config sources."""
  index_url: Optional[str] = None
  extra_index_urls: List[str] = dc.field(default_factory=list)
  sources: Dict[str, Dict[str, Any]] = dc.field(default_factory=dict) # {"env": {...}, "config": {...}}

  @classmethod
  def default_settings(cls):
+ """Return the cached default pip index settings.
+
+ Returns:
+ Default PipIndexSettings instance.
+ """
  return DEFAULT_PIP_INDEX_SETTINGS

  @property
  def extra_index_url(self):
+ """Return extra index URLs as a space-separated string.
+
+ Returns:
+ Space-separated extra index URLs or None.
+ """
  if self.extra_index_urls:
  return " ".join(self.extra_index_urls)
  return None

  def as_dict(self) -> dict:
+ """Return a dict representation of the settings.
+
+ Returns:
+ Dict representation of settings.
+ """
  return dc.asdict(self)


@@ -325,4 +368,4 @@ def get_pip_index_settings() -> PipIndexSettings:
  try:
  DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
  except:
- DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
+ DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
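
A usage sketch for the pip-index helpers documented in this file. Building the flag list from the resolved settings is illustrative; only the attributes and functions shown in the diff are used:

```python
from yggdrasil.pyutils.modules import (
    PipIndexSettings,
    module_dependencies,
    module_name_to_project_name,
)

settings = PipIndexSettings.default_settings()  # cached get_pip_index_settings() result

# Translate an import name to its PyPI project name, then assemble pip arguments
# that honour any private index configuration that was resolved.
pip_args = ["install", module_name_to_project_name("yaml")]
if settings.index_url:
    pip_args += ["--index-url", settings.index_url]
for url in settings.extra_index_urls:
    pip_args += ["--extra-index-url", url]

# module_dependencies returns DependencyMetadata entries (project, requirement, installed).
deps = module_dependencies("yggdrasil")
```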