ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +31 -34
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +148 -1
- yggdrasil/databricks/sql/types.py +49 -1
- yggdrasil/databricks/workspaces/__init__.py +4 -1
- yggdrasil/databricks/workspaces/filesytem.py +344 -0
- yggdrasil/databricks/workspaces/io.py +1123 -0
- yggdrasil/databricks/workspaces/path.py +1415 -0
- yggdrasil/databricks/workspaces/path_kind.py +13 -0
- yggdrasil/databricks/workspaces/workspace.py +298 -154
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +131 -0
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.30.dist-info/RECORD +0 -56
- yggdrasil/databricks/workspaces/databricks_path.py +0 -784
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/dataclasses/dataclass.py
CHANGED

@@ -1,3 +1,5 @@
+"""Dataclass helpers that integrate with Arrow schemas and safe casting."""
+
 import dataclasses
 from inspect import isclass
 from typing import Any, Iterable, Mapping, Tuple
@@ -18,6 +20,7 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:
 
     Args:
         cls_or_instance: The class or instance to check.
+
     Returns:
         True if the class or instance
         is a yggdrasil dataclass, False otherwise.
@@ -26,6 +29,14 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:
 
 
 def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
+    """Return a cached Arrow Field describing the dataclass type.
+
+    Args:
+        cls_or_instance: Dataclass class or instance.
+
+    Returns:
+        Arrow field describing the dataclass schema.
+    """
     if is_yggdataclass(cls_or_instance):
         return cls_or_instance.__arrow_field__()
 
@@ -58,7 +69,7 @@ def yggdataclass(
     kw_only=False, slots=False,
     weakref_slot=False
 ):
-    """
+    """Decorate a class with dataclass behavior plus Arrow helpers.
 
     Examines PEP 526 __annotations__ to determine fields.
 
@@ -73,7 +84,24 @@ def yggdataclass(
     """
 
     def wrap(c):
+        """Wrap a class with yggdrasil dataclass enhancements.
+
+        Args:
+            c: Class to decorate.
+
+        Returns:
+            Decorated dataclass type.
+        """
+
         def _init_public_fields(cls):
+            """Return init-enabled, public dataclass fields.
+
+            Args:
+                cls: Dataclass type.
+
+            Returns:
+                List of dataclasses.Field objects.
+            """
             return [
                 field
                 for field in dataclasses.fields(cls)
@@ -83,6 +111,11 @@ def yggdataclass(
         if not hasattr(c, "default_instance"):
             @classmethod
             def default_instance(cls):
+                """Return a default instance built from type defaults.
+
+                Returns:
+                    Default instance of the dataclass.
+                """
                 from yggdrasil.types import default_scalar
 
                 if not hasattr(cls, "__default_instance__"):
@@ -135,6 +168,14 @@ def yggdataclass(
         if not hasattr(c, "__arrow_field__"):
             @classmethod
             def __arrow_field__(cls, name: str | None = None):
+                """Return an Arrow field representing the dataclass schema.
+
+                Args:
+                    name: Optional override for the field name.
+
+                Returns:
+                    Arrow field describing the dataclass schema.
+                """
                 from yggdrasil.types.python_arrow import arrow_field_from_hint
 
                 return arrow_field_from_hint(cls, name=name)
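The dataclass changes are documentation-only, but they spell out the module's contract: `yggdataclass` layers Arrow-schema helpers (`__arrow_field__`, `default_instance`) onto a standard dataclass, and `get_dataclass_arrow_field` exposes that schema as a `pa.Field`. A minimal usage sketch under the docstrings' assumptions — the `Reading` class and its fields are invented, and the decorator is assumed to support bare use like `dataclasses.dataclass`:

```python
# Hypothetical example; `Reading` and its fields are invented for illustration.
import pyarrow as pa

from yggdrasil.dataclasses.dataclass import get_dataclass_arrow_field, yggdataclass


@yggdataclass  # assumed to support bare use, like dataclasses.dataclass
class Reading:
    sensor: str
    value: float


# Per the new docstrings, this returns a pa.Field describing the schema.
field = get_dataclass_arrow_field(Reading)
assert isinstance(field, pa.Field)
print(field.type)  # expected: a struct type derived from the annotations
```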
yggdrasil/libs/__init__.py
CHANGED
yggdrasil/libs/databrickslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Databricks SDK dependency helpers."""
+
 try:
     import databricks
     import databricks.sdk  # type: ignore
@@ -6,7 +8,9 @@ try:
     databricks_sdk = databricks.sdk
 except ImportError:
     class _DatabricksDummy:
+        """Placeholder object that raises if Databricks SDK is required."""
         def __getattr__(self, item):
+            """Raise an error when accessing missing Databricks SDK attributes."""
             require_databricks_sdk()
 
     databricks = _DatabricksDummy
@@ -14,6 +18,11 @@ except ImportError:
 
 
 def require_databricks_sdk():
+    """Ensure the Databricks SDK is available before use.
+
+    Returns:
+        None.
+    """
    if databricks_sdk is None:
        raise ImportError(
            "databricks_sdk is required to use this function. "
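databrickslib.py — like pandaslib.py and polarslib.py below — follows one optional-dependency pattern: import inside try/except at module load, fall back to a None sentinel (or a dummy attribute-trap class), and raise a descriptive ImportError only when the dependency is actually exercised. A self-contained sketch of the same pattern; the install hint is illustrative:

```python
# Sketch of the optional-dependency pattern used across yggdrasil.libs.
try:
    import databricks.sdk as databricks_sdk  # type: ignore
except ImportError:
    databricks_sdk = None  # sentinel checked before any SDK use


def require_databricks_sdk() -> None:
    """Raise a descriptive error if the Databricks SDK is missing."""
    if databricks_sdk is None:
        raise ImportError(
            "databricks_sdk is required to use this function. "
            "Install it with `pip install databricks-sdk`."  # illustrative hint
        )
```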
yggdrasil/libs/extensions/polars_extensions.py
CHANGED

@@ -1,3 +1,5 @@
+"""Polars DataFrame extension helpers for joins and resampling."""
+
 from __future__ import annotations
 
 import datetime
@@ -39,6 +41,14 @@ def join_coalesced(
 
 
 def _normalize_group_by(group_by: str | Sequence[str] | None) -> list[str] | None:
+    """Normalize group_by inputs into a list or None.
+
+    Args:
+        group_by: Grouping column or columns.
+
+    Returns:
+        List of column names or None.
+    """
     if group_by is None:
         return None
     if isinstance(group_by, str):
@@ -57,6 +67,15 @@ def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str,
 
 
 def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
+    """Build a Polars expression from an aggregation spec.
+
+    Args:
+        col: Column name to aggregate.
+        agg: Aggregation spec (expr, callable, or string).
+
+    Returns:
+        Polars expression.
+    """
     base = pl.col(col)
 
     if isinstance(agg, pl.Expr):
@@ -80,6 +99,14 @@ def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
 
 
 def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
+    """Normalize aggregation specs into a list of Polars expressions.
+
+    Args:
+        agg: Mapping or sequence of aggregation specs.
+
+    Returns:
+        List of Polars expressions.
+    """
     if isinstance(agg, Mapping):
         return [_expr_from_agg(col, spec) for col, spec in agg.items()]
 
@@ -91,11 +118,27 @@ def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
 
 
 def _is_datetime(dtype: object) -> bool:
+    """Return True when the dtype is a Polars datetime.
+
+    Args:
+        dtype: Polars dtype to inspect.
+
+    Returns:
+        True if dtype is Polars Datetime.
+    """
     # Datetime-only inference (per requirement), version-safe.
     return isinstance(dtype, pl.Datetime)
 
 
 def _infer_time_col(df: "pl.DataFrame") -> str:
+    """Infer the first datetime-like column name from a DataFrame.
+
+    Args:
+        df: Polars DataFrame to inspect.
+
+    Returns:
+        Column name of the first datetime field.
+    """
     # Find first Datetime column in schema order; ignore Date columns.
     for name, dtype in df.schema.items():
         if _is_datetime(dtype):
@@ -106,6 +149,15 @@ def _infer_time_col(df: "pl.DataFrame") -> str:
 
 
 def _ensure_datetime_like(df: "pl.DataFrame", time_col: str) -> "pl.DataFrame":
+    """Ensure a time column is cast to datetime for resampling.
+
+    Args:
+        df: Polars DataFrame.
+        time_col: Column name to validate.
+
+    Returns:
+        DataFrame with time column cast to datetime if needed.
+    """
     dtype = df.schema.get(time_col)
     if dtype is None:
         raise KeyError(f"resample: time_col '{time_col}' not found in DataFrame columns.")
@@ -151,6 +203,14 @@ def _timedelta_to_polars_duration(td: datetime.timedelta) -> str:
 
 
 def _normalize_duration(v: str | datetime.timedelta | None) -> str | None:
+    """Normalize duration inputs to a Polars duration string.
+
+    Args:
+        v: Duration string, timedelta, or None.
+
+    Returns:
+        Normalized duration string or None.
+    """
     if v is None:
         return None
     if isinstance(v, str):
@@ -168,6 +228,18 @@ def _upsample_single(
     offset: str | datetime.timedelta | None,
     keep_group_order: bool,
 ) -> "pl.DataFrame":
+    """Upsample a single DataFrame with normalized duration arguments.
+
+    Args:
+        df: Polars DataFrame to upsample.
+        time_col: Name of the time column.
+        every: Sampling interval.
+        offset: Optional offset interval.
+        keep_group_order: Preserve input order when grouping.
+
+    Returns:
+        Upsampled Polars DataFrame.
+    """
     df = df.sort(time_col)
 
     every_n = _normalize_duration(every)
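The new docstrings make the resampling pipeline explicit: infer or validate a datetime column, normalize `every`/`offset` (strings or `datetime.timedelta`) into Polars duration strings, then delegate to Polars' own upsampling. A small sketch of that normalization step, assuming a recent Polars where `DataFrame.upsample` takes a sorted frame and a duration string:

```python
# Sketch of duration normalization + upsample; assumes a recent Polars
# where DataFrame.upsample(time_column=..., every=...) is available.
import datetime

import polars as pl

df = pl.DataFrame(
    {
        "ts": [
            datetime.datetime(2024, 1, 1, 0, 0),
            datetime.datetime(2024, 1, 1, 0, 3),
        ],
        "value": [1.0, 2.0],
    }
)

# One plausible timedelta -> duration-string normalization: whole seconds.
td = datetime.timedelta(minutes=1)
every = f"{int(td.total_seconds())}s"  # "60s"

out = df.sort("ts").upsample(time_column="ts", every=every)
print(out)  # rows at 00:00 .. 00:03, with nulls in the gap rows
```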
yggdrasil/libs/extensions/spark_extensions.py
CHANGED

@@ -1,3 +1,5 @@
+"""Spark DataFrame extension helpers for aliases and resampling."""
+
 import datetime
 import inspect
 import re
@@ -30,6 +32,15 @@ _COL_RE = re.compile(r"Column<\s*['\"]?`?(.+?)`?['\"]?\s*>")
 
 
 def _require_pyspark(fn_name: str) -> None:
+    """Raise when PySpark is unavailable for a requested helper."""
+    """Raise when PySpark is unavailable for a requested helper.
+
+    Args:
+        fn_name: Name of the calling function.
+
+    Returns:
+        None.
+    """
     if pyspark is None or F is None or T is None:
         raise RuntimeError(
             f"{fn_name} requires PySpark to be available. "
@@ -41,6 +52,15 @@ def getAliases(
     obj: Union[SparkDataFrame, SparkColumn, str, Iterable[Union[SparkDataFrame, SparkColumn, str]]],
     full: bool = True,
 ) -> list[str]:
+    """Return aliases for Spark columns/dataframes or collections.
+
+    Args:
+        obj: Spark DataFrame/Column, string, or iterable of these.
+        full: Whether to return full qualified names.
+
+    Returns:
+        List of alias strings.
+    """
     if obj is None:
         return []
 
@@ -92,6 +112,16 @@ def latest(
     partitionBy: List[Union[str, SparkColumn]],
     orderBy: List[Union[str, SparkColumn]],
 ) -> SparkDataFrame:
+    """Return the latest rows per partition based on ordering.
+
+    Args:
+        df: Spark DataFrame.
+        partitionBy: Columns to partition by.
+        orderBy: Columns to order by.
+
+    Returns:
+        Spark DataFrame with latest rows per partition.
+    """
     _require_pyspark("latest")
 
     partition_col_names = getAliases(partitionBy)
@@ -123,12 +153,30 @@ def _infer_time_col_spark(df: "pyspark.sql.DataFrame") -> str:
 
 
 def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Filter kwargs to only those accepted by the callable.
+
+    Args:
+        fn: Callable to inspect.
+        kwargs: Candidate keyword arguments.
+
+    Returns:
+        Filtered keyword arguments.
+    """
     sig = inspect.signature(fn)  # type: ignore[arg-type]
     allowed = set(sig.parameters.keys())
     return {k: v for k, v in kwargs.items() if (k in allowed and v is not None)}
 
 
 def _append_drop_col_to_spark_schema(schema: "T.StructType", drop_col: str) -> "T.StructType":
+    """Ensure the drop column exists in the Spark schema.
+
+    Args:
+        schema: Spark schema to augment.
+        drop_col: Column name to add if missing.
+
+    Returns:
+        Updated Spark schema.
+    """
     _require_pyspark("_append_drop_col_to_spark_schema")
     if drop_col in schema.fieldNames():
         return schema
@@ -169,6 +217,14 @@ def upsample(
     spark_schema = arrow_field_to_spark_field(options.target_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply upsample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with upsampled data.
+        """
         res = (
             arrow_table_to_polars_dataframe(tb, options)
             .sort(time_col_name)
@@ -277,6 +333,14 @@ def resample(
     out_options = CastOptions.check_arg(out_arrow_field)
 
     def within_group(tb: pa.Table) -> pa.Table:
+        """Apply resample logic to a grouped Arrow table.
+
+        Args:
+            tb: Arrow table for a grouped partition.
+
+        Returns:
+            Arrow table with resampled data.
+        """
         from .polars_extensions import resample
 
         pdf = arrow_table_to_polars_dataframe(tb, in_options)
@@ -329,6 +393,18 @@ def checkJoin(
     *args,
     **kwargs,
 ):
+    """Join two DataFrames with schema-aware column casting.
+
+    Args:
+        df: Left Spark DataFrame.
+        other: Right Spark DataFrame.
+        on: Join keys or mapping.
+        *args: Positional args passed to join.
+        **kwargs: Keyword args passed to join.
+
+    Returns:
+        Joined Spark DataFrame.
+    """
     _require_pyspark("checkJoin")
 
     other = convert(other, SparkDataFrame)
@@ -371,12 +447,32 @@ def checkMapInArrow(
     *args,
     **kwargs,
 ):
+    """Wrap mapInArrow to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding RecordBatches.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInArrow.
+        **kwargs: Keyword args passed to mapInArrow.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInArrow")
 
     spark_schema = convert(schema, T.StructType)
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[pa.RecordBatch]):
+        """Convert batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input RecordBatch iterable.
+
+        Yields:
+            RecordBatch instances conforming to the output schema.
+        """
         for src in func(batches):
             yield convert(src, pa.RecordBatch, arrow_schema)
 
@@ -395,6 +491,18 @@ def checkMapInPandas(
     *args,
     **kwargs,
 ):
+    """Wrap mapInPandas to enforce output schema conversion.
+
+    Args:
+        df: Spark DataFrame.
+        func: Generator function yielding pandas DataFrames.
+        schema: Output schema (Spark StructType or DDL string).
+        *args: Positional args passed to mapInPandas.
+        **kwargs: Keyword args passed to mapInPandas.
+
+    Returns:
+        Spark DataFrame with enforced schema.
+    """
     _require_pyspark("mapInPandas")
 
     import pandas as _pd  # local import so we don't shadow the ..pandas module
@@ -403,6 +511,14 @@ def checkMapInPandas(
     arrow_schema = convert(schema, pa.Field)
 
     def patched(batches: Iterable[_pd.DataFrame]):
+        """Convert pandas batches yielded by user function to the target schema.
+
+        Args:
+            batches: Input pandas DataFrame iterable.
+
+        Yields:
+            pandas DataFrames conforming to the output schema.
+        """
         for src in func(batches):
             yield convert(src, _pd.DataFrame, arrow_schema)
 
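checkMapInArrow and checkMapInPandas apply the same trick: wrap the user's generator so every yielded batch is converted to the declared output schema before Spark consumes it, turning silent schema mismatches into explicit casts. A standalone sketch of that coercion using only pyarrow — yggdrasil's `convert` helper is replaced with a plain Table cast, and the target schema is invented:

```python
# Standalone sketch of the batch-coercion wrapper; uses pyarrow only.
from typing import Iterable, Iterator

import pyarrow as pa

target = pa.schema([("id", pa.int64()), ("name", pa.string())])  # invented


def coerce_batches(batches: Iterable[pa.RecordBatch]) -> Iterator[pa.RecordBatch]:
    """Yield the input batches cast to the target schema."""
    for src in batches:
        # Route through a Table to cast; RecordBatch.cast only exists in
        # newer pyarrow releases.
        yield from pa.Table.from_batches([src]).cast(target).to_batches()


src = pa.RecordBatch.from_pydict(
    {"id": pa.array([1, 2], type=pa.int32()), "name": ["a", "b"]}
)
out = next(coerce_batches([src]))
print(out.schema)  # id: int64, name: string
```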
yggdrasil/libs/pandaslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional pandas dependency helpers."""
+
 try:
     import pandas  # type: ignore
     pandas = pandas
@@ -6,6 +8,11 @@ except ImportError:
 
 
 def require_pandas():
+    """Ensure pandas is available before using pandas helpers.
+
+    Returns:
+        None.
+    """
     if pandas is None:
         raise ImportError(
             "pandas is required to use this function. "
yggdrasil/libs/polarslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Polars dependency helpers."""
+
 try:
     import polars  # type: ignore
 
@@ -13,6 +15,11 @@ __all__ = [
 
 
 def require_polars():
+    """Ensure polars is available before using polars helpers.
+
+    Returns:
+        None.
+    """
     if polars is None:
         raise ImportError(
             "polars is required to use this function. "
yggdrasil/libs/sparklib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Spark dependency helpers and Arrow/Spark type conversions."""
+
 from typing import Any
 
 import pyarrow as pa
@@ -51,18 +53,23 @@ except ImportError:  # pragma: no cover - Spark not available
     pyspark = None
 
     class SparkSession:
+        """Fallback SparkSession placeholder when pyspark is unavailable."""
 
         @classmethod
         def getActiveSession(cls):
+            """Return None to indicate no active session is available."""
             return None
 
     class SparkDataFrame:
+        """Fallback DataFrame placeholder when pyspark is unavailable."""
         pass
 
     class SparkColumn:
+        """Fallback Column placeholder when pyspark is unavailable."""
         pass
 
     class SparkDataType:
+        """Fallback DataType placeholder when pyspark is unavailable."""
         pass
 
     ARROW_TO_SPARK = {}
@@ -91,6 +98,12 @@ __all__ = [
 def require_pyspark(active_session: bool = False):
     """
     Optionally enforce that pyspark (and an active SparkSession) exists.
+
+    Args:
+        active_session: Require an active SparkSession if True.
+
+    Returns:
+        None.
     """
     if pyspark is None:
         raise ImportError(
@@ -116,6 +129,13 @@ def arrow_type_to_spark_type(
 ) -> "T.DataType":
     """
     Convert a pyarrow.DataType to a pyspark.sql.types.DataType.
+
+    Args:
+        arrow_type: Arrow data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark SQL data type.
     """
     require_pyspark()
 
@@ -191,6 +211,13 @@ def arrow_field_to_spark_field(
 ) -> "T.StructField":
     """
     Convert a pyarrow.Field to a pyspark StructField.
+
+    Args:
+        field: Arrow field to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Spark StructField representation.
     """
     spark_type = arrow_type_to_spark_type(field.type, cast_options)
 
@@ -208,6 +235,13 @@ def spark_type_to_arrow_type(
 ) -> pa.DataType:
     """
     Convert a pyspark.sql.types.DataType to a pyarrow.DataType.
+
+    Args:
+        spark_type: Spark SQL data type to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow data type.
     """
     require_pyspark()
     from pyspark.sql.types import (
@@ -287,6 +321,13 @@ def spark_field_to_arrow_field(
 ) -> pa.Field:
     """
     Convert a pyspark StructField to a pyarrow.Field.
+
+    Args:
+        field: Spark StructField to convert.
+        cast_options: Optional casting options.
+
+    Returns:
+        Arrow field.
     """
     arrow_type = spark_type_to_arrow_type(field.dataType, cast_options)
 
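sparklib.py centers on a bidirectional Arrow/Spark type mapping plus field-level wrappers that carry name and nullability across. A minimal sketch of the forward direction; the mapping below covers only a few primitives and is illustrative, not the module's actual ARROW_TO_SPARK table:

```python
# Illustrative primitive-only Arrow -> Spark mapping; not the real table.
import pyarrow as pa
from pyspark.sql import types as T

ARROW_TO_SPARK_SAMPLE = {
    pa.int32(): T.IntegerType(),
    pa.int64(): T.LongType(),
    pa.float64(): T.DoubleType(),
    pa.string(): T.StringType(),
    pa.bool_(): T.BooleanType(),
}


def arrow_field_to_spark_field_sketch(field: pa.Field) -> T.StructField:
    """Mirror arrow_field_to_spark_field for primitive types only."""
    spark_type = ARROW_TO_SPARK_SAMPLE[field.type]  # KeyError on nested types
    return T.StructField(field.name, spark_type, nullable=field.nullable)


print(arrow_field_to_spark_field_sketch(pa.field("amount", pa.float64())))
# StructField('amount', DoubleType(), True)
```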
yggdrasil/pyutils/__init__.py
CHANGED
@@ -1,4 +1,8 @@
+"""Python utility helpers for retries, parallelism, and environment management."""
+
 from .retry import retry
 from .parallel import parallelize
 from .python_env import PythonEnv
 from .callable_serde import CallableSerde
+
+__all__ = ["retry", "parallelize", "PythonEnv", "CallableSerde"]