ygg 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/METADATA +1 -1
- ygg-0.1.33.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +244 -3
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +24 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +29 -4
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +147 -0
- yggdrasil/databricks/sql/types.py +33 -1
- yggdrasil/databricks/workspaces/__init__.py +2 -1
- yggdrasil/databricks/workspaces/filesytem.py +183 -0
- yggdrasil/databricks/workspaces/io.py +387 -9
- yggdrasil/databricks/workspaces/path.py +297 -2
- yggdrasil/databricks/workspaces/path_kind.py +3 -0
- yggdrasil/databricks/workspaces/workspace.py +202 -5
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +123 -1
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.31.dist-info/RECORD +0 -59
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/WHEEL +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/workspace.py
CHANGED

@@ -1,3 +1,5 @@
+"""Workspace configuration and Databricks SDK helpers."""
+
 import dataclasses
 import logging
 import os
@@ -17,13 +19,13 @@ if TYPE_CHECKING:
     from ..compute.cluster import Cluster

 from .path import DatabricksPath, DatabricksPathKind
+from ...version import __version__ as YGGDRASIL_VERSION
 from ...libs.databrickslib import require_databricks_sdk, databricks_sdk

 if databricks_sdk is not None:
     from databricks.sdk import WorkspaceClient
-    from databricks.sdk.errors import ResourceDoesNotExist
+    from databricks.sdk.errors import ResourceDoesNotExist
     from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
-    from databricks.sdk.service import catalog as catalog_svc
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry

@@ -45,7 +47,7 @@ def _get_env_product():
     v = os.getenv("DATABRICKS_PRODUCT")

     if not v:
-        return
+        return "yggdrasil"
     return v.strip().lower()


@@ -53,7 +55,7 @@ def _get_env_product_version():
     v = os.getenv("DATABRICKS_PRODUCT_VERSION")

     if not v:
-        return
+        return YGGDRASIL_VERSION
     return v.strip().lower()

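These two hunks change the environment helpers so they no longer return `None` when the variables are unset; they now fall back to "yggdrasil" and the package version. A minimal standalone sketch of that fallback logic (the function names and the version literal are illustrative, not the packaged module):

```python
import os

YGGDRASIL_VERSION = "0.1.33"  # illustrative; the real value is imported from yggdrasil.version


def get_product() -> str:
    # Fall back to the library name when DATABRICKS_PRODUCT is unset.
    v = os.getenv("DATABRICKS_PRODUCT")
    if not v:
        return "yggdrasil"
    return v.strip().lower()


def get_product_version() -> str:
    # Fall back to the installed package version when unset.
    v = os.getenv("DATABRICKS_PRODUCT_VERSION")
    if not v:
        return YGGDRASIL_VERSION
    return v.strip().lower()


print(get_product(), get_product_version())
```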
@@ -67,6 +69,7 @@ def _get_env_product_tag():

 @dataclass
 class Workspace:
+    """Configuration wrapper for connecting to a Databricks workspace."""
     # Databricks / generic
     host: Optional[str] = None
     account_id: Optional[str] = None
@@ -113,6 +116,11 @@ class Workspace:
     # Pickle support
     # -------------------------
     def __getstate__(self):
+        """Serialize the workspace state for pickling.
+
+        Returns:
+            A pickle-ready state dictionary.
+        """
         state = self.__dict__.copy()
         state.pop("_sdk", None)

@@ -122,6 +130,11 @@ class Workspace:
         return state

     def __setstate__(self, state):
+        """Restore workspace state after unpickling.
+
+        Args:
+            state: Serialized state dictionary.
+        """
         self.__dict__.update(state)
         self._sdk = None

@@ -132,10 +145,25 @@ class Workspace:
         self.connect(reset=True)

     def __enter__(self) -> "Workspace":
+        """Enter a context manager and connect to the workspace.
+
+        Returns:
+            The connected Workspace instance.
+        """
         self._was_connected = self._sdk is not None
         return self.connect()

     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the context manager and close if newly connected.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         if not self._was_connected:
             self.close()

@@ -149,6 +177,14 @@ class Workspace:
         self,
         **kwargs
     ) -> "Workspace":
+        """Clone the workspace config with overrides.
+
+        Args:
+            **kwargs: Field overrides for the clone.
+
+        Returns:
+            A new Workspace instance with updated fields.
+        """
         state = self.__getstate__()
         state.update(kwargs)
         return Workspace().__setstate__(state)
@@ -158,9 +194,23 @@ class Workspace:
     # -------------------------
     @property
     def connected(self):
+        """Return True when a WorkspaceClient is cached.
+
+        Returns:
+            True if connected, otherwise False.
+        """
         return self._sdk is not None

     def connect(self, reset: bool = False, clone: bool = False) -> "Workspace":
+        """Connect to the workspace and cache the SDK client.
+
+        Args:
+            reset: Whether to reset the cached client before connecting.
+            clone: Whether to connect a cloned instance.
+
+        Returns:
+            The connected Workspace instance.
+        """
         if reset:
             self._sdk = None

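Taken together, the pickling, context-manager, and connect hunks above describe the Workspace lifecycle: `__enter__` connects and caches a `WorkspaceClient`, and `__exit__` closes it only if this context created the connection. A hedged usage sketch; the import path, host, and token are placeholders rather than values from the diff:

```python
from yggdrasil.databricks.workspaces import Workspace  # import path assumed from the package layout

# Placeholder credentials; any auth supported by the Databricks SDK should work here.
ws = Workspace(host="https://example.cloud.databricks.com", token="dapi-example")

with ws as connected:
    print(connected.connected)      # True while the WorkspaceClient is cached
    user = connected.current_user   # property backed by sdk().current_user.me()
    client = connected.sdk()        # the underlying databricks.sdk.WorkspaceClient
# Exiting closes the connection only if it was opened by this context.
```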
@@ -270,6 +320,11 @@ class Workspace:
         return str(files[0]) if files else None

     def reset_local_cache(self):
+        """Remove cached browser OAuth tokens.
+
+        Returns:
+            None.
+        """
         local_cache = self._local_cache_token_path()

         if local_cache:
@@ -277,6 +332,11 @@ class Workspace:

     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         try:
             return self.sdk().current_user.me()
         except:
@@ -285,6 +345,11 @@ class Workspace:
             raise

     def current_token(self) -> str:
+        """Return the active API token for this workspace.
+
+        Returns:
+            The bearer token string.
+        """
         if self.token:
             return self.token

@@ -301,6 +366,14 @@ class Workspace:
         self,
         workspace: Optional["Workspace"] = None,
     ):
+        """Return a PyArrow filesystem for Databricks paths.
+
+        Args:
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksFileSystem instance.
+        """
         from .filesytem import DatabricksFileSystem, DatabricksFileSystemHandler

         handler = DatabricksFileSystemHandler(
@@ -317,6 +390,16 @@ class Workspace:
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in this workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         workspace = self if workspace is None else workspace

         if kind is None or isinstance(parts, str):
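The `dbfs_path` factory above (its name is visible where `WorkspaceService.dbfs_path` delegates to it later in the diff) turns a string or parts sequence into a `DatabricksPath`. A hedged sketch; the import path and the example paths are illustrative:

```python
from yggdrasil.databricks.workspaces import Workspace  # import path assumed

ws = Workspace(host="https://example.cloud.databricks.com", token="dapi-example").connect()

# A string is parsed and the path kind (DBFS / Volumes / Workspace) inferred;
# kind can also be passed explicitly via DatabricksPathKind.
vol_file = ws.dbfs_path(parts="/Volumes/main/default/raw/events.json")
shared_dir = ws.dbfs_path(parts="/Workspace/Shared/.ygg/cache")
print(type(vol_file).__name__)  # DatabricksPath
```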
@@ -337,6 +420,12 @@ class Workspace:
     ) -> DatabricksPath:
         """
         Shared cache base under Volumes for the current user.
+
+        Args:
+            suffix: Optional path suffix to append.
+
+        Returns:
+            A DatabricksPath pointing at the shared cache location.
         """
         base = "/Workspace/Shared/.ygg/cache"

@@ -351,6 +440,11 @@ class Workspace:
     # ------------------------------------------------------------------ #

     def sdk(self) -> "WorkspaceClient":
+        """Return the connected WorkspaceClient.
+
+        Returns:
+            The WorkspaceClient instance.
+        """
         return self.connect()._sdk

     # ------------------------------------------------------------------ #
@@ -370,6 +464,13 @@ class Workspace:
         - other paths -> Workspace paths (sdk.workspace.list)

         If recursive=True, yield all nested files/directories.
+
+        Args:
+            path: Path string to list.
+            recursive: Whether to list recursively.
+
+        Returns:
+            An iterator of workspace/DBFS/volume entries.
         """
         sdk = self.sdk()

@@ -422,6 +523,13 @@ class Workspace:
         via workspace.download(...).

         Returned object is a BinaryIO context manager.
+
+        Args:
+            path: Path to open.
+            workspace_format: Optional export format for workspace paths.
+
+        Returns:
+            A BinaryIO stream for reading.
         """
         sdk = self.sdk()

@@ -437,9 +545,19 @@ class Workspace:

     @staticmethod
     def is_in_databricks_environment():
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None

     def default_tags(self):
+        """Return default resource tags for Databricks assets.
+
+        Returns:
+            A dict of default tags.
+        """
         return {
             k: v
             for k, v in (
@@ -451,6 +569,14 @@ class Workspace:
         }

     def merge_tags(self, existing: dict | None = None):
+        """Merge default tags with an existing set.
+
+        Args:
+            existing: Optional existing tags.
+
+        Returns:
+            A dict of merged tags.
+        """
         if existing:
             return self.default_tags()

@@ -461,6 +587,17 @@ class Workspace:
         schema_name: Optional[str] = None,
         **kwargs
     ):
+        """Return a SQLEngine configured for this workspace.
+
+        Args:
+            workspace: Optional workspace override.
+            catalog_name: Optional catalog name.
+            schema_name: Optional schema name.
+            **kwargs: Additional SQLEngine parameters.
+
+        Returns:
+            A SQLEngine instance.
+        """
         from ..sql import SQLEngine

         return SQLEngine(
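The hunk above documents a factory that builds a `SQLEngine` bound to this workspace. The accessor's name is not visible in the hunk, so `sql_engine` below is an assumption; only `SQLEngine`, `catalog_name`, and `schema_name` come from the diff:

```python
from yggdrasil.databricks.workspaces import Workspace  # import path assumed

ws = Workspace(host="https://example.cloud.databricks.com", token="dapi-example")

# `sql_engine` is a hypothetical accessor name for the documented factory.
engine = ws.sql_engine(catalog_name="main", schema_name="default")
print(type(engine).__name__)  # SQLEngine
```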
@@ -476,11 +613,20 @@ class Workspace:
         cluster_name: Optional[str] = None,
         **kwargs
     ) -> "Cluster":
+        """Return a Cluster helper bound to this workspace.
+
+        Args:
+            cluster_id: Optional cluster id.
+            cluster_name: Optional cluster name.
+            **kwargs: Additional Cluster parameters.
+
+        Returns:
+            A Cluster instance.
+        """
         from ..compute.cluster import Cluster

         return Cluster(workspace=self, cluster_id=cluster_id, cluster_name=cluster_name, **kwargs)

-
 # ---------------------------------------------------------------------------
 # Workspace-bound base class
 # ---------------------------------------------------------------------------
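The cluster factory delegates to `Cluster(workspace=..., cluster_id=..., cluster_name=..., **kwargs)`, which is visible in the hunk. A hedged sketch using that constructor directly, since the accessor name on `Workspace` is not shown in the diff:

```python
from yggdrasil.databricks.compute.cluster import Cluster  # module path from the file list above
from yggdrasil.databricks.workspaces import Workspace     # import path assumed

ws = Workspace(host="https://example.cloud.databricks.com", token="dapi-example")

# Same call shape as the factory's return statement; the cluster name is a placeholder.
cl = Cluster(workspace=ws, cluster_name="shared-autoscaling")
```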
@@ -490,23 +636,54 @@ DBXWorkspace = Workspace

 @dataclass
 class WorkspaceService(ABC):
+    """Base class for helpers that depend on a Workspace."""
     workspace: Workspace = dataclasses.field(default_factory=Workspace)

     def __post_init__(self):
+        """Ensure a Workspace instance is available.
+
+        Returns:
+            None.
+        """
         if self.workspace is None:
             self.workspace = Workspace()

     def __enter__(self):
+        """Enter a context manager and connect the workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace.__enter__()
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and close the workspace.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         self.workspace.__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)

     def is_in_databricks_environment(self):
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return self.workspace.is_in_databricks_environment()

     def connect(self):
+        """Connect the underlying workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace = self.workspace.connect()
         return self

@@ -516,6 +693,16 @@ class WorkspaceService(ABC):
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in the underlying workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         return self.workspace.dbfs_path(
             kind=kind,
             parts=parts,
@@ -523,8 +710,18 @@ class WorkspaceService(ABC):
         )

     def sdk(self):
+        """Return the WorkspaceClient for the underlying workspace.
+
+        Returns:
+            The WorkspaceClient instance.
+        """
         return self.workspace.sdk()

     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         return self.workspace.current_user
yggdrasil/dataclasses/dataclass.py
CHANGED

@@ -1,3 +1,5 @@
+"""Dataclass helpers that integrate with Arrow schemas and safe casting."""
+
 import dataclasses
 from inspect import isclass
 from typing import Any, Iterable, Mapping, Tuple
@@ -18,6 +20,7 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:

     Args:
         cls_or_instance: The class or instance to check.
+
     Returns:
         True if the class or instance
         is a yggdrasil dataclass, False otherwise.
@@ -26,6 +29,14 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:


 def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
+    """Return a cached Arrow Field describing the dataclass type.
+
+    Args:
+        cls_or_instance: Dataclass class or instance.
+
+    Returns:
+        Arrow field describing the dataclass schema.
+    """
     if is_yggdataclass(cls_or_instance):
         return cls_or_instance.__arrow_field__()

@@ -58,7 +69,7 @@ def yggdataclass(
     kw_only=False, slots=False,
     weakref_slot=False
 ):
-    """
+    """Decorate a class with dataclass behavior plus Arrow helpers.

     Examines PEP 526 __annotations__ to determine fields.

@@ -73,7 +84,24 @@ def yggdataclass(
     """

     def wrap(c):
+        """Wrap a class with yggdrasil dataclass enhancements.
+
+        Args:
+            c: Class to decorate.
+
+        Returns:
+            Decorated dataclass type.
+        """
+
         def _init_public_fields(cls):
+            """Return init-enabled, public dataclass fields.
+
+            Args:
+                cls: Dataclass type.
+
+            Returns:
+                List of dataclasses.Field objects.
+            """
             return [
                 field
                 for field in dataclasses.fields(cls)
@@ -83,6 +111,11 @@ def yggdataclass(
         if not hasattr(c, "default_instance"):
             @classmethod
             def default_instance(cls):
+                """Return a default instance built from type defaults.
+
+                Returns:
+                    Default instance of the dataclass.
+                """
                 from yggdrasil.types import default_scalar

                 if not hasattr(cls, "__default_instance__"):
@@ -135,6 +168,14 @@ def yggdataclass(
         if not hasattr(c, "__arrow_field__"):
             @classmethod
             def __arrow_field__(cls, name: str | None = None):
+                """Return an Arrow field representing the dataclass schema.
+
+                Args:
+                    name: Optional override for the field name.
+
+                Returns:
+                    Arrow field describing the dataclass schema.
+                """
                 from yggdrasil.types.python_arrow import arrow_field_from_hint

                 return arrow_field_from_hint(cls, name=name)
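The dataclass.py hunks document a decorator that layers Arrow helpers (`__arrow_field__`, `default_instance`) onto ordinary dataclasses. A hedged usage sketch; the field types and the `yggdrasil.dataclasses` import path are assumptions based on the names shown above:

```python
import pyarrow as pa

from yggdrasil.dataclasses import (  # re-export assumed; the symbols are defined in dataclass.py
    yggdataclass,
    is_yggdataclass,
    get_dataclass_arrow_field,
)


@yggdataclass
class Reading:
    sensor_id: str
    value: float


print(is_yggdataclass(Reading))             # True
field = get_dataclass_arrow_field(Reading)  # pa.Field describing the dataclass schema
print(isinstance(field, pa.Field), field.type)
```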
yggdrasil/libs/__init__.py
CHANGED
yggdrasil/libs/databrickslib.py
CHANGED
@@ -1,3 +1,5 @@
+"""Optional Databricks SDK dependency helpers."""
+
 try:
     import databricks
     import databricks.sdk  # type: ignore
@@ -6,7 +8,9 @@ try:
     databricks_sdk = databricks.sdk
 except ImportError:
     class _DatabricksDummy:
+        """Placeholder object that raises if Databricks SDK is required."""
         def __getattr__(self, item):
+            """Raise an error when accessing missing Databricks SDK attributes."""
             require_databricks_sdk()

     databricks = _DatabricksDummy
@@ -14,6 +18,11 @@ except ImportError:


 def require_databricks_sdk():
+    """Ensure the Databricks SDK is available before use.
+
+    Returns:
+        None.
+    """
     if databricks_sdk is None:
         raise ImportError(
             "databricks_sdk is required to use this function. "
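The databrickslib.py hunks document the optional-dependency guard: if importing the SDK fails, a placeholder object raises a helpful ImportError on first attribute access. A minimal standalone sketch of the same pattern, with generic names rather than the packaged module:

```python
try:
    import databricks.sdk as databricks_sdk  # optional dependency
except ImportError:
    databricks_sdk = None


def require_databricks_sdk() -> None:
    # Fail loudly, with an install hint, only when the SDK is actually needed.
    if databricks_sdk is None:
        raise ImportError(
            "databricks_sdk is required to use this function. "
            "Install it with: pip install databricks-sdk"  # install hint is illustrative
        )


class _SdkProxy:
    """Stand-in that defers the ImportError until an attribute is accessed."""

    def __getattr__(self, item):
        require_databricks_sdk()
        return getattr(databricks_sdk, item)
```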
yggdrasil/libs/extensions/polars_extensions.py
CHANGED

@@ -1,3 +1,5 @@
+"""Polars DataFrame extension helpers for joins and resampling."""
+
 from __future__ import annotations

 import datetime
@@ -39,6 +41,14 @@ def join_coalesced(


 def _normalize_group_by(group_by: str | Sequence[str] | None) -> list[str] | None:
+    """Normalize group_by inputs into a list or None.
+
+    Args:
+        group_by: Grouping column or columns.
+
+    Returns:
+        List of column names or None.
+    """
     if group_by is None:
         return None
     if isinstance(group_by, str):
@@ -57,6 +67,15 @@ def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str,


 def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
+    """Build a Polars expression from an aggregation spec.
+
+    Args:
+        col: Column name to aggregate.
+        agg: Aggregation spec (expr, callable, or string).
+
+    Returns:
+        Polars expression.
+    """
     base = pl.col(col)

     if isinstance(agg, pl.Expr):
@@ -80,6 +99,14 @@ def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
|
|
|
80
99
|
|
|
81
100
|
|
|
82
101
|
def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
|
|
102
|
+
"""Normalize aggregation specs into a list of Polars expressions.
|
|
103
|
+
|
|
104
|
+
Args:
|
|
105
|
+
agg: Mapping or sequence of aggregation specs.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
List of Polars expressions.
|
|
109
|
+
"""
|
|
83
110
|
if isinstance(agg, Mapping):
|
|
84
111
|
return [_expr_from_agg(col, spec) for col, spec in agg.items()]
|
|
85
112
|
|
|
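These helpers accept an aggregation spec per column as an expression, a callable, or a string method name. A hedged sketch in plain Polars of what the string form boils down to (the yggdrasil wrappers themselves are not imported here):

```python
import polars as pl

df = pl.DataFrame({"g": ["a", "a", "b"], "x": [1, 2, 3]})

# Equivalent of a spec like {"x": "sum"}: the string resolves to the matching
# expression method on pl.col("x"), which is what _expr_from_agg constructs.
spec = {"x": "sum"}
exprs = [getattr(pl.col(col), name)() for col, name in spec.items()]
print(df.group_by("g").agg(exprs).sort("g"))
```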
@@ -91,11 +118,27 @@ def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
|
|
|
91
118
|
|
|
92
119
|
|
|
93
120
|
def _is_datetime(dtype: object) -> bool:
|
|
121
|
+
"""Return True when the dtype is a Polars datetime.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
dtype: Polars dtype to inspect.
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
True if dtype is Polars Datetime.
|
|
128
|
+
"""
|
|
94
129
|
# Datetime-only inference (per requirement), version-safe.
|
|
95
130
|
return isinstance(dtype, pl.Datetime)
|
|
96
131
|
|
|
97
132
|
|
|
98
133
|
def _infer_time_col(df: "pl.DataFrame") -> str:
|
|
134
|
+
"""Infer the first datetime-like column name from a DataFrame.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
df: Polars DataFrame to inspect.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
Column name of the first datetime field.
|
|
141
|
+
"""
|
|
99
142
|
# Find first Datetime column in schema order; ignore Date columns.
|
|
100
143
|
for name, dtype in df.schema.items():
|
|
101
144
|
if _is_datetime(dtype):
|
|
@@ -106,6 +149,15 @@ def _infer_time_col(df: "pl.DataFrame") -> str:
|
|
|
106
149
|
|
|
107
150
|
|
|
108
151
|
def _ensure_datetime_like(df: "pl.DataFrame", time_col: str) -> "pl.DataFrame":
|
|
152
|
+
"""Ensure a time column is cast to datetime for resampling.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
df: Polars DataFrame.
|
|
156
|
+
time_col: Column name to validate.
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
DataFrame with time column cast to datetime if needed.
|
|
160
|
+
"""
|
|
109
161
|
dtype = df.schema.get(time_col)
|
|
110
162
|
if dtype is None:
|
|
111
163
|
raise KeyError(f"resample: time_col '{time_col}' not found in DataFrame columns.")
|
|
@@ -151,6 +203,14 @@ def _timedelta_to_polars_duration(td: datetime.timedelta) -> str:


 def _normalize_duration(v: str | datetime.timedelta | None) -> str | None:
+    """Normalize duration inputs to a Polars duration string.
+
+    Args:
+        v: Duration string, timedelta, or None.
+
+    Returns:
+        Normalized duration string or None.
+    """
     if v is None:
         return None
     if isinstance(v, str):
@@ -168,6 +228,18 @@ def _upsample_single(
     offset: str | datetime.timedelta | None,
     keep_group_order: bool,
 ) -> "pl.DataFrame":
+    """Upsample a single DataFrame with normalized duration arguments.
+
+    Args:
+        df: Polars DataFrame to upsample.
+        time_col: Name of the time column.
+        every: Sampling interval.
+        offset: Optional offset interval.
+        keep_group_order: Preserve input order when grouping.
+
+    Returns:
+        Upsampled Polars DataFrame.
+    """
     df = df.sort(time_col)

     every_n = _normalize_duration(every)