ygg-0.1.31-py3-none-any.whl → ygg-0.1.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +29 -4
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +147 -0
- yggdrasil/databricks/sql/types.py +33 -1
- yggdrasil/databricks/workspaces/__init__.py +2 -1
- yggdrasil/databricks/workspaces/filesytem.py +183 -0
- yggdrasil/databricks/workspaces/io.py +387 -9
- yggdrasil/databricks/workspaces/path.py +297 -2
- yggdrasil/databricks/workspaces/path_kind.py +3 -0
- yggdrasil/databricks/workspaces/workspace.py +202 -5
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +123 -1
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.31.dist-info/RECORD +0 -59
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/sql/statement_result.py

@@ -1,3 +1,5 @@
+"""Result wrapper for Databricks SQL statement execution."""
+
 import dataclasses
 import threading
 import time
@@ -49,6 +51,7 @@ __all__ = [
 
 @dataclasses.dataclass
 class StatementResult:
+    """Container for statement responses, data extraction, and conversions."""
     engine: "SQLEngine"
     statement_id: str
     disposition: "Disposition"
@@ -60,6 +63,11 @@ class StatementResult:
     _arrow_table: Optional[pa.Table] = dataclasses.field(default=None, repr=False)
 
     def __getstate__(self):
+        """Serialize statement results, converting Spark dataframes to Arrow.
+
+        Returns:
+            A pickle-ready state dictionary.
+        """
         state = self.__dict__.copy()
 
         _spark_df = state.pop("_spark_df", None)
@@ -70,29 +78,54 @@ class StatementResult:
         return state
 
     def __setstate__(self, state):
+        """Restore statement result state, rehydrating cached data.
+
+        Args:
+            state: Serialized state dictionary.
+        """
         _spark_df = state.pop("_spark_df")
 
     def __iter__(self):
+        """Iterate over Arrow record batches."""
         return self.to_arrow_batches()
 
     @property
     def is_spark_sql(self):
+        """Return True when this result was produced by Spark SQL."""
         return self._spark_df is not None
 
     @property
     def response(self):
+        """Return the latest statement response, refreshing when needed.
+
+        Returns:
+            The current StatementResponse object.
+        """
         if self._response is None and not self.is_spark_sql:
             self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
         return self._response
 
     @response.setter
     def response(self, value: "StatementResponse"):
+        """Update the cached response and refresh timestamp.
+
+        Args:
+            value: StatementResponse to cache.
+        """
         self._response = value
         self._response_refresh_time = time.time()
 
         self.statement_id = self._response.statement_id
 
     def fresh_response(self, delay: float):
+        """Refresh the response if it is older than ``delay`` seconds.
+
+        Args:
+            delay: Minimum age in seconds before refreshing.
+
+        Returns:
+            The refreshed StatementResponse object.
+        """
         if self.is_spark_sql:
             return self._response
 
@@ -102,6 +135,14 @@ class StatementResult:
         return self._response
 
     def result_data_at(self, chunk_index: int):
+        """Fetch a specific result chunk by index.
+
+        Args:
+            chunk_index: Result chunk index to retrieve.
+
+        Returns:
+            The SDK result chunk response.
+        """
         sdk = self.workspace.sdk()
 
         return sdk.statement_execution.get_statement_result_chunk_n(
@@ -111,10 +152,20 @@ class StatementResult:
 
     @property
     def workspace(self):
+        """Expose the underlying workspace from the engine.
+
+        Returns:
+            The Workspace instance backing this statement.
+        """
         return self.engine.workspace
 
     @property
     def status(self):
+        """Return the statement status, handling persisted data.
+
+        Returns:
+            A StatementStatus object.
+        """
         if self.persisted:
             return StatementStatus(
                 state=StatementState.SUCCEEDED
@@ -129,20 +180,40 @@ class StatementResult:
 
     @property
     def state(self):
+        """Return the statement state.
+
+        Returns:
+            The StatementState enum value.
+        """
         return self.status.state
 
     @property
     def manifest(self):
+        """Return the SQL result manifest, if available.
+
+        Returns:
+            The result manifest or None for Spark SQL results.
+        """
         if self.is_spark_sql:
             return None
         return self.response.manifest
 
     @property
     def result(self):
+        """Return the raw statement result object.
+
+        Returns:
+            The statement result payload from the API.
+        """
         return self.response.result
 
     @property
     def done(self):
+        """Return True when the statement is in a terminal state.
+
+        Returns:
+            True if the statement is done, otherwise False.
+        """
         if self.persisted:
             return True
 
@@ -155,6 +226,11 @@ class StatementResult:
 
     @property
     def failed(self):
+        """Return True when the statement failed or was cancelled.
+
+        Returns:
+            True if the statement failed or was cancelled.
+        """
         if self.persisted:
             return True
 
@@ -165,14 +241,29 @@ class StatementResult:
 
     @property
     def persisted(self):
+        """Return True when data is cached locally.
+
+        Returns:
+            True when cached Arrow or Spark data is present.
+        """
         return self._spark_df is not None or self._arrow_table is not None
 
     def persist(self):
+        """Cache the statement result locally as Arrow data.
+
+        Returns:
+            The current StatementResult instance.
+        """
         if not self.persisted:
             self._arrow_table = self.to_arrow_table()
         return self
 
     def external_links(self):
+        """Yield external result links for EXTERNAL_LINKS dispositions.
+
+        Yields:
+            External link objects in result order.
+        """
         assert self.disposition == Disposition.EXTERNAL_LINKS, "Cannot get from %s, disposition %s != %s" % (
             self, self.disposition, Disposition.EXTERNAL_LINKS
         )
@@ -222,6 +313,11 @@ class StatementResult:
         )
 
     def raise_for_status(self):
+        """Raise a ValueError if the statement failed.
+
+        Returns:
+            None.
+        """
         if self.failed:
             # grab error info if present
             err = self.status.error
@@ -244,6 +340,15 @@ class StatementResult:
         timeout: Optional[int] = None,
         poll_interval: Optional[float] = None
     ):
+        """Wait for statement completion with optional timeout.
+
+        Args:
+            timeout: Maximum seconds to wait.
+            poll_interval: Initial poll interval in seconds.
+
+        Returns:
+            The current StatementResult instance.
+        """
         if self.done:
             return self
 
@@ -265,6 +370,11 @@ class StatementResult:
         return current
 
     def arrow_schema(self):
+        """Return the Arrow schema for the result.
+
+        Returns:
+            An Arrow Schema instance.
+        """
         if self.persisted:
             if self._arrow_table is not None:
                 return self._arrow_table.schema
@@ -277,6 +387,14 @@ class StatementResult:
         return pa.schema(fields)
 
     def to_arrow_table(self, parallel_pool: Optional[int] = 4) -> pa.Table:
+        """Collect the statement result into a single Arrow table.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            An Arrow Table containing all rows.
+        """
         if self.persisted:
             if self._arrow_table:
                 return self._arrow_table
@@ -295,6 +413,14 @@ class StatementResult:
         self,
         parallel_pool: Optional[int] = 4
     ) -> Iterator[pa.RecordBatch]:
+        """Stream the result as Arrow record batches.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Yields:
+            Arrow RecordBatch objects.
+        """
         if self.persisted:
             if self._arrow_table is not None:
                 for batch in self._arrow_table.to_batches(max_chunksize=64 * 1024):
@@ -379,15 +505,36 @@ class StatementResult:
         self,
         parallel_pool: Optional[int] = 4
     ) -> "pandas.DataFrame":
+        """Return the result as a pandas DataFrame.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            A pandas DataFrame with the result rows.
+        """
         return self.to_arrow_table(parallel_pool=parallel_pool).to_pandas()
 
     def to_polars(
         self,
         parallel_pool: Optional[int] = 4
    ) -> "polars.DataFrame":
+        """Return the result as a polars DataFrame.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            A polars DataFrame with the result rows.
+        """
         return polars.from_arrow(self.to_arrow_table(parallel_pool=parallel_pool))
 
     def to_spark(self):
+        """Return the result as a Spark DataFrame, caching it locally.
+
+        Returns:
+            A Spark DataFrame with the result rows.
+        """
         if self._spark_df:
             return self._spark_df
 
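The new docstrings describe a fetch-and-convert flow: check the statement status, then materialize the rows through Arrow. Below is a minimal consumption sketch using only methods visible in this diff; how the `StatementResult` is obtained from `SQLEngine` is outside this diff and is assumed.

```python
# Sketch only: `result` is assumed to be a StatementResult produced elsewhere;
# every method used below appears in the diff above.
def collect_to_pandas(result):
    """Collect a finished statement into a pandas DataFrame."""
    result.raise_for_status()                        # raises ValueError if the statement failed
    table = result.to_arrow_table(parallel_pool=4)   # gather all result chunks into one Arrow table
    return table.to_pandas()                         # or result.to_pandas() / to_polars() / to_spark()


def stream_row_counts(result):
    """Stream Arrow record batches without materializing the full table."""
    for batch in result.to_arrow_batches(parallel_pool=4):
        yield batch.num_rows
```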
yggdrasil/databricks/sql/types.py

@@ -1,3 +1,5 @@
+"""Type utilities for Databricks SQL metadata and Arrow."""
+
 import json
 import re
 from typing import Union
@@ -86,6 +88,14 @@ _struct_re = re.compile(r"^STRUCT\s*<\s*(.+)\s*>$", re.IGNORECASE)
 
 
 def _split_top_level_commas(s: str):
+    """Split a type string by commas, respecting nested angle brackets.
+
+    Args:
+        s: Type string to split.
+
+    Returns:
+        A list of top-level comma-separated parts.
+    """
     parts, cur, depth = [], [], 0
     for ch in s:
         if ch == '<':
@@ -103,6 +113,14 @@ def _split_top_level_commas(s: str):
 
 
 def _safe_bytes(obj):
+    """Convert an object to UTF-8 bytes, with safe handling for None.
+
+    Args:
+        obj: Value to convert.
+
+    Returns:
+        UTF-8 encoded bytes.
+    """
     if not isinstance(obj, bytes):
         if not obj:
             return b""
@@ -120,6 +138,12 @@ def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
     - looks up base types in STRING_TYPE_MAP (expects uppercase keys)
     - supports DECIMAL(p,s), ARRAY<...>, MAP<k,v>, STRUCT<...> recursively
     - raises ValueError if it cannot map the provided type string
+
+    Args:
+        type_str: SQL type string to parse.
+
+    Returns:
+        The corresponding Arrow DataType.
     """
     if not type_str:
         raise ValueError("Empty type string")
@@ -177,6 +201,14 @@ def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
 
 
 def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
+    """Convert Databricks SQL/Catalog column info into an Arrow field.
+
+    Args:
+        col: ColumnInfo from SQL or Catalog APIs.
+
+    Returns:
+        An Arrow Field for the column.
+    """
     arrow_type = parse_sql_type_to_pa(col.type_text)
 
     if isinstance(col, CatalogColumnInfo):
@@ -198,4 +230,4 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
         arrow_type,
         nullable=nullable,
         metadata=md
-    )
+    )
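The `_split_top_level_commas` docstring describes splitting only on commas that sit outside `ARRAY<...>`, `MAP<...>`, and `STRUCT<...>` brackets, which is what lets `parse_sql_type_to_pa` recurse into nested types. An independent sketch of that splitting rule (not the package's own implementation) looks like this:

```python
from typing import List


def split_top_level_commas(s: str) -> List[str]:
    """Split on commas that are not nested inside <...> brackets."""
    parts, cur, depth = [], [], 0
    for ch in s:
        if ch == "<":
            depth += 1
        elif ch == ">":
            depth -= 1
        if ch == "," and depth == 0:
            parts.append("".join(cur).strip())
            cur = []
        else:
            cur.append(ch)
    if cur:
        parts.append("".join(cur).strip())
    return parts


# split_top_level_commas("a INT, b MAP<STRING, BIGINT>")
# -> ["a INT", "b MAP<STRING, BIGINT>"]
```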
yggdrasil/databricks/workspaces/filesytem.py

@@ -1,3 +1,5 @@
+"""PyArrow filesystem wrappers for Databricks paths."""
+
 __all__ = [
     "DatabricksFileSystem",
     "DatabricksFileSystemHandler"
@@ -14,26 +16,60 @@ if TYPE_CHECKING:
 
 
 class DatabricksFileSystemHandler(FileSystemHandler):
+    """PyArrow FileSystemHandler backed by Databricks paths."""
 
     def __init__(
         self,
         workspace: "Workspace",
     ):
+        """Create a handler bound to a Workspace.
+
+        Args:
+            workspace: Workspace instance to use.
+        """
         super().__init__()
         self.workspace = workspace
 
     def __enter__(self):
+        """Enter a context manager and connect to the workspace.
+
+        Returns:
+            A connected DatabricksFileSystemHandler instance.
+        """
         return self.connect(clone=True)
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and close the workspace.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+        """
         self.workspace.__exit__(exc_type, exc_val, exc_tb)
 
     def _parse_path(self, obj: Any) -> "DatabricksPath":
+        """Parse a path-like object into a DatabricksPath.
+
+        Args:
+            obj: Path-like object to parse.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         from .path import DatabricksPath
 
         return DatabricksPath.parse(obj, workspace=self.workspace)
 
     def connect(self, clone: bool = True):
+        """Connect the workspace and optionally return a cloned handler.
+
+        Args:
+            clone: Whether to return a cloned handler.
+
+        Returns:
+            A connected handler.
+        """
         workspace = self.connect(clone=clone)
 
         if clone:
@@ -45,9 +81,21 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         return self
 
     def close(self):
+        """Close the underlying workspace client.
+
+        Returns:
+            None.
+        """
         self.workspace.close()
 
     def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
+        """Copy a file between Databricks paths.
+
+        Args:
+            src: Source path.
+            dest: Destination path.
+            chunk_size: Chunk size in bytes.
+        """
         src = self._parse_path(src)
         dest = self._parse_path(dest)
 
@@ -59,24 +107,66 @@ class DatabricksFileSystemHandler(FileSystemHandler):
                 w.write(chunk)
 
     def create_dir(self, path, *args, recursive: bool = True, **kwargs):
+        """Create a directory at the given path.
+
+        Args:
+            path: Directory path to create.
+            recursive: Whether to create parents.
+
+        Returns:
+            The created DatabricksPath instance.
+        """
         return self._parse_path(path).mkdir(parents=recursive)
 
     def delete_dir(self, path):
+        """Delete a directory recursively.
+
+        Args:
+            path: Directory path to delete.
+        """
         return self._parse_path(path).rmdir(recursive=True)
 
     def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
+        """Delete the contents of a directory.
+
+        Args:
+            path: Directory path whose contents should be removed.
+            accept_root_dir: Whether to allow deleting root contents.
+        """
         return self._parse_path(path).rmdir(recursive=True)
 
     def delete_root_dir_contents(self):
+        """Delete the contents of the root directory."""
         return self.delete_dir_contents("/", accept_root_dir=True)
 
     def delete_file(self, path):
+        """Delete a single file.
+
+        Args:
+            path: File path to delete.
+        """
        return self._parse_path(path).rmfile()
 
     def equals(self, other: FileSystem):
+        """Return True if the filesystem handler matches another.
+
+        Args:
+            other: Another FileSystem instance.
+
+        Returns:
+            True if equal, otherwise False.
+        """
         return self == other
 
     def from_uri(self, uri):
+        """Return a handler for the workspace in the provided URI.
+
+        Args:
+            uri: URI or path to parse.
+
+        Returns:
+            A DatabricksFileSystemHandler for the URI.
+        """
         uri = self._parse_path(uri)
 
         return self.__class__(
@@ -87,6 +177,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         self,
         paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
     ) -> Union[FileInfo, List[FileInfo]]:
+        """Return FileInfo objects for paths or selectors.
+
+        Args:
+            paths_or_selector: Path(s) or a FileSelector.
+
+        Returns:
+            A FileInfo or list of FileInfo objects.
+        """
         from .path import DatabricksPath
 
         if isinstance(paths_or_selector, (str, DatabricksPath)):
@@ -106,6 +204,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         self,
         selector: FileSelector
     ):
+        """Return FileInfo entries for a FileSelector.
+
+        Args:
+            selector: FileSelector describing the listing.
+
+        Returns:
+            A list of FileInfo entries.
+        """
         base_dir = self._parse_path(selector.base_dir)
 
         return [
@@ -117,9 +223,20 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         ]
 
     def get_type_name(self):
+        """Return the filesystem type name.
+
+        Returns:
+            The filesystem type name string.
+        """
         return "dbfs"
 
     def move(self, src, dest):
+        """Move a file by copying then deleting.
+
+        Args:
+            src: Source path.
+            dest: Destination path.
+        """
         src = self._parse_path(src)
 
         src.copy_to(dest)
@@ -127,6 +244,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         src.remove(recursive=True)
 
     def normalize_path(self, path):
+        """Normalize a path to a full Databricks path string.
+
+        Args:
+            path: Path to normalize.
+
+        Returns:
+            The normalized full path string.
+        """
         return self._parse_path(path).full_path()
 
     def open(
@@ -135,12 +260,43 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         mode: str = "r+",
         encoding: Optional[str] = None,
     ):
+        """Open a file path as a Databricks IO stream.
+
+        Args:
+            path: Path to open.
+            mode: File mode string.
+            encoding: Optional text encoding.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
 
     def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        """Open an append stream.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+            metadata: Optional metadata.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="ab")
 
     def open_input_file(self, path, mode: str = "rb", **kwargs):
+        """Open an input file as a PyArrow PythonFile.
+
+        Args:
+            path: Path to open.
+            mode: File mode string.
+            **kwargs: Additional options.
+
+        Returns:
+            A PyArrow PythonFile instance.
+        """
         buf = self._parse_path(path).open(mode=mode).connect(clone=True)
 
         return PythonFile(
@@ -149,13 +305,40 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         )
 
     def open_input_stream(self, path, compression='detect', buffer_size=None):
+        """Open an input stream for reading bytes.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="rb")
 
     def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        """Open an output stream for writing bytes.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+            metadata: Optional metadata.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="wb")
 
 
 class DatabricksFileSystem(PyFileSystem):
+    """PyArrow filesystem wrapper for Databricks paths."""
 
     def __init__(self, handler): # real signature unknown; restored from __doc__
+        """Initialize the filesystem with a handler.
+
+        Args:
+            handler: FileSystemHandler instance.
+        """
         super().__init__(handler)