ygg 0.1.34-py3-none-any.whl → 0.1.37-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ygg-0.1.37.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.34
+ Version: 0.1.37
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
ygg-0.1.37.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
- ygg-0.1.34.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ ygg-0.1.37.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
  yggdrasil/__init__.py,sha256=PfH7Xwt6uue6oqe6S5V8NhDJcVQClkKrBE1KXhdelZc,117
- yggdrasil/version.py,sha256=cIz48TZT2Xc-LLdWHdfAlxnIA0OSZqt42ZJcukkGo6s,22
+ yggdrasil/version.py,sha256=bC2HSZRduanhYcwfv2uqbh4LgiwM3nV4LyoWJhD4ftY,22
  yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
  yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
- yggdrasil/databricks/compute/cluster.py,sha256=KUyGcpEKiA5XgAbeX1iHzuhJ4pucFqch_galZwYJlnc,39599
- yggdrasil/databricks/compute/execution_context.py,sha256=Z0EvkhdR803Kh1UOh4wR0oyyLXzAJo4Lj5CRNmxW4q4,22287
- yggdrasil/databricks/compute/remote.py,sha256=rrqLMnzI0KvhXghtOrve3W-rudi-cTjS-8dJXKjHM3A,2266
+ yggdrasil/databricks/compute/cluster.py,sha256=mnNzjCx7X3iK22oZ7K3pqot0AXq9JTdg97kT61j2_UU,40729
+ yggdrasil/databricks/compute/execution_context.py,sha256=nxrNXoarq_JAB-Cpj0udHhq2jx-DmMbRWJdAezLrPis,22347
+ yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
  yggdrasil/databricks/sql/__init__.py,sha256=y1n5yg-drZ8QVZbEgznsRG24kdJSnFis9l2YfYCsaCM,234
- yggdrasil/databricks/sql/engine.py,sha256=weYHosCVc9CZYaVooexEphNw6W_Ex0dphuGbfA48mEI,41104
+ yggdrasil/databricks/sql/engine.py,sha256=kUFBddJJQC0AgDqH0l7GFs7d_Ony5rc8fOv4inLU6Vw,41051
  yggdrasil/databricks/sql/exceptions.py,sha256=Jqd_gT_VyPL8klJEHYEzpv5eHtmdY43WiQ7HZBaEqSk,53
- yggdrasil/databricks/sql/statement_result.py,sha256=VlHXhTcvTVya_2aJ-uUfUooZF_MqQuOZ8k7g6PBDhOM,17227
+ yggdrasil/databricks/sql/statement_result.py,sha256=KXBLbEpwrjrAeH0ezKNlaa6Vm3jbG3R0ZCnEFHvqpoQ,16834
  yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
  yggdrasil/databricks/workspaces/__init__.py,sha256=Ti1I99JTC3koYJaCy8WYvkAox4KdcuMRk8b2rHroWCY,133
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
@@ -31,6 +31,7 @@ yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo
  yggdrasil/libs/extensions/spark_extensions.py,sha256=E64n-3SFTDgMuXwWitX6vOYP9ln2lpGKb0htoBLEZgc,16745
  yggdrasil/pyutils/__init__.py,sha256=tl-LapAc71TV7RMgf2ftKwrzr8iiLOGHeJgA3RvO93w,293
  yggdrasil/pyutils/callable_serde.py,sha256=euY7Kiy04i1tpWKuB0b2qQ1FokLC3nq0cv7PObWYUBE,21809
+ yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI,3080
  yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWgng,3388
  yggdrasil/pyutils/expiring_dict.py,sha256=q9gb09-2EUN-jQZumUw5BXOQGYcj1wb85qKtQlciSxg,5825
  yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
@@ -54,8 +55,8 @@ yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgT
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
- ygg-0.1.34.dist-info/METADATA,sha256=iGQcUq6tGnBBLiVo9jPak9PE-Ma8wWPxY2BsWKLGC2w,19204
- ygg-0.1.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ygg-0.1.34.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
- ygg-0.1.34.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
- ygg-0.1.34.dist-info/RECORD,,
+ ygg-0.1.37.dist-info/METADATA,sha256=QOawaiOu5RrOUAhuIws2wNB1Nj3CQq38desRezzYMwk,19204
+ ygg-0.1.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ygg-0.1.37.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ ygg-0.1.37.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ ygg-0.1.37.dist-info/RECORD,,
yggdrasil/databricks/compute/cluster.py CHANGED
@@ -24,6 +24,7 @@ from .execution_context import ExecutionContext
  from ..workspaces.workspace import WorkspaceService, Workspace
  from ... import retry, CallableSerde
  from ...libs.databrickslib import databricks_sdk
+ from ...pyutils.equality import dicts_equal, dict_diff
  from ...pyutils.expiring_dict import ExpiringDict
  from ...pyutils.modules import PipIndexSettings
  from ...pyutils.python_env import PythonEnv
@@ -110,7 +111,7 @@ class Cluster(WorkspaceService):

  _details: Optional["ClusterDetails"] = dataclasses.field(default=None, repr=False)
  _details_refresh_time: float = dataclasses.field(default=0, repr=False)
- _system_context: Optional[ExecutionContext] = None
+ _system_context: Optional[ExecutionContext] = dataclasses.field(default=None, repr=False)

  # host → Cluster instance
  _env_clusters: ClassVar[Dict[str, "Cluster"]] = {}
@@ -309,6 +310,11 @@ class Cluster(WorkspaceService):
  self.details = self.clusters_client().get(cluster_id=self.cluster_id)
  return self._details

+ def refresh(self, max_delay: float | None = None):
+ self.details = self.fresh_details(max_delay=max_delay)
+
+ return self
+
  @details.setter
  def details(self, value: "ClusterDetails"):
  """Cache cluster details and update identifiers."""
@@ -321,10 +327,10 @@ class Cluster(WorkspaceService):
  @property
  def state(self):
  """Return the current cluster state."""
- details = self.fresh_details(max_delay=10)
+ self.refresh()

- if details is not None:
- return details.state
+ if self._details is not None:
+ return self._details.state
  return State.UNKNOWN

  @property
@@ -355,7 +361,7 @@ class Cluster(WorkspaceService):
  def wait_for_status(
  self,
  tick: float = 0.5,
- timeout: float = 600,
+ timeout: Union[float, dt.timedelta] = 600,
  backoff: int = 2,
  max_sleep_time: float = 15
  ):
@@ -373,6 +379,9 @@ class Cluster(WorkspaceService):
  start = time.time()
  sleep_time = tick

+ if isinstance(timeout, dt.timedelta):
+ timeout = timeout.total_seconds()
+
  while self.is_pending:
  time.sleep(sleep_time)

@@ -658,8 +667,6 @@ class Cluster(WorkspaceService):
  Returns:
  The updated Cluster instance.
  """
- self.install_libraries(libraries=libraries, wait_timeout=None, raise_error=False)
-
  existing_details = {
  k: v
  for k, v in self.details.as_shallow_dict().items()
@@ -672,22 +679,36 @@ class Cluster(WorkspaceService):
  if k in _EDIT_ARG_NAMES
  }

- if update_details != existing_details:
+ same = dicts_equal(
+ existing_details,
+ update_details,
+ keys=_EDIT_ARG_NAMES,
+ treat_missing_as_none=True,
+ float_tol=0.0, # set e.g. 1e-6 if you have float-y stuff
+ )
+
+ if not same:
+ diff = {
+ k: v[1]
+ for k, v in dict_diff(existing_details, update_details, keys=_EDIT_ARG_NAMES).items()
+ }
+
  logger.debug(
  "Updating %s with %s",
- self, update_details
+ self, diff
  )

  self.wait_for_status()
- self.details = retry(tries=4, delay=0.5, max_delay=2)(
- self.clusters_client().edit_and_wait
- )(**update_details)
+ self.details = self.clusters_client().edit(**update_details)
+ self.wait_for_status()

  logger.info(
  "Updated %s",
  self
  )

+ self.install_libraries(libraries=libraries, wait_timeout=None, raise_error=False)
+
  return self

  def list_clusters(self) -> Iterator["Cluster"]:
@@ -742,7 +763,10 @@ class Cluster(WorkspaceService):
  return None

  return Cluster(
- workspace=self.workspace, cluster_id=details.cluster_id, _details=details
+ workspace=self.workspace,
+ cluster_id=details.cluster_id,
+ cluster_name=details.cluster_name,
+ _details=details
  )

  for cluster in self.list_clusters():
@@ -760,16 +784,18 @@ class Cluster(WorkspaceService):

  def ensure_running(
  self,
+ wait_timeout: Optional[dt.timedelta] = dt.timedelta(minutes=20)
  ) -> "Cluster":
  """Ensure the cluster is running.

  Returns:
  The current Cluster instance.
  """
- return self.start()
+ return self.start(wait_timeout=wait_timeout)

  def start(
  self,
+ wait_timeout: Optional[dt.timedelta] = dt.timedelta(minutes=20)
  ) -> "Cluster":
  """Start the cluster if it is not already running.

@@ -780,8 +806,13 @@ class Cluster(WorkspaceService):

  if not self.is_running:
  logger.info("Starting %s", self)
- self.details = self.clusters_client().start_and_wait(cluster_id=self.cluster_id)
- return self.wait_installed_libraries()
+
+ if wait_timeout:
+ self.clusters_client().start(cluster_id=self.cluster_id)
+ self.wait_for_status(timeout=wait_timeout.total_seconds())
+ self.wait_installed_libraries(timeout=wait_timeout)
+ else:
+ self.clusters_client().start(cluster_id=self.cluster_id)

  return self

@@ -1124,7 +1155,7 @@ class Cluster(WorkspaceService):
  "Waiting %s to install libraries timed out" % self
  )

- time.sleep(10)
+ time.sleep(5)
  statuses = list(self.installed_library_statuses())

  return self
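Illustrative usage of the reworked lifecycle methods above. This is a sketch only: the `cluster` handle and how it is obtained are assumed, while the method and parameter names are taken from the hunks.

    import datetime as dt

    # refresh() re-reads details via fresh_details() and returns self;
    # state now reads the cached _details after an implicit refresh().
    cluster.refresh(max_delay=30)
    print(cluster.state)

    # wait_for_status() accepts a timedelta as well as a float number of
    # seconds; a timedelta is converted with total_seconds() before polling.
    cluster.wait_for_status(timeout=dt.timedelta(minutes=10))

    # start()/ensure_running() now take wait_timeout (default 20 minutes).
    # With a value they start, wait for the status, then wait for library
    # installation; with wait_timeout=None they only issue the start call.
    cluster.ensure_running(wait_timeout=dt.timedelta(minutes=5))
    cluster.start(wait_timeout=None)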
yggdrasil/databricks/compute/execution_context.py CHANGED
@@ -78,8 +78,8 @@ class ExecutionContext:
  language: Optional["Language"] = None
  context_id: Optional[str] = None

- _was_connected: Optional[bool] = None
- _remote_metadata: Optional[RemoteMetadata] = None
+ _was_connected: Optional[bool] = dc.field(default=None, repr=False)
+ _remote_metadata: Optional[RemoteMetadata] = dc.field(default=None, repr=False)

  _lock: threading.RLock = dc.field(default_factory=threading.RLock, init=False, repr=False)

yggdrasil/databricks/compute/remote.py CHANGED
@@ -2,11 +2,12 @@

  import datetime as dt
  import logging
+ import os
  from typing import (
  Callable,
  Optional,
  TypeVar,
- List, TYPE_CHECKING,
+ List, TYPE_CHECKING, Union,
  )

  if TYPE_CHECKING:
@@ -25,10 +26,15 @@ ReturnType = TypeVar("ReturnType")
  logger = logging.getLogger(__name__)


+ def identity(x):
+ return x
+
+
  def databricks_remote_compute(
+ _func: Optional[Callable] = None,
  cluster_id: Optional[str] = None,
  cluster_name: Optional[str] = None,
- workspace: Optional[Workspace] = None,
+ workspace: Optional[Union[Workspace, str]] = None,
  cluster: Optional["Cluster"] = None,
  timeout: Optional[dt.timedelta] = None,
  env_keys: Optional[List[str]] = None,
@@ -38,6 +44,7 @@ def databricks_remote_compute(
  """Return a decorator that executes functions on a remote cluster.

  Args:
+ _func: function to decorate
  cluster_id: Optional cluster id to target.
  cluster_name: Optional cluster name to target.
  workspace: Workspace instance or host string for lookup.
@@ -51,13 +58,19 @@ def databricks_remote_compute(
  A decorator that runs functions on the resolved Databricks cluster.
  """
  if force_local or Workspace.is_in_databricks_environment():
- def identity(x):
- return x
+ return identity if _func is None else _func
+
+ if workspace is None:
+ workspace = os.getenv("DATABRICKS_HOST")

- return identity
+ if workspace is None:
+ return identity if _func is None else _func

- if isinstance(workspace, str):
- workspace = Workspace(host=workspace)
+ if not isinstance(workspace, Workspace):
+ if isinstance(workspace, str):
+ workspace = Workspace(host=workspace).connect(clone=False)
+ else:
+ raise ValueError("Cannot initialize databricks workspace with %s" % type(workspace))

  if cluster is None:
  if cluster_id or cluster_name:
@@ -68,10 +81,14 @@ def databricks_remote_compute(
  else:
  cluster = workspace.clusters().replicated_current_environment(
  workspace=workspace,
- cluster_name=cluster_name
+ cluster_name=cluster_name,
+ single_user_name=workspace.current_user.user_name
  )

+ cluster.ensure_running(wait_timeout=None)
+
  return cluster.execution_decorator(
+ _func=_func,
  env_keys=env_keys,
  timeout=timeout,
  **options
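With the new leading `_func` parameter the decorator can be applied bare as well as with arguments. A hedged sketch; the decorated functions and the cluster name are invented for illustration, the decorator behavior is taken from the hunks above.

    import datetime as dt

    @databricks_remote_compute          # bare form: the function is passed as _func
    def add(a: int, b: int) -> int:
        return a + b

    @databricks_remote_compute(cluster_name="dev-cluster", timeout=dt.timedelta(minutes=5))
    def heavy(payload):
        ...

    # If the code already runs inside Databricks, or neither `workspace` nor the
    # DATABRICKS_HOST environment variable resolves a workspace, the function is
    # returned unchanged (identity). Otherwise the cluster is resolved,
    # ensure_running(wait_timeout=None) is issued, and execution_decorator wraps it.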
yggdrasil/databricks/sql/engine.py CHANGED
@@ -198,8 +198,7 @@ class SQLEngine(WorkspaceService):
  """Short, single-line preview for logs (avoids spewing giant SQL)."""
  if not sql:
  return ""
- one_line = " ".join(sql.split())
- return one_line[:limit] + ("…" if len(one_line) > limit else "")
+ return sql[:limit] + ("…" if len(sql) > limit else "")

  def execute(
  self,
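A behavioral note on the hunk above, sketched with an assumed `limit`: 0.1.34 collapsed all whitespace before truncating, while 0.1.37 truncates the raw statement as-is.

    sql = "SELECT *\nFROM t\nWHERE x = 1"
    # 0.1.34: " ".join(sql.split())[:limit] -> 'SELECT * FROM t WHERE x = 1'
    # 0.1.37: sql[:limit]                   -> newlines and spacing preserved,
    #                                          still capped at `limit` with a trailing ellipsis when cut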
@@ -218,7 +217,6 @@ class SQLEngine(WorkspaceService):
  schema_name: Optional[str] = None,
  table_name: Optional[str] = None,
  wait_result: bool = True,
- **kwargs,
  ) -> "StatementResult":
  """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.

@@ -245,7 +243,6 @@ class SQLEngine(WorkspaceService):
  schema_name: Optional schema override for API engine.
  table_name: Optional table override used when `statement` is None.
  wait_result: Whether to block until completion (API engine).
- **kwargs: Extra params forwarded to Databricks SDK execute_statement.

  Returns:
  StatementResult.
@@ -263,9 +260,12 @@ class SQLEngine(WorkspaceService):
  if spark_session is None:
  raise ValueError("No spark session found to run sql query")

- t0 = time.time()
- df = spark_session.sql(statement)
- logger.info("Spark SQL executed in %.3fs: %s", time.time() - t0, self._sql_preview(statement))
+ df: SparkDataFrame = spark_session.sql(statement)
+
+ if row_limit:
+ df = df.limit(row_limit)
+
+ logger.info("Spark SQL executed: %s", self._sql_preview(statement))

  # Avoid Disposition dependency if SDK imports are absent
  spark_disp = disposition if disposition is not None else getattr(globals().get("Disposition", object), "EXTERNAL_LINKS", None)
@@ -287,7 +287,6 @@ class SQLEngine(WorkspaceService):
  if not statement:
  full_name = self.table_full_name(catalog_name=catalog_name, schema_name=schema_name, table_name=table_name)
  statement = f"SELECT * FROM {full_name}"
- logger.debug("Autogenerated statement: %s", self._sql_preview(statement))

  if not warehouse_id:
  warehouse_id = self._get_or_default_warehouse_id()
@@ -314,7 +313,11 @@ class SQLEngine(WorkspaceService):
  disposition=disposition,
  )

- # BUGFIX: previously returned `wait_result` (a bool) on wait_result=False 🤦
+ logger.info(
+ "API SQL executed: %s",
+ self._sql_preview(statement)
+ )
+
  return execution.wait() if wait_result else execution

  def spark_table(
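A minimal sketch of the non-blocking path kept by the last hunk; the engine instance and statement are assumed, while `execute`, `wait_result`, and `wait()` come from the diff.

    result = engine.execute("SELECT 1", wait_result=False)  # returns the pending execution immediately
    # ... do other work ...
    result.wait()                                            # block only when the result is actually needed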
@@ -465,15 +468,7 @@ class SQLEngine(WorkspaceService):
  safe_chars=True,
  )

- logger.info(
- "Arrow insert into %s (mode=%s, match_by=%s, zorder_by=%s)",
- location,
- mode,
- match_by,
- zorder_by,
- )
-
- with self as connected:
+ with self.connect() as connected:
  if existing_schema is None:
  try:
  existing_schema = connected.get_table_schema(
@@ -482,7 +477,6 @@
  table_name=table_name,
  to_arrow_schema=True,
  )
- logger.debug("Fetched existing schema for %s (columns=%d)", location, len(existing_schema.names))
  except ValueError as exc:
  data_tbl = convert(data, pa.Table)
  existing_schema = data_tbl.schema
@@ -527,7 +521,20 @@

  transaction_id = self._random_suffix()

- data_tbl = convert(data, pa.Table, options=cast_options, target_field=existing_schema)
+ data_tbl = convert(
+ data, pa.Table,
+ options=cast_options, target_field=existing_schema
+ )
+ num_rows = data_tbl.num_rows
+
+ logger.debug(
+ "Arrow inserting %s rows into %s (mode=%s, match_by=%s, zorder_by=%s)",
+ num_rows,
+ location,
+ mode,
+ match_by,
+ zorder_by,
+ )

  # Write in temp volume
  temp_volume_path = connected.dbfs_path(
@@ -545,7 +552,6 @@
  statements: list[str] = []

  if match_by:
- logger.info("Using MERGE INTO (match_by=%s)", match_by)
  on_condition = " AND ".join([f"T.`{k}` = S.`{k}`" for k in match_by])

  update_cols = [c for c in columns if c not in match_by]
@@ -588,6 +594,15 @@ FROM parquet.`{temp_volume_path}`"""
  except Exception:
  logger.exception("Failed cleaning temp volume: %s", temp_volume_path)

+ logger.info(
+ "Arrow inserted %s rows into %s (mode=%s, match_by=%s, zorder_by=%s)",
+ num_rows,
+ location,
+ mode,
+ match_by,
+ zorder_by,
+ )
+
  if zorder_by:
  zcols = ", ".join([f"`{c}`" for c in zorder_by])
  optimize_sql = f"OPTIMIZE {location} ZORDER BY ({zcols})"
@@ -675,7 +690,6 @@ FROM parquet.`{temp_volume_path}`"""
  table_name=table_name,
  to_arrow_schema=False,
  )
- logger.debug("Fetched destination Spark schema for %s", location)
  except ValueError:
  logger.warning("Destination table missing; creating table %s via overwrite write", location)
  data = convert(data, pyspark.sql.DataFrame)
@@ -704,10 +718,8 @@ FROM parquet.`{temp_volume_path}`"""

  if match_by:
  cond = " AND ".join([f"t.`{k}` <=> s.`{k}`" for k in match_by])
- logger.info("Running Delta MERGE (cond=%s)", cond)

  if mode.casefold() == "overwrite":
- logger.info("Overwrite-by-key mode: delete matching keys then append")
  data = data.cache()
  distinct_keys = data.select([f"`{k}`" for k in match_by]).distinct()

@@ -815,6 +827,7 @@ FROM parquet.`{temp_volume_path}`"""
  optimize_write: bool = True,
  auto_compact: bool = True,
  execute: bool = True,
+ wait_result: bool = True
  ) -> Union[str, "StatementResult"]:
  """Generate (and optionally execute) CREATE TABLE DDL from an Arrow schema/field.

@@ -832,6 +845,7 @@ FROM parquet.`{temp_volume_path}`"""
  optimize_write: Sets delta.autoOptimize.optimizeWrite table property.
  auto_compact: Sets delta.autoOptimize.autoCompact table property.
  execute: If True, executes DDL and returns StatementResult; otherwise returns SQL string.
+ wait_result: Waits execution to complete

  Returns:
  StatementResult if execute=True, else the DDL SQL string.
@@ -897,11 +911,13 @@ FROM parquet.`{temp_volume_path}`"""

  statement = "\n".join(sql)

- logger.info("Generated CREATE TABLE DDL for %s", location)
- logger.debug("DDL:\n%s", statement)
+ logger.debug(
+ "Generated CREATE TABLE DDL for %s:\n%s",
+ location, statement
+ )

  if execute:
- return self.execute(statement)
+ return self.execute(statement, wait_result=wait_result)
  return statement

  def _check_location_params(
yggdrasil/databricks/sql/statement_result.py CHANGED
@@ -44,6 +44,11 @@ if TYPE_CHECKING:
  from .engine import SQLEngine


+ DONE_STATES = {
+ StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED,
+ StatementState.SUCCEEDED
+ }
+
  __all__ = [
  "StatementResult"
  ]
@@ -57,7 +62,6 @@ class StatementResult:
  disposition: "Disposition"

  _response: Optional[StatementResponse] = dataclasses.field(default=None, repr=False)
- _response_refresh_time: float = dataclasses.field(default=0, repr=False)

  _spark_df: Optional[SparkDataFrame] = dataclasses.field(default=None, repr=False)
  _arrow_table: Optional[pa.Table] = dataclasses.field(default=None, repr=False)
@@ -101,8 +105,35 @@
  Returns:
  The current StatementResponse object.
  """
- if self._response is None and not self.is_spark_sql:
- self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
+ if self.is_spark_sql:
+ return StatementResponse(
+ statement_id=self.statement_id or "sparksql",
+ status=StatementStatus(
+ state=StatementState.SUCCEEDED
+ )
+ )
+ elif not self.statement_id:
+ return StatementResponse(
+ statement_id="unknown",
+ status=StatementStatus(
+ state=StatementState.PENDING
+ )
+ )
+
+ statement_execution = self.workspace.sdk().statement_execution
+
+ if self._response is None:
+ self._response = (
+ statement_execution
+ .get_statement(self.statement_id)
+ )
+
+ if self._response.status.state not in DONE_STATES:
+ self._response = (
+ statement_execution
+ .get_statement(self.statement_id)
+ )
+
  return self._response

  @response.setter
@@ -113,27 +144,8 @@
  value: StatementResponse to cache.
  """
  self._response = value
- self._response_refresh_time = time.time()
-
  self.statement_id = self._response.statement_id

- def fresh_response(self, delay: float):
- """Refresh the response if it is older than ``delay`` seconds.
-
- Args:
- delay: Minimum age in seconds before refreshing.
-
- Returns:
- The refreshed StatementResponse object.
- """
- if self.is_spark_sql:
- return self._response
-
- if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
- self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
-
- return self._response
-
  def result_data_at(self, chunk_index: int):
  """Fetch a specific result chunk by index.

@@ -166,17 +178,7 @@
  Returns:
  A StatementStatus object.
  """
- if self.persisted:
- return StatementStatus(
- state=StatementState.SUCCEEDED
- )
-
- if not self.statement_id:
- return StatementStatus(
- state=StatementState.PENDING
- )
-
- return self.fresh_response(delay=1).status
+ return self.response.status

  @property
  def state(self):
@@ -194,8 +196,6 @@
  Returns:
  The result manifest or None for Spark SQL results.
  """
- if self.is_spark_sql:
- return None
  return self.response.manifest

  @property
@@ -214,15 +214,7 @@
  Returns:
  True if the statement is done, otherwise False.
  """
- if self.persisted:
- return True
-
- if self._response is None:
- return False
-
- return self._response.status.state in [
- StatementState.CANCELED, StatementState.CLOSED, StatementState.FAILED, StatementState.SUCCEEDED
- ]
+ return self.state in DONE_STATES

  @property
  def failed(self):
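With the refresh-delay cache removed, each access to `response` (and thus `status`/`state`) re-queries the Statement Execution API until a state in DONE_STATES is reached, so a plain polling loop suffices. Illustrative only; the `result` object is assumed.

    import time

    while not result.done:   # done is now simply: state in DONE_STATES
        time.sleep(1)

    if result.failed:
        ...                  # inspect the failure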
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from typing import Any, Dict, Iterable, Tuple
5
+
6
+ _MISSING = object()
7
+
8
+
9
+ __all__ = [
10
+ "dicts_equal",
11
+ "dict_diff"
12
+ ]
13
+
14
+
15
+ def _normalize(obj: Any) -> Any:
16
+ """
17
+ Normalize nested structures so equality is stable:
18
+ - dict: sort keys + normalize values
19
+ - list/tuple: normalize items (keeps order)
20
+ - set: sort normalized items (orderless)
21
+ - float: keep as float (handled separately for tolerance)
22
+ """
23
+ if isinstance(obj, dict):
24
+ return {k: _normalize(obj[k]) for k in sorted(obj.keys())}
25
+ if isinstance(obj, (list, tuple)):
26
+ return [_normalize(x) for x in obj]
27
+ if isinstance(obj, set):
28
+ return sorted(_normalize(x) for x in obj)
29
+ return obj
30
+
31
+ def _equal(a: Any, b: Any, float_tol: float = 0.0) -> bool:
32
+ # Float tolerance (optional)
33
+ if isinstance(a, float) or isinstance(b, float):
34
+ if a is None or b is None:
35
+ return a is b
36
+ try:
37
+ return math.isclose(float(a), float(b), rel_tol=float_tol, abs_tol=float_tol)
38
+ except Exception:
39
+ pass
40
+
41
+ # Deep normalize compare for dict/list/set
42
+ return _normalize(a) == _normalize(b)
43
+
44
+ def dicts_equal(
45
+ a: Dict[str, Any],
46
+ b: Dict[str, Any],
47
+ *,
48
+ keys: Iterable[str] | None = None,
49
+ treat_missing_as_none: bool = True,
50
+ float_tol: float = 0.0,
51
+ ) -> bool:
52
+ """
53
+ Equality check for two dicts with options:
54
+ - keys: only compare these keys
55
+ - treat_missing_as_none: missing key == None if other side is None
56
+ - float_tol: tolerance for float comparisons
57
+ """
58
+ if keys is None:
59
+ keys = set(a.keys()) | set(b.keys())
60
+
61
+ for k in keys:
62
+ av = a.get(k, _MISSING)
63
+ bv = b.get(k, _MISSING)
64
+
65
+ if treat_missing_as_none:
66
+ if av is _MISSING and bv is None:
67
+ continue
68
+ if bv is _MISSING and av is None:
69
+ continue
70
+ if av is _MISSING and bv is _MISSING:
71
+ continue
72
+
73
+ if not _equal(av, bv, float_tol=float_tol):
74
+ return False
75
+
76
+ return True
77
+
78
+ def dict_diff(
79
+ a: Dict[str, Any],
80
+ b: Dict[str, Any],
81
+ *,
82
+ keys: Iterable[str] | None = None,
83
+ treat_missing_as_none: bool = True,
84
+ float_tol: float = 0.0,
85
+ ) -> Dict[str, Tuple[Any, Any]]:
86
+ """
87
+ Returns {key: (a_val, b_val)} for all keys that differ.
88
+ """
89
+ if keys is None:
90
+ keys = set(a.keys()) | set(b.keys())
91
+
92
+ out: Dict[str, Tuple[Any, Any]] = {}
93
+ for k in keys:
94
+ av = a.get(k, _MISSING)
95
+ bv = b.get(k, _MISSING)
96
+
97
+ if treat_missing_as_none:
98
+ if av is _MISSING and bv is None:
99
+ continue
100
+ if bv is _MISSING and av is None:
101
+ continue
102
+ if av is _MISSING and bv is _MISSING:
103
+ continue
104
+
105
+ if not _equal(av, bv, float_tol=float_tol):
106
+ out[k] = (None if av is _MISSING else av, None if bv is _MISSING else bv)
107
+ return out
yggdrasil/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.34"
+ __version__ = "0.1.37"