ygg 0.1.48__py3-none-any.whl → 0.1.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.48
+ Version: 0.1.50
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -235,6 +235,8 @@ Requires-Dist: pytest-asyncio; extra == "dev"
  Requires-Dist: black; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: mypy; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
  Dynamic: license-file

  # Yggdrasil (Python)
@@ -1,31 +1,31 @@
- ygg-0.1.48.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ ygg-0.1.50.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
  yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
- yggdrasil/version.py,sha256=GL56LdSW6fsXlq5LHiGjsIVgyhxVQeeDdO3Sd6nzZYc,22
+ yggdrasil/version.py,sha256=pMWaMbj0sqJPaN27zeKuthOtJ3nuofEVeTxWuJmKhTw,22
  yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
  yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
- yggdrasil/databricks/compute/cluster.py,sha256=0QjYHlaXSMgYqzMRy1Jypm2j7xoGRkPdwURZsQn_73U,43228
- yggdrasil/databricks/compute/execution_context.py,sha256=anOxfNms83dZ5FTknbfT8uj889LjheMqEx9W5NtJC9E,23094
- yggdrasil/databricks/compute/remote.py,sha256=nEN_Fr1Ouul_iKOf4B5QjEGscYAcl7nHjGsl2toRzrU,2874
+ yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
+ yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
+ yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
  yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
  yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
  yggdrasil/databricks/sql/exceptions.py,sha256=uC-BoG0u0LtORKUS1X3iLID8nc-0TV5MQN3M8RXHsO4,1495
- yggdrasil/databricks/sql/statement_result.py,sha256=kMBvpwyRv3_JUZSvxMS0c9Vqlh6LtCRJvXsDpu9RIAs,16137
+ yggdrasil/databricks/sql/statement_result.py,sha256=GZyVhhrUK5opNo-8HGqsMx0Rp9fa_0zqvn8McSHPQ8U,16310
  yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
  yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
  yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
- yggdrasil/databricks/workspaces/io.py,sha256=Tdde4LaGNJNT50R11OkEYZyNacyIW9QrOXMAicAlIr4,32208
- yggdrasil/databricks/workspaces/path.py,sha256=-XnCD9p42who3DAwnITVE1KyrZUSoXDKHA8iZi-7wk4,47743
+ yggdrasil/databricks/workspaces/io.py,sha256=D-B31roMGEJesAtUWl-O30lZJfgo-oFdK6KExzFc13I,33260
+ yggdrasil/databricks/workspaces/path.py,sha256=BAzaxEL2mWJ_6EnETnQdsPj06zkrbTO2f3reruR439k,49265
  yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
- yggdrasil/databricks/workspaces/workspace.py,sha256=c6CBBun2BskEnsP74pbLVOe_TKXZs4L4r4gPQtIzlQE,23821
+ yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
  yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
  yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
  yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
  yggdrasil/libs/databrickslib.py,sha256=NHJeUViHhZc8LI5oDVfi1axRyUy_pDJLy4hjD0KZEBQ,980
- yggdrasil/libs/pandaslib.py,sha256=Edm3SXgvr8qe2wsojuRvD1ewNB-Sff0RWoTqaddVruI,509
- yggdrasil/libs/polarslib.py,sha256=7EWP5iS8F9cW79M6d8Yg5ysjnOY3w4_k7TW-5DCRACw,511
+ yggdrasil/libs/pandaslib.py,sha256=GoUjh9dxZAFLe9hs8-6RliLD3jsH_BexYW1w-8BZzb0,618
+ yggdrasil/libs/polarslib.py,sha256=hnL8x6ygsyIoiJyIUMaeoji3fRzab4lBiHcMqa29C_Q,618
  yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
  yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
  yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
@@ -37,7 +37,7 @@ yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWg
  yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
  yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
  yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
- yggdrasil/pyutils/python_env.py,sha256=tuglnjdqHQjNh18qDladVoSEOjCD0RcnMEPYJ0tArOs,50985
+ yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
  yggdrasil/pyutils/retry.py,sha256=n5sr-Zu7fYrdLbjJ4WifK2lk0gEGmHv5FYt2HaCm1Qc,11916
  yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
  yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
@@ -49,14 +49,14 @@ yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdaf
  yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
  yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
  yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
- yggdrasil/types/cast/polars_cast.py,sha256=K2nnQ7bexArneYEhUPgV_6er4JNq6N5RmbMUhw-2_Xw,28766
+ yggdrasil/types/cast/polars_cast.py,sha256=RILcbfL4o1XDMp5H-06c0BMrDal5pehOT7ACiItDB6E,28791
  yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
  yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgTokg,21491
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
- ygg-0.1.48.dist-info/METADATA,sha256=gpScM9WWu0y7C5ebXB6gsJBe9VbehZEU__E7HfWp8hk,18452
- ygg-0.1.48.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ygg-0.1.48.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
- ygg-0.1.48.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
- ygg-0.1.48.dist-info/RECORD,,
+ ygg-0.1.50.dist-info/METADATA,sha256=ygOCZJjNIbuKuD-qKLnttguy71qIBxR0KnHDJE_XPSU,18528
+ ygg-0.1.50.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ygg-0.1.50.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ ygg-0.1.50.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ ygg-0.1.50.dist-info/RECORD,,
yggdrasil/databricks/compute/cluster.py CHANGED
@@ -144,6 +144,7 @@ class Cluster(WorkspaceService):
  single_user_name: Optional[str] = None,
  runtime_engine: Optional["RuntimeEngine"] = None,
  libraries: Optional[list[str]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **kwargs
  ) -> "Cluster":
  """Create or reuse a cluster that mirrors the current Python environment.
@@ -152,9 +153,10 @@ class Cluster(WorkspaceService):
  workspace: Workspace to use for the cluster.
  cluster_id: Optional cluster id to reuse.
  cluster_name: Optional cluster name to reuse.
- single_user_name: Optional user name for single-user clusters.
+ single_user_name: Optional username for single-user clusters.
  runtime_engine: Optional Databricks runtime engine.
  libraries: Optional list of libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **kwargs: Additional cluster specification overrides.

  Returns:
@@ -176,6 +178,7 @@ class Cluster(WorkspaceService):
  single_user_name=single_user_name,
  runtime_engine=runtime_engine,
  libraries=libraries,
+ update_timeout=update_timeout,
  **kwargs
  )
  )
@@ -190,6 +193,7 @@ class Cluster(WorkspaceService):
  single_user_name: Optional[str] = "current",
  runtime_engine: Optional["RuntimeEngine"] = None,
  libraries: Optional[list[str]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **kwargs
  ) -> "Cluster":
  """Create/update a cluster to match the local Python environment.
@@ -198,9 +202,10 @@ class Cluster(WorkspaceService):
  source: Optional PythonEnv to mirror (defaults to current).
  cluster_id: Optional cluster id to update.
  cluster_name: Optional cluster name to update.
- single_user_name: Optional single user name for the cluster.
+ single_user_name: Optional single username for the cluster.
  runtime_engine: Optional runtime engine selection.
  libraries: Optional list of libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **kwargs: Additional cluster specification overrides.

  Returns:
@@ -242,6 +247,7 @@ class Cluster(WorkspaceService):
  single_user_name=single_user_name,
  runtime_engine=runtime_engine or RuntimeEngine.PHOTON,
  libraries=libraries,
+ update_timeout=update_timeout,
  **kwargs
  )

@@ -380,7 +386,9 @@ class Cluster(WorkspaceService):
  start = time.time()
  sleep_time = tick

- if isinstance(timeout, dt.timedelta):
+ if not timeout:
+ timeout = 20 * 60.0
+ elif isinstance(timeout, dt.timedelta):
  timeout = timeout.total_seconds()

  while self.is_pending:
@@ -412,12 +420,14 @@ class Cluster(WorkspaceService):
  # Extract "major.minor" from strings like "17.3.x-scala2.13-ml-gpu"
  v = self.spark_version

- if v is None:
+ if not v:
  return None

  parts = v.split(".")
+
  if len(parts) < 2:
  return None
+
  return ".".join(parts[:2]) # e.g. "17.3"

  @property
@@ -428,8 +438,10 @@ class Cluster(WorkspaceService):
  When the runtime can't be mapped, returns ``None``.
  """
  v = self.runtime_version
- if v is None:
+
+ if not v:
  return None
+
  return _PYTHON_BY_DBR.get(v)

  # ------------------------------------------------------------------ #
@@ -586,6 +598,7 @@ class Cluster(WorkspaceService):
  cluster_id: Optional[str] = None,
  cluster_name: Optional[str] = None,
  libraries: Optional[List[Union[str, "Library"]]] = None,
+ update_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ):
  """Create a new cluster or update an existing one.
@@ -594,6 +607,7 @@ class Cluster(WorkspaceService):
  cluster_id: Optional cluster id to update.
  cluster_name: Optional cluster name to update or create.
  libraries: Optional libraries to install.
+ update_timeout: wait timeout, if None it will not wait completion
  **cluster_spec: Cluster specification overrides.

  Returns:
@@ -609,24 +623,28 @@ class Cluster(WorkspaceService):
  return found.update(
  cluster_name=cluster_name,
  libraries=libraries,
+ wait_timeout=update_timeout,
  **cluster_spec
  )

  return self.create(
  cluster_name=cluster_name,
  libraries=libraries,
+ wait_timeout=update_timeout,
  **cluster_spec
  )

  def create(
  self,
  libraries: Optional[List[Union[str, "Library"]]] = None,
+ wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ) -> str:
  """Create a new cluster and optionally install libraries.

  Args:
  libraries: Optional list of libraries to install after creation.
+ wait_timeout: wait timeout, if None it will not wait completion
  **cluster_spec: Cluster specification overrides.

  Returns:
@@ -646,14 +664,17 @@ class Cluster(WorkspaceService):
  update_details,
  )

- self.details = self.clusters_client().create_and_wait(**update_details)
+ self.details = self.clusters_client().create(**update_details)

  LOGGER.info(
  "Created %s",
  self
  )

- self.install_libraries(libraries=libraries, raise_error=False)
+ self.install_libraries(libraries=libraries, raise_error=False, wait_timeout=None)
+
+ if wait_timeout:
+ self.wait_for_status(timeout=wait_timeout)

  return self

@@ -661,7 +682,7 @@ class Cluster(WorkspaceService):
  self,
  libraries: Optional[List[Union[str, "Library"]]] = None,
  access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
- wait_timeout: Union[float, dt.timedelta] = dt.timedelta(minutes=20),
+ wait_timeout: Optional[Union[float, dt.timedelta]] = dt.timedelta(minutes=20),
  **cluster_spec: Any
  ) -> "Cluster":
  """Update cluster configuration and optionally install libraries.
@@ -708,7 +729,7 @@ class Cluster(WorkspaceService):
  self, diff
  )

- self.wait_for_status()
+ self.wait_for_status(timeout=wait_timeout)
  self.clusters_client().edit(**update_details)
  self.update_permissions(access_control_list=access_control_list)

@@ -727,7 +748,7 @@ class Cluster(WorkspaceService):
  access_control_list: Optional[List["ClusterAccessControlRequest"]] = None,
  ):
  if not access_control_list:
- access_control_list = self.default_permissions()
+ return self

  access_control_list = self._check_permission(access_control_list)

@@ -745,6 +766,7 @@ class Cluster(WorkspaceService):
  permission_level=ClusterPermissionLevel.CAN_MANAGE
  )
  for name in current_groups
+ if name not in {"users"}
  ]

  def _check_permission(
@@ -862,18 +884,22 @@ class Cluster(WorkspaceService):
  Returns:
  The current Cluster instance.
  """
+ if self.is_running:
+ return self
+
  self.wait_for_status()

- if not self.is_running:
- LOGGER.debug("Starting %s", self)
+ if self.is_running:
+ return self

- if wait_timeout:
- self.clusters_client().start(cluster_id=self.cluster_id)
- self.wait_for_status(timeout=wait_timeout.total_seconds())
- else:
- self.clusters_client().start(cluster_id=self.cluster_id)
+ LOGGER.debug("Starting %s", self)
+
+ self.clusters_client().start(cluster_id=self.cluster_id)

- LOGGER.info("Started %s", self)
+ LOGGER.info("Started %s", self)
+
+ if wait_timeout:
+ self.wait_for_status(timeout=wait_timeout.total_seconds())

  return self

@@ -889,7 +915,7 @@

  if self.is_running:
  self.details = self.clusters_client().restart_and_wait(cluster_id=self.cluster_id)
- return self.wait_for_status()
+ return self

  return self.start()

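The update_timeout plumbing above turns cluster creation into a non-blocking create() followed by an optional wait_for_status(). A minimal usage sketch, assuming an already-configured Workspace instance named workspace and an illustrative cluster name (neither is part of this diff):

    import datetime as dt

    clusters = workspace.clusters()  # "workspace" is assumed to be a connected Workspace

    # Wait up to 5 minutes for the cluster to become ready; passing
    # update_timeout=None returns as soon as the create/update request is submitted.
    cluster = clusters.replicated_current_environment(
        workspace=workspace,
        cluster_name="ygg-dev-cluster",          # illustrative name
        update_timeout=dt.timedelta(minutes=5),
    )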
yggdrasil/databricks/compute/execution_context.py CHANGED
@@ -180,7 +180,7 @@ print(json.dumps(meta))"""
  """
  return self.cluster.workspace.sdk()

- def _create_command(
+ def create_command(
  self,
  language: "Language",
  ) -> any:
@@ -192,17 +192,29 @@ print(json.dumps(meta))"""
  Returns:
  The created command execution context response.
  """
- self.cluster.ensure_running()
-
  LOGGER.debug(
  "Creating Databricks command execution context for %s",
  self.cluster
  )

- created = self._workspace_client().command_execution.create_and_wait(
- cluster_id=self.cluster.cluster_id,
- language=language,
+ try:
+ created = self._workspace_client().command_execution.create_and_wait(
+ cluster_id=self.cluster.cluster_id,
+ language=language,
+ )
+ except:
+ self.cluster.ensure_running()
+
+ created = self._workspace_client().command_execution.create_and_wait(
+ cluster_id=self.cluster.cluster_id,
+ language=language,
+ )
+
+ LOGGER.info(
+ "Created Databricks command execution context %s",
+ self
  )
+
  created = getattr(created, "response", created)

  return created
@@ -220,10 +232,6 @@ print(json.dumps(meta))"""
  The connected ExecutionContext instance.
  """
  if self.context_id is not None:
- LOGGER.debug(
- "Execution context already open for %s",
- self
- )
  return self

  self.language = language or self.language
@@ -231,7 +239,7 @@ print(json.dumps(meta))"""
  if self.language is None:
  self.language = Language.PYTHON

- ctx = self._create_command(language=self.language)
+ ctx = self.create_command(language=self.language)

  context_id = ctx.id
  if not context_id:
yggdrasil/databricks/compute/remote.py CHANGED
@@ -39,6 +39,7 @@ def databricks_remote_compute(
  timeout: Optional[dt.timedelta] = None,
  env_keys: Optional[List[str]] = None,
  force_local: bool = False,
+ update_timeout: Optional[Union[float, dt.timedelta]] = None,
  **options
  ) -> Callable[[Callable[..., ReturnType]], Callable[..., ReturnType]]:
  """Return a decorator that executes functions on a remote cluster.
@@ -52,6 +53,7 @@ def databricks_remote_compute(
  timeout: Optional execution timeout for remote calls.
  env_keys: Optional environment variable names to forward.
  force_local: Force local execution
+ update_timeout: creation or update wait timeout
  **options: Extra options forwarded to the execution decorator.

  Returns:
@@ -82,7 +84,8 @@ def databricks_remote_compute(
  cluster = workspace.clusters().replicated_current_environment(
  workspace=workspace,
  cluster_name=cluster_name,
- single_user_name=workspace.current_user.user_name
+ single_user_name=workspace.current_user.user_name,
+ update_timeout=update_timeout
  )

  cluster.ensure_running(wait_timeout=None)
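For decorator users, the new argument is simply forwarded to the cluster replication step shown above. A hedged sketch of how it might be called (the decorated function and cluster name are illustrative; the import path is inferred from the RECORD listing):

    import datetime as dt

    from yggdrasil.databricks.compute.remote import databricks_remote_compute

    @databricks_remote_compute(
        cluster_name="ygg-remote",                 # illustrative
        update_timeout=dt.timedelta(minutes=10),   # bound the cluster create/update wait; None skips waiting
    )
    def row_count(n: int) -> int:
        # Body runs on the replicated Databricks cluster unless force_local=True.
        return sum(1 for _ in range(n))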
yggdrasil/databricks/sql/statement_result.py CHANGED
@@ -344,10 +344,17 @@ class StatementResult:
  if self.persisted:
  if self._arrow_table is not None:
  return self._arrow_table.schema
- return spark_schema_to_arrow_schema(self._spark_df.schema)
+ elif self._spark_df is not None:
+ return spark_schema_to_arrow_schema(self._spark_df.schema)
+ raise NotImplementedError("")
+
+ manifest = self.manifest
+
+ if manifest is None:
+ return pa.schema([])

  fields = [
- column_info_to_arrow_field(_) for _ in self.manifest.schema.columns
+ column_info_to_arrow_field(_) for _ in manifest.schema.columns
  ]

  return pa.schema(fields)
@@ -362,7 +369,7 @@ class StatementResult:
  An Arrow Table containing all rows.
  """
  if self.persisted:
- if self._arrow_table:
+ if self._arrow_table is not None:
  return self._arrow_table
  else:
  return self._spark_df.toArrow()
@@ -370,7 +377,6 @@ class StatementResult:
  batches = list(self.to_arrow_batches(parallel_pool=parallel_pool))

  if not batches:
- # empty table with no columns
  return pa.Table.from_batches([], schema=self.arrow_schema())

  return pa.Table.from_batches(batches)
@@ -501,8 +507,9 @@ class StatementResult:
  Returns:
  A Spark DataFrame with the result rows.
  """
- if self._spark_df:
+ if self._spark_df is not None:
  return self._spark_df

  self._spark_df = arrow_table_to_spark_dataframe(self.to_arrow_table())
+
  return self._spark_df
yggdrasil/databricks/workspaces/io.py CHANGED
@@ -13,8 +13,8 @@ from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat

  from .path_kind import DatabricksPathKind
  from ...libs.databrickslib import databricks
- from ...types.cast.pandas_cast import PandasDataFrame
- from ...types.cast.polars_pandas_cast import PolarsDataFrame
+ from ...libs.pandaslib import pandas, PandasDataFrame
+ from ...libs.polarslib import polars, PolarsDataFrame
  from ...types.cast.registry import convert

  if databricks is not None:
@@ -42,7 +42,6 @@ class DatabricksIO(ABC, IO):
  path: "DatabricksPath",
  mode: str,
  encoding: Optional[str] = None,
- compression: Optional[str] = "detect",
  position: int = 0,
  buffer: Optional[io.BytesIO] = None,
  ):
@@ -50,7 +49,6 @@ class DatabricksIO(ABC, IO):

  self.encoding = encoding
  self.mode = mode
- self.compression = compression

  self.path = path

@@ -111,7 +109,6 @@ class DatabricksIO(ABC, IO):
  path=path,
  mode=mode,
  encoding=encoding,
- compression=compression,
  position=position,
  buffer=buffer,
  )
@@ -120,7 +117,6 @@ class DatabricksIO(ABC, IO):
  path=path,
  mode=mode,
  encoding=encoding,
- compression=compression,
  position=position,
  buffer=buffer,
  )
@@ -129,7 +125,6 @@ class DatabricksIO(ABC, IO):
  path=path,
  mode=mode,
  encoding=encoding,
- compression=compression,
  position=position,
  buffer=buffer,
  )
@@ -226,7 +221,6 @@ class DatabricksIO(ABC, IO):
  path=kwargs.get("path", self.path),
  mode=kwargs.get("mode", self.mode),
  encoding=kwargs.get("encoding", self.encoding),
- compression=kwargs.get("compression", self.compression),
  position=kwargs.get("position", self.position),
  buffer=kwargs.get("buffer", self._buffer),
  )
@@ -264,8 +258,7 @@ class DatabricksIO(ABC, IO):
  None.
  """
  self.flush()
- if self._buffer is not None:
- self._buffer.close()
+ self.clear_buffer()

  def fileno(self):
  """Return a pseudo file descriptor based on object hash.
@@ -403,9 +396,6 @@ class DatabricksIO(ABC, IO):
  Returns:
  The read bytes or string depending on mode.
  """
- if not self.readable():
- raise IOError("File not open for reading")
-
  current_position = self.position
  all_data = self.read_all_bytes(use_cache=use_cache)

@@ -431,9 +421,6 @@ class DatabricksIO(ABC, IO):
  Returns:
  The next line as bytes or string.
  """
- if not self.readable():
- raise IOError("File not open for reading")
-
  if self.encoding:
  # Text-mode: accumulate characters
  out_chars = []
@@ -475,9 +462,6 @@ class DatabricksIO(ABC, IO):
  Returns:
  A list of lines.
  """
- if not self.readable():
- raise IOError("File not open for reading")
-
  lines = []
  total = 0

@@ -492,14 +476,6 @@ class DatabricksIO(ABC, IO):

  return lines

- def appendable(self):
- """Return True when the file is open in append mode.
-
- Returns:
- True if in append mode.
- """
- return "a" in self.mode
-
  def writable(self):
  """Return True to indicate write support.

@@ -561,9 +537,6 @@ class DatabricksIO(ABC, IO):
  Returns:
  The number of bytes written.
  """
- if not self.writable():
- raise IOError("File not open for writing")
-
  if isinstance(data, str):
  data = data.encode(self.encoding or "utf-8")

@@ -664,8 +637,12 @@ class DatabricksIO(ABC, IO):
  return self.write_polars(table, file_format=file_format, batch_size=batch_size, **kwargs)
  elif isinstance(table, PandasDataFrame):
  return self.write_pandas(table, file_format=file_format, batch_size=batch_size, **kwargs)
- else:
- raise ValueError(f"Cannot write {type(table)} to {self.path}")
+
+ return self.write_arrow(
+ table=table,
+ file_format=file_format,
+ batch_size=batch_size
+ )

  # ---- Arrow ----

@@ -689,16 +666,18 @@ class DatabricksIO(ABC, IO):
  self.seek(0)

  if isinstance(file_format, ParquetFileFormat):
- return pq.read_table(self, **kwargs)
+ pq.read_table(self, **kwargs)

- if isinstance(file_format, CsvFileFormat):
- return pcsv.read_csv(self, parse_options=file_format.parse_options)
+ elif isinstance(file_format, CsvFileFormat):
+ pcsv.read_csv(self, parse_options=file_format.parse_options)

- raise ValueError(f"Unsupported file format for Arrow table: {file_format}")
+ else:
+ ValueError(f"Unsupported file format for Arrow table: {file_format}")

  def write_arrow(
  self,
  table: Union[pa.Table, pa.RecordBatch],
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
  ):
@@ -706,6 +685,7 @@ class DatabricksIO(ABC, IO):

  Args:
  table: Arrow table or batch to write.
+ file_format: Optional file format override.
  batch_size: Optional batch size for writes.
  **kwargs: Format-specific options.

@@ -717,6 +697,7 @@ class DatabricksIO(ABC, IO):

  return self.write_arrow_table(
  table=table,
+ file_format=file_format,
  batch_size=batch_size,
  **kwargs
  )
@@ -776,12 +757,14 @@ class DatabricksIO(ABC, IO):

  def read_arrow_batches(
  self,
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
  ):
  """Yield Arrow record batches from the file.

  Args:
+ file_format: Optional file format override.
  batch_size: Optional batch size for reads.
  **kwargs: Format-specific options.

@@ -790,7 +773,11 @@ class DatabricksIO(ABC, IO):
  """
  return (
  self
- .read_arrow_table(batch_size=batch_size, **kwargs)
+ .read_arrow_table(
+ file_format=file_format,
+ batch_size=batch_size,
+ **kwargs
+ )
  .to_batches(max_chunksize=batch_size)
  )

@@ -798,23 +785,36 @@ class DatabricksIO(ABC, IO):

  def read_pandas(
  self,
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
  ):
  """Read the file into a pandas DataFrame.

  Args:
+ file_format: Optional file format override.
  batch_size: Optional batch size for reads.
  **kwargs: Format-specific options.

  Returns:
  A pandas DataFrame with the file contents.
  """
- return self.read_arrow_table(batch_size=batch_size, **kwargs).to_pandas()
+ file_format = self.path.file_format if file_format is None else file_format
+ self.seek(0)
+
+ if isinstance(file_format, ParquetFileFormat):
+ pandas.read_parquet(self, **kwargs)
+
+ elif isinstance(file_format, CsvFileFormat):
+ pandas.read_csv(self, **kwargs)
+
+ else:
+ raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")

  def write_pandas(
  self,
- df,
+ df: PandasDataFrame,
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
  ):
@@ -822,13 +822,26 @@ class DatabricksIO(ABC, IO):

  Args:
  df: pandas DataFrame to write.
+ file_format: Optional file format override.
  batch_size: Optional batch size for writes.
  **kwargs: Format-specific options.

  Returns:
  None.
  """
- self.write_arrow_table(pa.table(df), batch_size=batch_size, **kwargs)
+ file_format = self.path.file_format if file_format is None else FileFormat
+ buffer = io.BytesIO()
+
+ if isinstance(file_format, ParquetFileFormat):
+ df.to_parquet(buffer, **kwargs)
+
+ elif isinstance(file_format, CsvFileFormat):
+ df.to_csv(buffer, **kwargs)
+
+ else:
+ raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")
+
+ self.write_all_bytes(data=buffer.getvalue())

  # ---- Polars ----

@@ -848,22 +861,21 @@ class DatabricksIO(ABC, IO):
  Returns:
  A polars DataFrame with the file contents.
  """
- import polars as pl
-
  file_format = self.path.file_format if file_format is None else file_format
  self.seek(0)

  if isinstance(file_format, ParquetFileFormat):
- return pl.read_parquet(self, **kwargs)
+ polars.read_parquet(self, **kwargs)

- if isinstance(file_format, CsvFileFormat):
- return pl.read_csv(self, **kwargs)
+ elif isinstance(file_format, CsvFileFormat):
+ polars.read_csv(self, **kwargs)

- raise ValueError(f"Unsupported file format for Polars DataFrame: {file_format}")
+ else:
+ raise ValueError(f"Unsupported file format for Polars DataFrame: {file_format}")

  def write_polars(
  self,
- df,
+ df: PolarsDataFrame,
  file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
@@ -975,28 +987,40 @@ class DatabricksVolumeIO(DatabricksIO):
  """Read bytes from a volume file.

  Args:
- start: Starting byte offset.
+ start: Starting byte offset (0-based).
  length: Number of bytes to read.
  allow_not_found: Whether to suppress missing-path errors.

  Returns:
  Bytes read from the file.
  """
- if length == 0:
+ if length <= 0:
  return b""
+ if start < 0:
+ raise ValueError(f"start must be >= 0, got {start}")
+ if length < 0:
+ raise ValueError(f"length must be >= 0, got {length}")

  sdk = self.workspace.sdk()
  client = sdk.files
  full_path = self.path.files_full_path()

- resp = client.download(full_path)
- result = (
- resp.contents
- .seek(start, io.SEEK_SET)
- .read(length)
- )
+ try:
+ resp = client.download(full_path)
+ except Exception as e:
+ # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
+ if allow_not_found and any(s in str(e).lower() for s in ("not found", "not exist", "404")):
+ return b""
+ raise

- return result
+ data = resp.contents.read()
+
+ # If start is past EOF, return empty (common file-like behavior).
+ if start >= len(data):
+ return b""
+
+ end = start + length
+ return data[start:end]

  def write_all_bytes(self, data: bytes):
  """Write bytes to a volume file.
yggdrasil/databricks/workspaces/path.py CHANGED
@@ -12,17 +12,18 @@ from pathlib import PurePosixPath
  from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Iterable

  import pyarrow as pa
+ import pyarrow.dataset as ds
  from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
  from pyarrow.fs import FileInfo, FileType, FileSystem
- import pyarrow.dataset as ds

  from .io import DatabricksIO
  from .path_kind import DatabricksPathKind
  from ...libs.databrickslib import databricks
- from ...types import cast_arrow_tabular, cast_polars_dataframe
+ from ...libs.pandaslib import PandasDataFrame
+ from ...libs.polarslib import polars, PolarsDataFrame
+ from ...types.cast.arrow_cast import cast_arrow_tabular
  from ...types.cast.cast_options import CastOptions
- from ...types.cast.polars_cast import polars_converter
- from ...types.cast.polars_pandas_cast import PolarsDataFrame
+ from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
  from ...types.cast.registry import convert, register_converter

  if databricks is not None:
@@ -494,13 +495,17 @@ class DatabricksPath:

  try:
  info = sdk.files.get_directory_metadata(full_path)
- mtime = (
- dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
- if info.last_modified
- else None
- )

- return self.reset_metadata(is_file=False, is_dir=True, size=info, mtime=mtime)
+ if info is None:
+ mtime = dt.datetime.now(tz=dt.timezone.utc)
+ else:
+ mtime = (
+ dt.datetime.strptime(info.last_modified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=dt.timezone.utc)
+ if info.last_modified
+ else None
+ )
+
+ return self.reset_metadata(is_file=False, is_dir=True, size=0, mtime=mtime)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass

@@ -635,22 +640,12 @@ class DatabricksPath:
  Returns:
  The DatabricksPath instance.
  """
- try:
- if self.kind == DatabricksPathKind.WORKSPACE:
- self.make_workspace_dir(parents=parents, exist_ok=exist_ok)
- elif self.kind == DatabricksPathKind.VOLUME:
- self.make_volume_dir(parents=parents, exist_ok=exist_ok)
- elif self.kind == DatabricksPathKind.DBFS:
- self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)
- except (NotFound, ResourceDoesNotExist):
- if not parents or self.parent == self:
- raise
-
- self.parent.mkdir(parents=True, exist_ok=True)
- self.mkdir(parents=False, exist_ok=exist_ok)
- except (AlreadyExists, ResourceAlreadyExists):
- if not exist_ok:
- raise
+ if self.kind == DatabricksPathKind.WORKSPACE:
+ self.make_workspace_dir(parents=parents, exist_ok=exist_ok)
+ elif self.kind == DatabricksPathKind.VOLUME:
+ self.make_volume_dir(parents=parents, exist_ok=exist_ok)
+ elif self.kind == DatabricksPathKind.DBFS:
+ self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)

  return self

@@ -766,15 +761,13 @@ class DatabricksPath:
  Returns:
  The DatabricksPath instance.
  """
- try:
- if self.kind == DatabricksPathKind.VOLUME:
- return self._remove_volume_file()
- elif self.kind == DatabricksPathKind.WORKSPACE:
- return self._remove_workspace_file()
- elif self.kind == DatabricksPathKind.DBFS:
- return self._remove_dbfs_file()
- finally:
- self.reset_metadata()
+ if self.kind == DatabricksPathKind.VOLUME:
+ return self._remove_volume_file()
+ elif self.kind == DatabricksPathKind.WORKSPACE:
+ return self._remove_workspace_file()
+ elif self.kind == DatabricksPathKind.DBFS:
+ return self._remove_dbfs_file()
+
  return self

  def _remove_volume_file(self):
@@ -783,6 +776,9 @@ class DatabricksPath:
  sdk.files.delete(self.files_full_path())
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass
+ finally:
+ self.reset_metadata()
+
  return self

  def _remove_workspace_file(self):
@@ -791,6 +787,9 @@ class DatabricksPath:
  sdk.workspace.delete(self.workspace_full_path(), recursive=True)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass
+ finally:
+ self.reset_metadata()
+
  return self

  def _remove_dbfs_file(self):
@@ -799,6 +798,9 @@ class DatabricksPath:
  sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass
+ finally:
+ self.reset_metadata()
+
  return self

  def rmdir(self, recursive: bool = True):
@@ -823,7 +825,9 @@ class DatabricksPath:
  sdk.workspace.delete(self.workspace_full_path(), recursive=recursive)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass
- self.reset_metadata()
+ finally:
+ self.reset_metadata()
+
  return self

  def _remove_dbfs_dir(self, recursive: bool = True):
@@ -832,7 +836,9 @@ class DatabricksPath:
  sdk.dbfs.delete(self.dbfs_full_path(), recursive=recursive)
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
  pass
- self.reset_metadata()
+ finally:
+ self.reset_metadata()
+
  return self

  def _remove_volume_dir(self, recursive: bool = True):
@@ -1038,7 +1044,7 @@ class DatabricksPath:
  Returns:
  None.
  """
- if self.is_file() and dest.is_file():
+ if self.is_file():
  with self.open(mode="rb") as src:
  src.copy_to(dest=dest)

@@ -1063,6 +1069,13 @@ class DatabricksPath:
  else:
  raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")

+ def write_bytes(self, data: bytes):
+ if hasattr(data, "read"):
+ data = data.read()
+
+ with self.open("wb") as f:
+ f.write_all_bytes(data=data)
+
  # -------------------------
  # Data ops (Arrow / Pandas / Polars)
  # -------------------------
@@ -1206,6 +1219,7 @@ class DatabricksPath:

  def read_pandas(
  self,
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  concat: bool = True,
  **kwargs
@@ -1213,6 +1227,7 @@ class DatabricksPath:
  """Read the path into a pandas DataFrame.

  Args:
+ file_format: Optional file format override.
  batch_size: Optional batch size for reads.
  concat: Whether to concatenate results for directories.
  **kwargs: Format-specific options.
@@ -1221,14 +1236,26 @@ class DatabricksPath:
  A pandas DataFrame or list of DataFrames if concat=False.
  """
  if concat:
- return self.read_arrow_table(batch_size=batch_size, concat=True, **kwargs).to_pandas()
+ return self.read_arrow_table(
+ file_format=file_format,
+ batch_size=batch_size,
+ concat=True,
+ **kwargs
+ ).to_pandas()
+
+ tables = self.read_arrow_table(
+ batch_size=batch_size,
+ file_format=file_format,
+ concat=False,
+ **kwargs
+ )

- tables = self.read_arrow_table(batch_size=batch_size, concat=False, **kwargs)
  return [t.to_pandas() for t in tables] # type: ignore[arg-type]

  def write_pandas(
  self,
- df,
+ df: PandasDataFrame,
+ file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  **kwargs
  ):
@@ -1236,13 +1263,41 @@ class DatabricksPath:

  Args:
  df: pandas DataFrame to write.
+ file_format: Optional file format override.
  batch_size: Optional batch size for writes.
  **kwargs: Format-specific options.

  Returns:
  The DatabricksPath instance.
  """
- return self.write_arrow_table(pa.table(df), batch_size=batch_size, **kwargs)
+ with self.connect(clone=False) as connected:
+ if connected.is_dir_sink():
+ seed = int(time.time() * 1000)
+
+ def df_batches(pdf, bs: int):
+ for start in range(0, len(pdf), batch_size):
+ yield pdf.iloc[start:start + batch_size]
+
+ for i, batch in enumerate(df_batches(df, batch_size)):
+ part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
+
+ with part_path.open(mode="wb", clone=False) as f:
+ f.write_pandas(
+ batch,
+ file_format=file_format,
+ batch_size=batch_size,
+ **kwargs
+ )
+ else:
+ with connected.open(mode="wb", clone=False) as f:
+ f.write_pandas(
+ df,
+ file_format=file_format,
+ batch_size=batch_size,
+ **kwargs
+ )
+
+ return self

  def read_polars(
  self,
@@ -1264,8 +1319,6 @@ class DatabricksPath:
  Returns:
  A polars DataFrame or list of DataFrames if concat=False.
  """
- import polars as pl
-
  if self.is_file():
  with self.open("rb") as f:
  return f.read_polars(batch_size=batch_size, **kwargs)
@@ -1278,10 +1331,10 @@ class DatabricksPath:
  dfs.append(f.read_polars(batch_size=batch_size, **kwargs))

  if not dfs:
- return pl.DataFrame()
+ return polars.DataFrame()

  if concat:
- return pl.concat(dfs, how=how, rechunk=rechunk)
+ return polars.concat(dfs, how=how, rechunk=rechunk)
  return dfs # type: ignore[return-value]

  raise FileNotFoundError(f"Path does not exist: {self}")
@@ -1312,12 +1365,10 @@ class DatabricksPath:
  Notes:
  - If `df` is a LazyFrame, we collect it first (optionally streaming).
  """
- import polars as pl
-
- if isinstance(df, pl.LazyFrame):
+ if isinstance(df, polars.LazyFrame):
  df = df.collect()

- if not isinstance(df, pl.DataFrame):
+ if not isinstance(df, polars.DataFrame):
  raise TypeError(f"write_polars expects pl.DataFrame or pl.LazyFrame, got {type(df)!r}")

  with self.connect() as connected:
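Two of the additions above are worth calling out: DatabricksPath.write_bytes accepts raw bytes or a file-like object, and write_pandas now fans a DataFrame out into parquet part files when the target resolves to a directory sink. A hedged sketch of both, assuming path and directory are already-constructed DatabricksPath objects (their construction is not shown in this diff):

    import io
    import pandas as pd

    # write_bytes drains file-like inputs via .read() before delegating to write_all_bytes.
    path.write_bytes(b"raw payload")
    path.write_bytes(io.BytesIO(b"streamed payload"))

    # Directory targets are split into <seed>-<index>-<rand>.parquet part files,
    # one per batch_size rows; single-file targets are written in one shot.
    df = pd.DataFrame({"id": range(10), "value": list("abcdefghij")})
    directory.write_pandas(df, batch_size=4)
    round_trip = directory.read_pandas(concat=True)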
yggdrasil/databricks/workspaces/workspace.py CHANGED
@@ -8,7 +8,6 @@ from abc import ABC
  from dataclasses import dataclass
  from pathlib import Path
  from typing import (
- Any,
  BinaryIO,
  Iterator,
  Optional,
@@ -55,7 +54,9 @@ def _get_env_product_version():
  v = os.getenv("DATABRICKS_PRODUCT_VERSION")

  if not v:
- return YGGDRASIL_VERSION
+ if _get_env_product() == "yggdrasil":
+ return YGGDRASIL_VERSION
+ return None
  return v.strip().lower()


@@ -106,11 +107,12 @@ class Workspace:
  product: Optional[str] = dataclasses.field(default_factory=_get_env_product, repr=False)
  product_version: Optional[str] = dataclasses.field(default_factory=_get_env_product_version, repr=False)
  product_tag: Optional[str] = dataclasses.field(default_factory=_get_env_product_tag, repr=False)
+ custom_tags: Optional[dict] = dataclasses.field(default=None, repr=False)

  # Runtime cache (never serialized)
- _sdk: Any = dataclasses.field(init=False, default=None, repr=False, compare=False, hash=False)
- _was_connected: bool = dataclasses.field(init=False, default=False, repr=False, compare=False)
- _cached_token: Optional[str] = dataclasses.field(init=False, default=None, repr=False, compare=False)
+ _sdk: Optional["WorkspaceClient"] = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+ _was_connected: bool = dataclasses.field(default=None, repr=False, compare=False, hash=False)
+ _cached_token: Optional[str] = dataclasses.field(default=None, repr=False, compare=False, hash=False)

  # -------------------------
  # Pickle support
@@ -175,19 +177,43 @@ class Workspace:
  # -------------------------
  def clone_instance(
  self,
- **kwargs
  ) -> "Workspace":
  """Clone the workspace config with overrides.

- Args:
- **kwargs: Field overrides for the clone.
-
  Returns:
  A new Workspace instance with updated fields.
  """
- state = self.__getstate__()
- state.update(kwargs)
- return Workspace().__setstate__(state)
+ return Workspace(
+ host = self.host,
+ account_id = self.account_id,
+ token = self.token,
+ client_id = self.client_id,
+ client_secret = self.client_secret,
+ token_audience = self.token_audience,
+ azure_workspace_resource_id = self.azure_workspace_resource_id,
+ azure_use_msi = self.azure_use_msi,
+ azure_client_secret = self.azure_client_secret,
+ azure_client_id = self.azure_client_id,
+ azure_tenant_id = self.azure_tenant_id,
+ azure_environment = self.azure_environment,
+ google_credentials = self.google_credentials,
+ google_service_account = self.google_service_account,
+ profile = self.profile,
+ config_file = self.config_file,
+ auth_type = self.auth_type,
+ http_timeout_seconds = self.http_timeout_seconds,
+ retry_timeout_seconds = self.retry_timeout_seconds,
+ debug_truncate_bytes = self.debug_truncate_bytes,
+ debug_headers = self.debug_headers,
+ rate_limit = self.rate_limit,
+ product = self.product,
+ product_version = self.product_version,
+ product_tag = self.product_tag,
+ custom_tags = self.custom_tags,
+ _sdk = self._sdk,
+ _was_connected = self._was_connected,
+ _cached_token = self._cached_token,
+ )

  # -------------------------
  # SDK connection
@@ -300,8 +326,9 @@ class Workspace:
  Drop the cached WorkspaceClient (no actual close needed, but this
  avoids reusing stale config).
  """
- self._sdk = None
- self._was_connected = False
+ if self._sdk is not None:
+ self._sdk = None
+ self._was_connected = False

  # ------------------------------------------------------------------ #
  # Properties
@@ -561,28 +588,19 @@ class Workspace:
  Returns:
  A dict of default tags.
  """
- return {
+ base = {
  k: v
  for k, v in (
  ("Product", self.product),
- ("ProductVersion", self.product_version),
  ("ProductTag", self.product_tag),
- ("ProductUser", self.current_user.user_name)
  )
  if v
  }

- def merge_tags(self, existing: dict | None = None):
- """Merge default tags with an existing set.
-
- Args:
- existing: Optional existing tags.
+ if self.custom_tags:
+ base.update(self.custom_tags)

- Returns:
- A dict of merged tags.
- """
- if existing:
- return self.default_tags()
+ return base

  def sql(
  self,
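The net effect of the tag changes: default_tags now emits only Product and ProductTag and then layers the new custom_tags field on top, while merge_tags is gone. A small sketch of the resulting behavior, with field values chosen purely for illustration and the import path inferred from the RECORD listing:

    from yggdrasil.databricks.workspaces.workspace import Workspace

    ws = Workspace(
        host="https://adb-1234567890.12.azuredatabricks.net",  # illustrative
        token="dapi...",                                        # illustrative
        custom_tags={"CostCenter": "data-eng", "Team": "platform"},
    )

    # Yields something like {"Product": ..., "ProductTag": ..., "CostCenter": "data-eng", "Team": "platform"},
    # with falsy Product/ProductTag values dropped before the custom tags are merged in.
    tags = ws.default_tags()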
yggdrasil/libs/pandaslib.py CHANGED
@@ -3,9 +3,14 @@
  try:
  import pandas # type: ignore
  pandas = pandas
+
+ PandasDataFrame = pandas.DataFrame
  except ImportError:
  pandas = None

+ class PandasDataFrame:
+ pass
+


  def require_pandas():
@@ -23,4 +28,5 @@ def require_pandas():
  __all__ = [
  "pandas",
  "require_pandas",
+ "PandasDataFrame"
  ]
yggdrasil/libs/polarslib.py CHANGED
@@ -4,13 +4,18 @@ try:
  import polars # type: ignore

  polars = polars
+
+ PolarsDataFrame = polars.DataFrame
  except ImportError:
  polars = None

+ class PolarsDataFrame:
+ pass

  __all__ = [
  "polars",
  "require_polars",
+ "PolarsDataFrame"
  ]

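Both shims now follow the same optional-dependency pattern: export the real DataFrame class when the library imports, otherwise a stand-in class so isinstance checks elsewhere (for example in DatabricksIO.write) simply evaluate to False instead of raising. A condensed sketch of the idea rather than the exact module contents:

    try:
        import polars
        PolarsDataFrame = polars.DataFrame   # real class when polars is installed
    except ImportError:
        polars = None

        class PolarsDataFrame:               # placeholder type: isinstance(x, PolarsDataFrame) is always False
            pass

    def describe(obj) -> str:
        # Safe to call whether or not polars is installed.
        return "polars frame" if isinstance(obj, PolarsDataFrame) else "something else"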
yggdrasil/pyutils/python_env.py CHANGED
@@ -16,7 +16,7 @@ import sys
  import tempfile
  import threading
  from contextlib import contextmanager
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List, Tuple

@@ -415,11 +415,13 @@ def _locked_env(root: Path):
  # PythonEnv
  # -----------------------

- @dataclass(frozen=True)
+ @dataclass
  class PythonEnv:
  """Represent a managed Python environment rooted at a filesystem path."""
  root: Path

+ _version: Optional[str] = field(default=None, repr=False)
+
  def __post_init__(self) -> None:
  """Normalize the root path after dataclass initialization.

@@ -862,8 +864,9 @@ class PythonEnv:
  Returns:
  Version string.
  """
- out = self.exec_code("import sys; print(sys.version.split()[0])", check=True)
- return out.strip()
+ if self._version is None:
+ self._version = self.exec_code("import sys; print(sys.version.split()[0])", check=True).strip()
+ return self._version

  @property
  def version_info(self) -> tuple[int, int, int]:
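The interpreter-version lookup is now memoized on the PythonEnv instance, so repeated access shells out to the environment's Python only once. Sketch of the observable effect, assuming the value is exposed as the env's version property (the property name is not visible in this hunk) and using the import path from the RECORD listing:

    from pathlib import Path

    from yggdrasil.pyutils.python_env import PythonEnv

    env = PythonEnv(root=Path(".venv"))   # root is the dataclass's only required field
    first = env.version                   # runs `import sys; print(sys.version.split()[0])` in that env
    second = env.version                  # served from the cached _version field, no second subprocess
    assert first == second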
yggdrasil/types/cast/polars_cast.py CHANGED
@@ -15,6 +15,7 @@ from ..python_defaults import default_arrow_scalar
  from ...libs.polarslib import polars

  __all__ = [
+ "polars_converter",
  "cast_polars_array",
  "cast_polars_dataframe",
  "arrow_type_to_polars_type",
yggdrasil/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.48"
+ __version__ = "0.1.50"