ygg 0.1.49__py3-none-any.whl → 0.1.51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/METADATA +1 -1
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/RECORD +13 -13
- yggdrasil/databricks/sql/statement_result.py +12 -5
- yggdrasil/databricks/workspaces/io.py +58 -46
- yggdrasil/databricks/workspaces/path.py +95 -48
- yggdrasil/libs/pandaslib.py +6 -0
- yggdrasil/libs/polarslib.py +5 -0
- yggdrasil/types/cast/polars_cast.py +1 -0
- yggdrasil/version.py +1 -1
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/WHEEL +0 -0
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/top_level.txt +0 -0
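Most of the churn below is the wheel's RECORD file, where every row is `path,sha256=<digest>,size`: any file whose bytes change gets a new digest and size, so RECORD rewrites track the substantive changes mechanically. A minimal sketch (not part of ygg) for checking one row of an unpacked wheel, using the urlsafe, padding-free base64 encoding the wheel spec mandates:

```python
import base64
import csv
import hashlib
from pathlib import Path

def verify_record_row(wheel_root: Path, row: list) -> bool:
    path, hash_spec, size = row
    if not hash_spec:                      # RECORD lists itself with no hash
        return True
    algo, _, expected = hash_spec.partition("=")
    data = (wheel_root / path).read_bytes()
    digest = base64.urlsafe_b64encode(getattr(hashlib, algo)(data).digest())
    return digest.rstrip(b"=").decode() == expected and len(data) == int(size)

# with open(root / "ygg-0.1.51.dist-info/RECORD", newline="") as f:
#     assert all(verify_record_row(root, row) for row in csv.reader(f))
```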
{ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-ygg-0.1.
+ygg-0.1.51.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
-yggdrasil/version.py,sha256=
+yggdrasil/version.py,sha256=Vba463sBalMddSSVNE_881HL99Fg7msGZpAiG0JX6bg,22
 yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
 yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
 yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
@@ -11,21 +11,21 @@ yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4
 yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
 yggdrasil/databricks/sql/engine.py,sha256=K5WmGKpXU78JA3UdK8dLxBD_GXKidZJFe7hytuC5UHg,41029
 yggdrasil/databricks/sql/exceptions.py,sha256=uC-BoG0u0LtORKUS1X3iLID8nc-0TV5MQN3M8RXHsO4,1495
-yggdrasil/databricks/sql/statement_result.py,sha256=
+yggdrasil/databricks/sql/statement_result.py,sha256=GZyVhhrUK5opNo-8HGqsMx0Rp9fa_0zqvn8McSHPQ8U,16310
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
 yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
 yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=
-yggdrasil/databricks/workspaces/path.py,sha256=
+yggdrasil/databricks/workspaces/io.py,sha256=PhXMVrK8ngDl6kKjnh8_jlZ2GsKtU2nLSi1nFgV4Sks,33302
+yggdrasil/databricks/workspaces/path.py,sha256=HA73r0qedm8IiE_FPrDzRLc5BBkU9_a1qF2JXdWXMQk,49290
 yggdrasil/databricks/workspaces/path_kind.py,sha256=Xc319NysH8_6E9C0Q8nCxDHYG07_SnzyUVKHe0dNdDQ,305
 yggdrasil/databricks/workspaces/workspace.py,sha256=zBlQdYNT_xKwUCYo3O4Q4g-8pfMvff3I26efyCfY_TY,24961
 yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
 yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
 yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
 yggdrasil/libs/databrickslib.py,sha256=NHJeUViHhZc8LI5oDVfi1axRyUy_pDJLy4hjD0KZEBQ,980
-yggdrasil/libs/pandaslib.py,sha256=
-yggdrasil/libs/polarslib.py,sha256=
+yggdrasil/libs/pandaslib.py,sha256=GoUjh9dxZAFLe9hs8-6RliLD3jsH_BexYW1w-8BZzb0,618
+yggdrasil/libs/polarslib.py,sha256=hnL8x6ygsyIoiJyIUMaeoji3fRzab4lBiHcMqa29C_Q,618
 yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
 yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
 yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
@@ -49,14 +49,14 @@ yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdaf
 yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
 yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
 yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
-yggdrasil/types/cast/polars_cast.py,sha256=
+yggdrasil/types/cast/polars_cast.py,sha256=RILcbfL4o1XDMp5H-06c0BMrDal5pehOT7ACiItDB6E,28791
 yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
 yggdrasil/types/cast/registry.py,sha256=_zdFGmUBB7P-e_LIcJlOxMcxAkXoA-UXB6HqLMgTokg,21491
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
+ygg-0.1.51.dist-info/METADATA,sha256=JprFwC_aHRV7jMw6YBV4-uAZcTZrEFVu7eE6_2dulG4,18528
+ygg-0.1.51.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.51.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.51.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.51.dist-info/RECORD,,
yggdrasil/databricks/sql/statement_result.py
CHANGED

@@ -344,10 +344,17 @@ class StatementResult:
         if self.persisted:
             if self._arrow_table is not None:
                 return self._arrow_table.schema
-
+            elif self._spark_df is not None:
+                return spark_schema_to_arrow_schema(self._spark_df.schema)
+            raise NotImplementedError("")
+
+        manifest = self.manifest
+
+        if manifest is None:
+            return pa.schema([])
 
         fields = [
-            column_info_to_arrow_field(_) for _ in
+            column_info_to_arrow_field(_) for _ in manifest.schema.columns
         ]
 
         return pa.schema(fields)
@@ -362,7 +369,7 @@ class StatementResult:
         An Arrow Table containing all rows.
         """
         if self.persisted:
-            if self._arrow_table:
+            if self._arrow_table is not None:
                 return self._arrow_table
             else:
                 return self._spark_df.toArrow()
@@ -370,7 +377,6 @@ class StatementResult:
         batches = list(self.to_arrow_batches(parallel_pool=parallel_pool))
 
         if not batches:
-            # empty table with no columns
            return pa.Table.from_batches([], schema=self.arrow_schema())
 
         return pa.Table.from_batches(batches)
@@ -501,8 +507,9 @@ class StatementResult:
         Returns:
             A Spark DataFrame with the result rows.
         """
-        if self._spark_df:
+        if self._spark_df is not None:
            return self._spark_df
 
         self._spark_df = arrow_table_to_spark_dataframe(self.to_arrow_table())
+
         return self._spark_df
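The `is not None` swaps above are behavioral fixes, not style: with recent pyarrow, a `Table` is falsy when it has zero rows, so the old truthiness checks treated a cached empty result as missing. A standalone sketch of the hazard:

```python
import pyarrow as pa

# A cached but empty result: zero rows, valid schema.
empty = pa.table({"a": pa.array([], type=pa.int64())})

print(empty is not None)  # True  -> new check: cache hit
print(bool(empty))        # False -> old check: len()-based, cache miss
# With "if self._arrow_table:", a persisted zero-row result would fall
# through and be recomputed (or hit self._spark_df, which may be None).
```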
yggdrasil/databricks/workspaces/io.py
CHANGED

@@ -13,8 +13,8 @@ from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat
 
 from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
-from ...
-from ...
+from ...libs.pandaslib import pandas, PandasDataFrame
+from ...libs.polarslib import polars, PolarsDataFrame
 from ...types.cast.registry import convert
 
 if databricks is not None:
@@ -42,7 +42,6 @@ class DatabricksIO(ABC, IO):
         path: "DatabricksPath",
         mode: str,
         encoding: Optional[str] = None,
-        compression: Optional[str] = "detect",
         position: int = 0,
         buffer: Optional[io.BytesIO] = None,
     ):
@@ -50,7 +49,6 @@ class DatabricksIO(ABC, IO):
 
         self.encoding = encoding
         self.mode = mode
-        self.compression = compression
 
         self.path = path
 
@@ -111,7 +109,6 @@ class DatabricksIO(ABC, IO):
             path=path,
             mode=mode,
             encoding=encoding,
-            compression=compression,
             position=position,
             buffer=buffer,
         )
@@ -120,7 +117,6 @@ class DatabricksIO(ABC, IO):
             path=path,
             mode=mode,
             encoding=encoding,
-            compression=compression,
             position=position,
             buffer=buffer,
         )
@@ -129,7 +125,6 @@ class DatabricksIO(ABC, IO):
             path=path,
             mode=mode,
             encoding=encoding,
-            compression=compression,
             position=position,
             buffer=buffer,
         )
@@ -226,7 +221,6 @@ class DatabricksIO(ABC, IO):
             path=kwargs.get("path", self.path),
             mode=kwargs.get("mode", self.mode),
             encoding=kwargs.get("encoding", self.encoding),
-            compression=kwargs.get("compression", self.compression),
             position=kwargs.get("position", self.position),
             buffer=kwargs.get("buffer", self._buffer),
         )
@@ -264,8 +258,7 @@ class DatabricksIO(ABC, IO):
             None.
         """
         self.flush()
-
-        self._buffer.close()
+        self.clear_buffer()
 
     def fileno(self):
         """Return a pseudo file descriptor based on object hash.
@@ -403,9 +396,6 @@ class DatabricksIO(ABC, IO):
         Returns:
             The read bytes or string depending on mode.
         """
-        if not self.readable():
-            raise IOError("File not open for reading")
-
         current_position = self.position
         all_data = self.read_all_bytes(use_cache=use_cache)
 
@@ -431,9 +421,6 @@ class DatabricksIO(ABC, IO):
         Returns:
             The next line as bytes or string.
         """
-        if not self.readable():
-            raise IOError("File not open for reading")
-
         if self.encoding:
             # Text-mode: accumulate characters
             out_chars = []
@@ -475,9 +462,6 @@ class DatabricksIO(ABC, IO):
         Returns:
             A list of lines.
         """
-        if not self.readable():
-            raise IOError("File not open for reading")
-
         lines = []
         total = 0
 
@@ -492,14 +476,6 @@ class DatabricksIO(ABC, IO):
 
         return lines
 
-    def appendable(self):
-        """Return True when the file is open in append mode.
-
-        Returns:
-            True if in append mode.
-        """
-        return "a" in self.mode
-
     def writable(self):
         """Return True to indicate write support.
 
@@ -561,9 +537,6 @@ class DatabricksIO(ABC, IO):
         Returns:
             The number of bytes written.
         """
-        if not self.writable():
-            raise IOError("File not open for writing")
-
         if isinstance(data, str):
             data = data.encode(self.encoding or "utf-8")
 
@@ -664,8 +637,12 @@ class DatabricksIO(ABC, IO):
             return self.write_polars(table, file_format=file_format, batch_size=batch_size, **kwargs)
         elif isinstance(table, PandasDataFrame):
             return self.write_pandas(table, file_format=file_format, batch_size=batch_size, **kwargs)
-
-
+
+        return self.write_arrow(
+            table=table,
+            file_format=file_format,
+            batch_size=batch_size
+        )
 
     # ---- Arrow ----
 
@@ -691,14 +668,16 @@ class DatabricksIO(ABC, IO):
         if isinstance(file_format, ParquetFileFormat):
             return pq.read_table(self, **kwargs)
 
-
+        elif isinstance(file_format, CsvFileFormat):
             return pcsv.read_csv(self, parse_options=file_format.parse_options)
 
-
+        else:
+            ValueError(f"Unsupported file format for Arrow table: {file_format}")
 
     def write_arrow(
         self,
         table: Union[pa.Table, pa.RecordBatch],
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
@@ -706,6 +685,7 @@ class DatabricksIO(ABC, IO):
 
         Args:
             table: Arrow table or batch to write.
+            file_format: Optional file format override.
             batch_size: Optional batch size for writes.
             **kwargs: Format-specific options.
 
@@ -717,6 +697,7 @@ class DatabricksIO(ABC, IO):
 
         return self.write_arrow_table(
             table=table,
+            file_format=file_format,
             batch_size=batch_size,
             **kwargs
         )
@@ -776,12 +757,14 @@ class DatabricksIO(ABC, IO):
 
     def read_arrow_batches(
         self,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
         """Yield Arrow record batches from the file.
 
         Args:
+            file_format: Optional file format override.
             batch_size: Optional batch size for reads.
             **kwargs: Format-specific options.
 
@@ -790,7 +773,11 @@ class DatabricksIO(ABC, IO):
         """
         return (
             self
-            .read_arrow_table(
+            .read_arrow_table(
+                file_format=file_format,
+                batch_size=batch_size,
+                **kwargs
+            )
             .to_batches(max_chunksize=batch_size)
         )
 
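`read_arrow_batches` is now a thin wrapper: it materializes the whole table, then re-chunks it. A standalone sketch of the underlying pyarrow call, with illustrative data:

```python
import pyarrow as pa

table = pa.table({"a": list(range(10))})

# Table.to_batches(max_chunksize=...) caps rows per RecordBatch; with
# max_chunksize=None it simply yields the table's existing chunks.
batches = table.to_batches(max_chunksize=4)
print([b.num_rows for b in batches])  # [4, 4, 2]
```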
@@ -798,23 +785,36 @@ class DatabricksIO(ABC, IO):
 
     def read_pandas(
         self,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
         """Read the file into a pandas DataFrame.
 
         Args:
+            file_format: Optional file format override.
             batch_size: Optional batch size for reads.
             **kwargs: Format-specific options.
 
         Returns:
             A pandas DataFrame with the file contents.
         """
-
+        file_format = self.path.file_format if file_format is None else file_format
+        self.seek(0)
+
+        if isinstance(file_format, ParquetFileFormat):
+            return pandas.read_parquet(self, **kwargs)
+
+        elif isinstance(file_format, CsvFileFormat):
+            return pandas.read_csv(self, **kwargs)
+
+        else:
+            raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")
 
     def write_pandas(
         self,
-        df,
+        df: PandasDataFrame,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
@@ -822,13 +822,26 @@ class DatabricksIO(ABC, IO):
 
         Args:
             df: pandas DataFrame to write.
+            file_format: Optional file format override.
             batch_size: Optional batch size for writes.
             **kwargs: Format-specific options.
 
         Returns:
             None.
         """
-        self.
+        file_format = self.path.file_format if file_format is None else FileFormat
+        buffer = io.BytesIO()
+
+        if isinstance(file_format, ParquetFileFormat):
+            df.to_parquet(buffer, **kwargs)
+
+        elif isinstance(file_format, CsvFileFormat):
+            df.to_csv(buffer, **kwargs)
+
+        else:
+            raise ValueError(f"Unsupported file format for Pandas DataFrame: {file_format}")
+
+        self.write_all_bytes(data=buffer.getvalue())
 
     # ---- Polars ----
 
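The new `write_pandas` body serializes into an in-memory buffer and uploads the result in one call. A self-contained approximation of that pattern (the function name and the `index=False` choice are illustrative, not the package's):

```python
import io

import pandas as pd
from pyarrow.dataset import CsvFileFormat, FileFormat, ParquetFileFormat

def serialize_dataframe(df: pd.DataFrame, file_format: FileFormat) -> bytes:
    """Render df to bytes according to a pyarrow dataset FileFormat."""
    buffer = io.BytesIO()
    if isinstance(file_format, ParquetFileFormat):
        df.to_parquet(buffer)           # needs a parquet engine (pyarrow)
    elif isinstance(file_format, CsvFileFormat):
        df.to_csv(buffer, index=False)  # pandas >= 1.2 writes to binary handles
    else:
        raise ValueError(f"Unsupported file format: {file_format}")
    return buffer.getvalue()            # upload these bytes in one call

payload = serialize_dataframe(pd.DataFrame({"a": [1, 2]}), ParquetFileFormat())
print(len(payload) > 0)  # True
```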
@@ -848,22 +861,21 @@ class DatabricksIO(ABC, IO):
         Returns:
             A polars DataFrame with the file contents.
         """
-        import polars as pl
-
         file_format = self.path.file_format if file_format is None else file_format
         self.seek(0)
 
         if isinstance(file_format, ParquetFileFormat):
-            return
+            return polars.read_parquet(self, **kwargs)
 
-
-            return
+        elif isinstance(file_format, CsvFileFormat):
+            return polars.read_csv(self, **kwargs)
 
-
+        else:
+            raise ValueError(f"Unsupported file format for Polars DataFrame: {file_format}")
 
     def write_polars(
         self,
-        df,
+        df: PolarsDataFrame,
         file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
@@ -997,7 +1009,7 @@ class DatabricksVolumeIO(DatabricksIO):
             resp = client.download(full_path)
         except Exception as e:
             # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
-            if allow_not_found and any(s in str(e).lower() for s in ("not found", "
+            if allow_not_found and any(s in str(e).lower() for s in ("not found", "not exist", "404")):
                 return b""
             raise
 
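Because Databricks SDK exception types vary across versions, the volume download path probes the error message text instead. A standalone sketch of that predicate (the function name is illustrative):

```python
def looks_like_not_found(exc: Exception) -> bool:
    # Mirrors the substring probe in DatabricksVolumeIO's download path.
    msg = str(exc).lower()
    return any(s in msg for s in ("not found", "not exist", "404"))

print(looks_like_not_found(RuntimeError("Path does not exist: /Volumes/x")))  # True
print(looks_like_not_found(RuntimeError("HTTP 404")))                         # True
print(looks_like_not_found(RuntimeError("permission denied")))                # False
```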
yggdrasil/databricks/workspaces/path.py
CHANGED

@@ -12,17 +12,18 @@ from pathlib import PurePosixPath
 from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Iterable
 
 import pyarrow as pa
+import pyarrow.dataset as ds
 from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
 from pyarrow.fs import FileInfo, FileType, FileSystem
-import pyarrow.dataset as ds
 
 from .io import DatabricksIO
 from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
-from ...
+from ...libs.pandaslib import PandasDataFrame
+from ...libs.polarslib import polars, PolarsDataFrame
+from ...types.cast.arrow_cast import cast_arrow_tabular
 from ...types.cast.cast_options import CastOptions
-from ...types.cast.polars_cast import polars_converter
-from ...types.cast.polars_pandas_cast import PolarsDataFrame
+from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
 from ...types.cast.registry import convert, register_converter
 
 if databricks is not None:
@@ -504,7 +505,7 @@ class DatabricksPath:
                 else None
             )
 
-            return self.reset_metadata(is_file=False, is_dir=True, size=
+            return self.reset_metadata(is_file=False, is_dir=True, size=0, mtime=mtime)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
 
@@ -639,22 +640,12 @@ class DatabricksPath:
         Returns:
             The DatabricksPath instance.
         """
-
-
-
-
-
-
-                self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)
-        except (NotFound, ResourceDoesNotExist):
-            if not parents or self.parent == self:
-                raise
-
-            self.parent.mkdir(parents=True, exist_ok=True)
-            self.mkdir(parents=False, exist_ok=exist_ok)
-        except (AlreadyExists, ResourceAlreadyExists):
-            if not exist_ok:
-                raise
+        if self.kind == DatabricksPathKind.WORKSPACE:
+            self.make_workspace_dir(parents=parents, exist_ok=exist_ok)
+        elif self.kind == DatabricksPathKind.VOLUME:
+            self.make_volume_dir(parents=parents, exist_ok=exist_ok)
+        elif self.kind == DatabricksPathKind.DBFS:
+            self.make_dbfs_dir(parents=parents, exist_ok=exist_ok)
 
         return self
 
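`mkdir` drops its retry-on-missing-parent logic in favor of a plain dispatch on path kind, delegating parent and exist-ok handling to the per-backend helpers; note that an unmatched kind now falls through and `self` is returned unchanged. A condensed sketch of the shape (the enum stands in for path_kind.py, and the table-driven form is this annotation's choice, not the package's):

```python
from enum import Enum, auto

class DatabricksPathKind(Enum):  # stand-in for yggdrasil's path_kind module
    WORKSPACE = auto()
    VOLUME = auto()
    DBFS = auto()

MAKERS = {
    DatabricksPathKind.WORKSPACE: "make_workspace_dir",
    DatabricksPathKind.VOLUME: "make_volume_dir",
    DatabricksPathKind.DBFS: "make_dbfs_dir",
}

def mkdir(path, parents: bool = True, exist_ok: bool = True):
    # Equivalent to the if/elif chain in the hunk above.
    getattr(path, MAKERS[path.kind])(parents=parents, exist_ok=exist_ok)
    return path
```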
@@ -770,15 +761,13 @@ class DatabricksPath:
         Returns:
             The DatabricksPath instance.
         """
-
-
-
-
-
-
-
-        finally:
-            self.reset_metadata()
+        if self.kind == DatabricksPathKind.VOLUME:
+            return self._remove_volume_file()
+        elif self.kind == DatabricksPathKind.WORKSPACE:
+            return self._remove_workspace_file()
+        elif self.kind == DatabricksPathKind.DBFS:
+            return self._remove_dbfs_file()
+
         return self
 
     def _remove_volume_file(self):
@@ -787,6 +776,9 @@ class DatabricksPath:
             sdk.files.delete(self.files_full_path())
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
+        finally:
+            self.reset_metadata()
+
         return self
 
     def _remove_workspace_file(self):
@@ -795,6 +787,9 @@ class DatabricksPath:
             sdk.workspace.delete(self.workspace_full_path(), recursive=True)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
+        finally:
+            self.reset_metadata()
+
         return self
 
     def _remove_dbfs_file(self):
@@ -803,6 +798,9 @@ class DatabricksPath:
             sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
+        finally:
+            self.reset_metadata()
+
         return self
 
     def rmdir(self, recursive: bool = True):
@@ -827,7 +825,9 @@ class DatabricksPath:
             sdk.workspace.delete(self.workspace_full_path(), recursive=recursive)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
-
+        finally:
+            self.reset_metadata()
+
         return self
 
     def _remove_dbfs_dir(self, recursive: bool = True):
@@ -836,7 +836,9 @@ class DatabricksPath:
             sdk.dbfs.delete(self.dbfs_full_path(), recursive=recursive)
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             pass
-
+        finally:
+            self.reset_metadata()
+
         return self
 
     def _remove_volume_dir(self, recursive: bool = True):
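Each delete helper now invalidates cached path metadata in a `finally` block, so stat caches are dropped whether the SDK call succeeds, is swallowed as not-found, or re-raises. The shape of that pattern in isolation (names here are illustrative):

```python
def _remove(delete_call, reset_metadata):
    # Skeleton of the _remove_* helpers: the finally clause runs on
    # success, on the swallowed not-found family, and on re-raise alike.
    try:
        delete_call()
    except FileNotFoundError:  # stand-in for NotFound/ResourceDoesNotExist/...
        pass
    finally:
        reset_metadata()
```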
@@ -1042,7 +1044,7 @@ class DatabricksPath:
         Returns:
             None.
         """
-        if self.is_file()
+        if self.is_file():
             with self.open(mode="rb") as src:
                 src.copy_to(dest=dest)
 
@@ -1067,6 +1069,13 @@ class DatabricksPath:
         else:
             raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")
 
+    def write_bytes(self, data: bytes):
+        if hasattr(data, "read"):
+            data = data.read()
+
+        with self.open("wb") as f:
+            f.write_all_bytes(data=data)
+
     # -------------------------
     # Data ops (Arrow / Pandas / Polars)
     # -------------------------
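The new `write_bytes` accepts raw bytes or, thanks to the `hasattr(data, "read")` probe, any file-like source, which it drains before uploading. A hypothetical usage sketch (the `path` instance is assumed):

```python
import io

# given some DatabricksPath instance `path`:
path.write_bytes(b"raw payload")                # bytes go straight through
path.write_bytes(io.BytesIO(b"buffered data"))  # .read() is called first
```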
@@ -1112,9 +1121,9 @@ class DatabricksPath:
         """
         if self.is_file():
             with self.open("rb") as f:
-
-
-
+                data = f.read_arrow_table(batch_size=batch_size, **kwargs)
+                return data
+        elif self.is_dir():
             tables: list[pa.Table] = []
             for child in self.ls(recursive=True):
                 if child.is_file():
@@ -1210,6 +1219,7 @@ class DatabricksPath:
 
     def read_pandas(
         self,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         concat: bool = True,
         **kwargs
@@ -1217,6 +1227,7 @@ class DatabricksPath:
         """Read the path into a pandas DataFrame.
 
         Args:
+            file_format: Optional file format override.
             batch_size: Optional batch size for reads.
             concat: Whether to concatenate results for directories.
             **kwargs: Format-specific options.
@@ -1225,14 +1236,26 @@ class DatabricksPath:
             A pandas DataFrame or list of DataFrames if concat=False.
         """
         if concat:
-            return self.read_arrow_table(
+            return self.read_arrow_table(
+                file_format=file_format,
+                batch_size=batch_size,
+                concat=True,
+                **kwargs
+            ).to_pandas()
+
+        tables = self.read_arrow_table(
+            batch_size=batch_size,
+            file_format=file_format,
+            concat=False,
+            **kwargs
+        )
 
-        tables = self.read_arrow_table(batch_size=batch_size, concat=False, **kwargs)
         return [t.to_pandas() for t in tables]  # type: ignore[arg-type]
 
     def write_pandas(
         self,
-        df,
+        df: PandasDataFrame,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
@@ -1240,13 +1263,41 @@ class DatabricksPath:
 
         Args:
             df: pandas DataFrame to write.
+            file_format: Optional file format override.
             batch_size: Optional batch size for writes.
             **kwargs: Format-specific options.
 
         Returns:
             The DatabricksPath instance.
         """
-
+        with self.connect(clone=False) as connected:
+            if connected.is_dir_sink():
+                seed = int(time.time() * 1000)
+
+                def df_batches(pdf, bs: int):
+                    for start in range(0, len(pdf), batch_size):
+                        yield pdf.iloc[start:start + batch_size]
+
+                for i, batch in enumerate(df_batches(df, batch_size)):
+                    part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
+
+                    with part_path.open(mode="wb", clone=False) as f:
+                        f.write_pandas(
+                            batch,
+                            file_format=file_format,
+                            batch_size=batch_size,
+                            **kwargs
+                        )
+            else:
+                with connected.open(mode="wb", clone=False) as f:
+                    f.write_pandas(
+                        df,
+                        file_format=file_format,
+                        batch_size=batch_size,
+                        **kwargs
+                    )
+
+        return self
 
     def read_polars(
         self,
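For directory sinks, `write_pandas` now shards the frame into part files named `{seed}-{index}-{suffix}.parquet`. The row-slicing generator at its core, as a standalone sketch (note it relies on a non-None batch size):

```python
import pandas as pd

def df_batches(pdf: pd.DataFrame, batch_size: int):
    # iloc slicing is positional and end-exclusive, so batches never overlap.
    for start in range(0, len(pdf), batch_size):
        yield pdf.iloc[start:start + batch_size]

df = pd.DataFrame({"a": range(10)})
print([len(b) for b in df_batches(df, 4)])  # [4, 4, 2]
```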
@@ -1268,8 +1319,6 @@ class DatabricksPath:
         Returns:
             A polars DataFrame or list of DataFrames if concat=False.
         """
-        import polars as pl
-
         if self.is_file():
             with self.open("rb") as f:
                 return f.read_polars(batch_size=batch_size, **kwargs)
@@ -1282,10 +1331,10 @@ class DatabricksPath:
                     dfs.append(f.read_polars(batch_size=batch_size, **kwargs))
 
            if not dfs:
-                return
+                return polars.DataFrame()
 
            if concat:
-                return
+                return polars.concat(dfs, how=how, rechunk=rechunk)
            return dfs  # type: ignore[return-value]
 
        raise FileNotFoundError(f"Path does not exist: {self}")
@@ -1316,12 +1365,10 @@ class DatabricksPath:
         Notes:
             - If `df` is a LazyFrame, we collect it first (optionally streaming).
         """
-
-
-        if isinstance(df, pl.LazyFrame):
+        if isinstance(df, polars.LazyFrame):
             df = df.collect()
 
-        if not isinstance(df,
+        if not isinstance(df, polars.DataFrame):
             raise TypeError(f"write_polars expects pl.DataFrame or pl.LazyFrame, got {type(df)!r}")
 
         with self.connect() as connected:
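The directory read path now has concrete return values: an empty `polars.DataFrame()` instead of a bare `return` (which yielded `None`), and `polars.concat` for the merged case. Illustrative:

```python
import polars as pl

dfs = [pl.DataFrame({"a": [1, 2]}), pl.DataFrame({"a": [3]})]

merged = pl.concat(dfs, how="vertical", rechunk=True)
print(merged.height)          # 3

print(pl.DataFrame().height)  # 0: an empty frame, but never None
```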
yggdrasil/libs/pandaslib.py
CHANGED
@@ -3,9 +3,14 @@
 try:
     import pandas  # type: ignore
     pandas = pandas
+
+    PandasDataFrame = pandas.DataFrame
 except ImportError:
     pandas = None
 
+    class PandasDataFrame:
+        pass
+
 
 def require_pandas():
     """Ensure pandas is available before using pandas helpers.
@@ -23,4 +28,5 @@ def require_pandas():
 __all__ = [
     "pandas",
     "require_pandas",
+    "PandasDataFrame"
 ]
yggdrasil/libs/polarslib.py
CHANGED
@@ -4,13 +4,18 @@ try:
     import polars  # type: ignore
 
     polars = polars
+
+    PolarsDataFrame = polars.DataFrame
 except ImportError:
     polars = None
 
+    class PolarsDataFrame:
+        pass
 
 __all__ = [
     "polars",
     "require_polars",
+    "PolarsDataFrame"
 ]
 
 
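Both shim modules follow the same optional-dependency pattern: on successful import they alias the real `DataFrame` class, and on `ImportError` they install an empty placeholder. The payoff is that `isinstance` checks elsewhere (for example the `write_table` dispatch in io.py) stay valid without the dependency, since nothing is ever an instance of the placeholder. A condensed sketch:

```python
try:
    import pandas
    PandasDataFrame = pandas.DataFrame
except ImportError:
    pandas = None

    class PandasDataFrame:  # placeholder: no instance of this ever exists
        pass

def is_pandas_frame(obj) -> bool:
    # True only when pandas is installed and obj really is a DataFrame;
    # False (rather than a NameError) when pandas is missing.
    return isinstance(obj, PandasDataFrame)

print(is_pandas_frame([1, 2, 3]))  # False either way
```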
yggdrasil/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.49"
+__version__ = "0.1.51"
{ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/WHEEL
File without changes

{ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/entry_points.txt
File without changes

{ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/licenses/LICENSE
File without changes

{ygg-0.1.49.dist-info → ygg-0.1.51.dist-info}/top_level.txt
File without changes