ygg 0.1.57__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (46)
  1. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/METADATA +2 -2
  2. ygg-0.1.64.dist-info/RECORD +74 -0
  3. yggdrasil/ai/__init__.py +2 -0
  4. yggdrasil/ai/session.py +87 -0
  5. yggdrasil/ai/sql_session.py +310 -0
  6. yggdrasil/databricks/__init__.py +0 -3
  7. yggdrasil/databricks/compute/cluster.py +68 -113
  8. yggdrasil/databricks/compute/command_execution.py +674 -0
  9. yggdrasil/databricks/compute/exceptions.py +19 -0
  10. yggdrasil/databricks/compute/execution_context.py +491 -282
  11. yggdrasil/databricks/compute/remote.py +4 -14
  12. yggdrasil/databricks/exceptions.py +10 -0
  13. yggdrasil/databricks/sql/__init__.py +0 -4
  14. yggdrasil/databricks/sql/engine.py +178 -178
  15. yggdrasil/databricks/sql/exceptions.py +9 -1
  16. yggdrasil/databricks/sql/statement_result.py +108 -120
  17. yggdrasil/databricks/sql/warehouse.py +339 -92
  18. yggdrasil/databricks/workspaces/io.py +185 -40
  19. yggdrasil/databricks/workspaces/path.py +114 -100
  20. yggdrasil/databricks/workspaces/workspace.py +210 -61
  21. yggdrasil/exceptions.py +7 -0
  22. yggdrasil/libs/databrickslib.py +22 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -2
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/modules.py +6 -7
  30. yggdrasil/pyutils/python_env.py +16 -21
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/requests/msal.py +9 -96
  33. yggdrasil/types/cast/arrow_cast.py +3 -0
  34. yggdrasil/types/cast/pandas_cast.py +157 -169
  35. yggdrasil/types/cast/polars_cast.py +11 -43
  36. yggdrasil/types/dummy_class.py +81 -0
  37. yggdrasil/types/file_format.py +6 -2
  38. yggdrasil/types/python_defaults.py +92 -76
  39. yggdrasil/version.py +1 -1
  40. ygg-0.1.57.dist-info/RECORD +0 -66
  41. yggdrasil/databricks/ai/loki.py +0 -53
  42. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/WHEEL +0 -0
  43. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/entry_points.txt +0 -0
  44. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/licenses/LICENSE +0 -0
  45. {ygg-0.1.57.dist-info → ygg-0.1.64.dist-info}/top_level.txt +0 -0
  46. /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
@@ -1,20 +1,19 @@
 """Databricks path abstraction spanning DBFS, workspace, and volumes."""
 
 # src/yggdrasil/databricks/workspaces/databricks_path.py
-from __future__ import annotations
-
 import dataclasses
 import datetime as dt
+import io
 import random
 import string
 import time
 from pathlib import PurePosixPath
-from typing import Optional, Tuple, Union, TYPE_CHECKING, List
+from threading import Thread
+from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
 
+import dill
 import pyarrow as pa
-import pyarrow.dataset as ds
 from pyarrow import ArrowInvalid
-from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
 from pyarrow.fs import FileInfo, FileType, FileSystem
 
 from .io import DatabricksIO
@@ -22,15 +21,13 @@ from .path_kind import DatabricksPathKind
 from .volumes_path import get_volume_status, get_volume_metadata
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
-from ...libs.polarslib import polars, PolarsDataFrame
-from ...types.cast.arrow_cast import cast_arrow_tabular
-from ...types.cast.cast_options import CastOptions
-from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
-from ...types.cast.registry import convert, register_converter
-from ...types.file_format import ExcelFileFormat
+from ...libs.polarslib import polars
+from ...types.cast.registry import convert
+from ...types.file_format import FileFormat, ExcelFileFormat, ParquetFileFormat, JsonFileFormat, CsvFileFormat
 
 if databricks is not None:
-    from databricks.sdk.service.catalog import VolumeType, PathOperation, VolumeInfo
+    from databricks.sdk.errors import InternalError
+    from databricks.sdk.service.catalog import VolumeType, VolumeInfo, PathOperation
     from databricks.sdk.service.workspace import ObjectType
     from databricks.sdk.errors.platform import (
         NotFound,
@@ -176,6 +173,8 @@ class DatabricksPath:
         if not obj:
            return cls.empty_instance(workspace=workspace)
 
+        if isinstance(obj, str):
+            obj = [obj]
         if not isinstance(obj, (str, list)):
             if isinstance(obj, DatabricksPath):
                 if workspace is not None and obj._workspace is None:
@@ -191,6 +190,7 @@
                 obj = str(obj)
 
 
+
         obj = _flatten_parts(obj)
 
         if obj and not obj[0]:
@@ -246,16 +246,23 @@
         if self._workspace is not None:
             self._workspace.__exit__(exc_type, exc_val, exc_tb)
 
+        self.close(wait=False)
+
     def __str__(self):
         return self.full_path()
 
     def __repr__(self):
         return self.url()
 
+    def __del__(self):
+        self.close(wait=False)
+
     def __fspath__(self):
         return self.full_path()
 
     def url(self):
+        if self._workspace is not None:
+            return self._workspace.safe_host + self.full_path()
         return "dbfs://%s" % self.full_path()
 
     def full_path(self) -> str:
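
Note on the hunk above: url() now prefers the attached workspace's safe_host over the bare dbfs:// scheme. A minimal sketch; the host and path values below are illustrative, not taken from the package:

    # Hypothetical DatabricksPath `p` bound to a workspace whose safe_host is
    # "https://adb-123.azuredatabricks.net"
    p.url()   # -> "https://adb-123.azuredatabricks.net" + p.full_path()
    # Without a workspace attached, url() still returns "dbfs://" + p.full_path()
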
@@ -282,7 +289,7 @@
         Returns:
             A PyArrow FileSystem instance.
         """
-        return self.workspace.filesytem(workspace=workspace)
+        return self.workspace.filesystem(workspace=workspace)
 
     @property
     def parent(self):
@@ -496,9 +503,15 @@
 
         return self
 
-    def close(self):
+    def close(self, wait: bool = True):
         if self.temporary:
-            self.remove(recursive=True)
+            if wait:
+                self.remove(recursive=True)
+            else:
+                Thread(
+                    target=self.remove,
+                    kwargs={"recursive": True}
+                ).start()
 
     def storage_location(self) -> str:
         info = self.volume_info()
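
The close() change above, together with the __exit__/__del__ hunk earlier, makes temporary-path cleanup optionally non-blocking. A usage sketch; `tmp` is a hypothetical DatabricksPath whose temporary flag is set, and how such a path is created is outside this diff:

    tmp.close()            # default wait=True: remove(recursive=True) runs inline
    tmp.close(wait=False)  # spawns Thread(target=tmp.remove, kwargs={"recursive": True})
    # __exit__ and __del__ now call close(wait=False), so leaving a `with` block or
    # dropping the last reference no longer blocks on the recursive delete.
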
@@ -586,7 +599,7 @@
             mtime = float(info.modified_at) / 1000.0 if info.modified_at is not None else None
 
             return self.reset_metadata(is_file=is_file, is_dir=is_dir, size=size, mtime=mtime)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             pass
 
         found = next(self.ls(fetch_size=1, recursive=False, allow_not_found=True), None)
@@ -730,7 +743,7 @@
                 properties=default_tags,
                 comment="Catalog auto generated by yggdrasil"
             )
-        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
             if not exist_ok:
                 raise
 
@@ -742,7 +755,7 @@
                 properties=default_tags,
                 comment="Schema auto generated by yggdrasil"
             )
-        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
             if not exist_ok:
                 raise
 
@@ -808,37 +821,54 @@
 
     def remove(
         self,
-        recursive: bool = True
+        recursive: bool = True,
+        allow_not_found: bool = True
     ):
         """Remove the path as a file or directory.
 
         Args:
             recursive: Whether to delete directories recursively.
+            allow_not_found: Allow not found path
 
         Returns:
             The DatabricksPath instance.
         """
         if self.kind == DatabricksPathKind.VOLUME:
-            return self._remove_volume_obj(recursive=recursive)
+            return self._remove_volume_obj(recursive=recursive, allow_not_found=allow_not_found)
         elif self.kind == DatabricksPathKind.WORKSPACE:
-            return self._remove_workspace_obj(recursive=recursive)
+            return self._remove_workspace_obj(recursive=recursive, allow_not_found=allow_not_found)
         elif self.kind == DatabricksPathKind.DBFS:
-            return self._remove_dbfs_obj(recursive=recursive)
+            return self._remove_dbfs_obj(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_volume_obj(self, recursive: bool = True):
+    def _remove_volume_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_volume_file()
-        return self._remove_volume_dir(recursive=recursive)
+            return self._remove_volume_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_volume_dir(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_workspace_obj(self, recursive: bool = True):
+    def _remove_workspace_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_workspace_file()
-        return self._remove_workspace_dir(recursive=recursive)
+            return self._remove_workspace_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_workspace_dir(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_dbfs_obj(self, recursive: bool = True):
+    def _remove_dbfs_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_dbfs_file()
-        return self._remove_dbfs_dir(recursive=recursive)
+            return self._remove_dbfs_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_dbfs_dir(recursive=recursive, allow_not_found=allow_not_found)
 
     def rmfile(self, allow_not_found: bool = True):
         """Remove the path as a file.
@@ -859,7 +889,7 @@
         sdk = self.workspace.sdk()
         try:
             sdk.files.delete(self.files_full_path())
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -871,7 +901,7 @@
         sdk = self.workspace.sdk()
         try:
             sdk.workspace.delete(self.workspace_full_path(), recursive=True)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -883,7 +913,7 @@
         sdk = self.workspace.sdk()
         try:
             sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -940,7 +970,7 @@
 
             if not with_root:
                 sdk.workspace.mkdirs(full_path)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -962,7 +992,7 @@
 
             if not with_root:
                 sdk.dbfs.mkdirs(full_path)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -983,7 +1013,7 @@
         if rel:
             try:
                 sdk.files.delete_directory(full_path)
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied) as e:
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError) as e:
                 message = str(e)
 
                 if recursive and "directory is not empty" in message:
@@ -998,13 +1028,13 @@
         elif volume_name:
             try:
                 sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
         elif schema_name:
             try:
                 sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
 
@@ -1064,7 +1094,7 @@
                        yield from base._ls_volume(recursive=recursive)
                    else:
                        yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                if not allow_not_found:
                    raise
        elif schema_name is None:
@@ -1082,7 +1112,7 @@
                        yield from base._ls_volume(recursive=recursive)
                    else:
                        yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                if not allow_not_found:
                    raise
        else:
@@ -1100,7 +1130,7 @@
                        yield from base._ls_volume(recursive=recursive)
                    else:
                        yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                if not allow_not_found:
                    raise
        else:
@@ -1121,7 +1151,7 @@
                        yield from base._ls_volume(recursive=recursive)
                    else:
                        yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                if not allow_not_found:
                    raise
 
@@ -1140,7 +1170,7 @@
                _is_dir=is_dir,
                _size=info.size,
            )
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
            if not allow_not_found:
                raise
 
@@ -1158,7 +1188,7 @@
                _is_dir=info.is_dir,
                _size=info.file_size,
            )
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
            if not allow_not_found:
                raise
 
@@ -1225,13 +1255,6 @@
        else:
            raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")
 
-    def write_bytes(self, data: bytes):
-        if hasattr(data, "read"):
-            data = data.read()
-
-        with self.open("wb") as f:
-            f.write_all_bytes(data=data)
-
     def temporary_credentials(
         self,
         operation: Optional["PathOperation"] = None
@@ -1248,6 +1271,14 @@
            operation=operation or PathOperation.PATH_READ,
        )
 
+    def read_bytes(self, use_cache: bool = False):
+        with self.open("rb") as f:
+            return f.read_all_bytes(use_cache=use_cache)
+
+    def write_bytes(self, data: Union[bytes, IO[bytes]]):
+        with self.open("wb") as f:
+            f.write_all_bytes(data=data)
+
     # -------------------------
     # Data ops (Arrow / Pandas / Polars)
     # -------------------------
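
The new read_bytes/write_bytes wrappers delegate to read_all_bytes/write_all_bytes on the object returned by open(), and write_bytes now accepts either raw bytes or a binary file object. A usage sketch; `path` is a hypothetical DatabricksPath pointing at a file and the local filename is illustrative:

    path.write_bytes(b"hello")                # bytes
    with open("local.bin", "rb") as src:
        path.write_bytes(src)                 # IO[bytes] is passed straight to write_all_bytes
    payload = path.read_bytes()               # read_all_bytes(use_cache=False)
    cached = path.read_bytes(use_cache=True)  # use_cache is forwarded to read_all_bytes
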
@@ -1267,6 +1298,8 @@
        Returns:
            A PyArrow Dataset instance.
        """
+        import pyarrow.dataset as ds
+
        filesystem = self.filesystem(workspace=workspace) if filesystem is None else filesystem
 
        return ds.dataset(
@@ -1356,7 +1389,6 @@
        table: pa.Table,
        file_format: Optional[FileFormat] = None,
        batch_size: Optional[int] = None,
-        **kwargs
    ):
        """Write an Arrow table to the path, sharding if needed.
 
@@ -1364,7 +1396,6 @@
            table: Arrow table to write.
            file_format: Optional file format override.
            batch_size: Optional batch size for writes.
-            **kwargs: Format-specific options.
 
        Returns:
            The DatabricksPath instance.
@@ -1377,7 +1408,11 @@
                    part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
 
                    with part_path.open(mode="wb") as f:
-                        f.write_arrow_batch(batch, file_format=file_format)
+                        f.write_arrow_batch(
+                            batch,
+                            file_format=file_format,
+                            batch_size=batch_size,
+                        )
 
                return connected
 
@@ -1387,7 +1422,6 @@
                    table,
                    file_format=file_format,
                    batch_size=batch_size,
-                    **kwargs
                )
 
        return self
@@ -1397,7 +1431,6 @@
        file_format: Optional[FileFormat] = None,
        batch_size: Optional[int] = None,
        concat: bool = True,
-        **kwargs
    ):
        """Read the path into a pandas DataFrame.
 
@@ -1405,7 +1438,6 @@
            file_format: Optional file format override.
            batch_size: Optional batch size for reads.
            concat: Whether to concatenate results for directories.
-            **kwargs: Format-specific options.
 
        Returns:
            A pandas DataFrame or list of DataFrames if concat=False.
@@ -1415,14 +1447,12 @@
                file_format=file_format,
                batch_size=batch_size,
                concat=True,
-                **kwargs
            ).to_pandas()
 
        tables = self.read_arrow_table(
            batch_size=batch_size,
            file_format=file_format,
            concat=False,
-            **kwargs
        )
 
        return [t.to_pandas() for t in tables]  # type: ignore[arg-type]
@@ -1432,7 +1462,6 @@
        df: PandasDataFrame,
        file_format: Optional[FileFormat] = None,
        batch_size: Optional[int] = None,
-        **kwargs
    ):
        """Write a pandas DataFrame to the path.
 
@@ -1440,7 +1469,6 @@
            df: pandas DataFrame to write.
            file_format: Optional file format override.
            batch_size: Optional batch size for writes.
-            **kwargs: Format-specific options.
 
        Returns:
            The DatabricksPath instance.
@@ -1461,7 +1489,6 @@
                    batch,
                    file_format=file_format,
                    batch_size=batch_size,
-                    **kwargs
                )
            else:
                with connected.open(mode="wb", clone=False) as f:
@@ -1469,7 +1496,6 @@
                        df,
                        file_format=file_format,
                        batch_size=batch_size,
-                        **kwargs
                    )
 
        return self
@@ -1521,7 +1547,6 @@
        df,
        file_format: Optional[FileFormat] = None,
        batch_size: Optional[int] = None,
-        **kwargs
    ):
        """
        Write Polars to a DatabricksPath.
@@ -1536,7 +1561,6 @@
            df: polars DataFrame or LazyFrame to write.
            file_format: Optional file format override.
            batch_size: Optional rows per part for directory sinks.
-            **kwargs: Format-specific options.
 
        Returns:
            The DatabricksPath instance.
@@ -1550,7 +1574,7 @@
        with self.connect() as connected:
            if connected.is_dir_sink():
                seed = int(time.time() * 1000)
-                rows_per_part = batch_size or 1_000_000
+                rows_per_part = batch_size or 1024 * 1024
 
                # Always parquet for directory sinks (lake layout standard)
                for i, chunk in enumerate(df.iter_slices(n_rows=rows_per_part)):
@@ -1561,7 +1585,6 @@
                    df,
                    file_format=file_format,
                    batch_size=batch_size,
-                    **kwargs
                )
            else:
                with connected.open(mode="wb", clone=False) as f:
@@ -1569,11 +1592,33 @@
                        df,
                        file_format=file_format,
                        batch_size=batch_size,
-                        **kwargs
                    )
 
        return self
 
+    def read_pickle(
+        self,
+    ) -> Any:
+        content = self.read_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_pickle(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_bytes(data=buffer.getvalue())
+
+
    def sql(
        self,
        query: str,
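
The new read_pickle/write_pickle helpers round-trip Python objects through dill, special-casing pandas DataFrames via DataFrame.to_pickle on write. A usage sketch; `path` is a hypothetical DatabricksPath pointing at a file:

    path.write_pickle({"run_id": 42, "ok": True})  # arbitrary objects via dill.dumps
    state = path.read_pickle()                     # dill.loads over read_bytes()
    # pandas DataFrames are serialized with DataFrame.to_pickle; dill can load
    # standard pickle payloads, so read_pickle reads them back as well.
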
@@ -1634,34 +1679,3 @@
            raise ValueError(
                "Invalid engine %s, must be in duckdb, polars" % engine
            )
-
-
-if databricks is not None:
-    @register_converter(DatabricksPath, pa.Table)
-    def databricks_path_to_arrow_table(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> pa.Table:
-        return cast_arrow_tabular(
-            data.read_arrow_table(),
-            options
-        )
-
-
-    @register_converter(DatabricksPath, ds.Dataset)
-    def databricks_path_to_arrow_table(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> ds.Dataset:
-        return data.arrow_dataset()
-
-
-    @polars_converter(DatabricksPath, PolarsDataFrame)
-    def databricks_path_to_polars(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> PolarsDataFrame:
-        return cast_polars_dataframe(
-            data.read_polars(),
-            options
-        )