ygg-0.1.57-py3-none-any.whl → ygg-0.1.60-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. yggdrasil/ai/__init__.py +2 -0
  4. yggdrasil/ai/session.py +89 -0
  5. yggdrasil/ai/sql_session.py +310 -0
  6. yggdrasil/databricks/__init__.py +0 -3
  7. yggdrasil/databricks/compute/cluster.py +68 -113
  8. yggdrasil/databricks/compute/command_execution.py +674 -0
  9. yggdrasil/databricks/compute/exceptions.py +19 -0
  10. yggdrasil/databricks/compute/execution_context.py +491 -282
  11. yggdrasil/databricks/compute/remote.py +4 -14
  12. yggdrasil/databricks/exceptions.py +10 -0
  13. yggdrasil/databricks/sql/__init__.py +0 -4
  14. yggdrasil/databricks/sql/engine.py +161 -173
  15. yggdrasil/databricks/sql/exceptions.py +9 -1
  16. yggdrasil/databricks/sql/statement_result.py +108 -120
  17. yggdrasil/databricks/sql/warehouse.py +331 -92
  18. yggdrasil/databricks/workspaces/io.py +89 -9
  19. yggdrasil/databricks/workspaces/path.py +120 -72
  20. yggdrasil/databricks/workspaces/workspace.py +214 -61
  21. yggdrasil/exceptions.py +7 -0
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -2
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/python_env.py +14 -13
  30. yggdrasil/pyutils/waiting_config.py +171 -0
  31. yggdrasil/types/cast/arrow_cast.py +3 -0
  32. yggdrasil/types/cast/pandas_cast.py +157 -169
  33. yggdrasil/types/cast/polars_cast.py +11 -43
  34. yggdrasil/types/dummy_class.py +81 -0
  35. yggdrasil/version.py +1 -1
  36. ygg-0.1.57.dist-info/RECORD +0 -66
  37. yggdrasil/databricks/ai/loki.py +0 -53
  38. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/WHEEL +0 -0
  39. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  40. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  41. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
  42. /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
yggdrasil/databricks/workspaces/path.py
@@ -5,12 +5,15 @@ from __future__ import annotations
 
 import dataclasses
 import datetime as dt
+import io
 import random
 import string
 import time
 from pathlib import PurePosixPath
-from typing import Optional, Tuple, Union, TYPE_CHECKING, List
+from threading import Thread
+from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
 
+import dill
 import pyarrow as pa
 import pyarrow.dataset as ds
 from pyarrow import ArrowInvalid
@@ -23,14 +26,15 @@ from .volumes_path import get_volume_status, get_volume_metadata
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
 from ...libs.polarslib import polars, PolarsDataFrame
-from ...types.cast.arrow_cast import cast_arrow_tabular
 from ...types.cast.cast_options import CastOptions
+from ...types.cast.pandas_cast import pandas_converter, cast_pandas_dataframe
 from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
 from ...types.cast.registry import convert, register_converter
 from ...types.file_format import ExcelFileFormat
 
 if databricks is not None:
-    from databricks.sdk.service.catalog import VolumeType, PathOperation, VolumeInfo
+    from databricks.sdk.errors import InternalError
+    from databricks.sdk.service.catalog import VolumeType, VolumeInfo, PathOperation
     from databricks.sdk.service.workspace import ObjectType
     from databricks.sdk.errors.platform import (
         NotFound,
@@ -176,6 +180,8 @@ class DatabricksPath:
         if not obj:
             return cls.empty_instance(workspace=workspace)
 
+        if isinstance(obj, str):
+            obj = [obj]
         if not isinstance(obj, (str, list)):
             if isinstance(obj, DatabricksPath):
                 if workspace is not None and obj._workspace is None:
@@ -191,6 +197,7 @@ class DatabricksPath:
             obj = str(obj)
 
 
+
         obj = _flatten_parts(obj)
 
         if obj and not obj[0]:
@@ -246,16 +253,23 @@ class DatabricksPath:
         if self._workspace is not None:
             self._workspace.__exit__(exc_type, exc_val, exc_tb)
 
+        self.close(wait=False)
+
     def __str__(self):
         return self.full_path()
 
     def __repr__(self):
         return self.url()
 
+    def __del__(self):
+        self.close(wait=False)
+
     def __fspath__(self):
         return self.full_path()
 
     def url(self):
+        if self._workspace is not None:
+            return self._workspace.safe_host + self.full_path()
         return "dbfs://%s" % self.full_path()
 
     def full_path(self) -> str:
@@ -282,7 +296,7 @@ class DatabricksPath:
         Returns:
             A PyArrow FileSystem instance.
         """
-        return self.workspace.filesytem(workspace=workspace)
+        return self.workspace.filesystem(workspace=workspace)
 
     @property
     def parent(self):
@@ -496,9 +510,15 @@ class DatabricksPath:
 
         return self
 
-    def close(self):
+    def close(self, wait: bool = True):
         if self.temporary:
-            self.remove(recursive=True)
+            if wait:
+                self.remove(recursive=True)
+            else:
+                Thread(
+                    target=self.remove,
+                    kwargs={"recursive": True}
+                ).start()
 
     def storage_location(self) -> str:
         info = self.volume_info()
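As a usage sketch (not part of the diff): the new `wait` flag controls whether cleanup of a temporary path blocks the caller. The `scratch` variable and the helper name below are illustrative assumptions; only `close(wait=...)` comes from the hunk above.

from yggdrasil.databricks.workspaces.path import DatabricksPath

def drop_scratch(scratch: DatabricksPath, wait_for_cleanup: bool = False) -> None:
    # close() only acts on paths flagged temporary; wait=True keeps the old
    # blocking behaviour, wait=False hands remove(recursive=True) off to a
    # background Thread (which is what __exit__ and __del__ now do).
    scratch.close(wait=wait_for_cleanup)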
@@ -586,7 +606,7 @@ class DatabricksPath:
             mtime = float(info.modified_at) / 1000.0 if info.modified_at is not None else None
 
             return self.reset_metadata(is_file=is_file, is_dir=is_dir, size=size, mtime=mtime)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             pass
 
         found = next(self.ls(fetch_size=1, recursive=False, allow_not_found=True), None)
@@ -730,7 +750,7 @@ class DatabricksPath:
                 properties=default_tags,
                 comment="Catalog auto generated by yggdrasil"
             )
-        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
            if not exist_ok:
                raise
 
@@ -742,7 +762,7 @@ class DatabricksPath:
                 properties=default_tags,
                 comment="Schema auto generated by yggdrasil"
             )
-        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+        except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
            if not exist_ok:
                raise
 
@@ -808,37 +828,54 @@ class DatabricksPath:
 
     def remove(
         self,
-        recursive: bool = True
+        recursive: bool = True,
+        allow_not_found: bool = True
     ):
         """Remove the path as a file or directory.
 
         Args:
             recursive: Whether to delete directories recursively.
+            allow_not_found: Allow not found path
 
         Returns:
             The DatabricksPath instance.
         """
         if self.kind == DatabricksPathKind.VOLUME:
-            return self._remove_volume_obj(recursive=recursive)
+            return self._remove_volume_obj(recursive=recursive, allow_not_found=allow_not_found)
         elif self.kind == DatabricksPathKind.WORKSPACE:
-            return self._remove_workspace_obj(recursive=recursive)
+            return self._remove_workspace_obj(recursive=recursive, allow_not_found=allow_not_found)
         elif self.kind == DatabricksPathKind.DBFS:
-            return self._remove_dbfs_obj(recursive=recursive)
+            return self._remove_dbfs_obj(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_volume_obj(self, recursive: bool = True):
+    def _remove_volume_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_volume_file()
-        return self._remove_volume_dir(recursive=recursive)
+            return self._remove_volume_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_volume_dir(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_workspace_obj(self, recursive: bool = True):
+    def _remove_workspace_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_workspace_file()
-        return self._remove_workspace_dir(recursive=recursive)
+            return self._remove_workspace_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_workspace_dir(recursive=recursive, allow_not_found=allow_not_found)
 
-    def _remove_dbfs_obj(self, recursive: bool = True):
+    def _remove_dbfs_obj(
+        self,
+        recursive: bool = True,
+        allow_not_found: bool = True
+    ):
         if self.is_file():
-            return self._remove_dbfs_file()
-        return self._remove_dbfs_dir(recursive=recursive)
+            return self._remove_dbfs_file(allow_not_found=allow_not_found)
+        elif self.is_dir():
+            return self._remove_dbfs_dir(recursive=recursive, allow_not_found=allow_not_found)
 
     def rmfile(self, allow_not_found: bool = True):
         """Remove the path as a file.
@@ -859,7 +896,7 @@ class DatabricksPath:
         sdk = self.workspace.sdk()
         try:
             sdk.files.delete(self.files_full_path())
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -871,7 +908,7 @@ class DatabricksPath:
         sdk = self.workspace.sdk()
         try:
             sdk.workspace.delete(self.workspace_full_path(), recursive=True)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -883,7 +920,7 @@ class DatabricksPath:
         sdk = self.workspace.sdk()
         try:
             sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -940,7 +977,7 @@ class DatabricksPath:
 
             if not with_root:
                 sdk.workspace.mkdirs(full_path)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -962,7 +999,7 @@ class DatabricksPath:
 
             if not with_root:
                 sdk.dbfs.mkdirs(full_path)
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
         finally:
@@ -983,7 +1020,7 @@ class DatabricksPath:
         if rel:
             try:
                 sdk.files.delete_directory(full_path)
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied) as e:
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError) as e:
                 message = str(e)
 
                 if recursive and "directory is not empty" in message:
@@ -998,13 +1035,13 @@ class DatabricksPath:
         elif volume_name:
             try:
                 sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
         elif schema_name:
             try:
                 sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
 
@@ -1064,7 +1101,7 @@ class DatabricksPath:
                     yield from base._ls_volume(recursive=recursive)
                 else:
                     yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
         elif schema_name is None:
@@ -1082,7 +1119,7 @@ class DatabricksPath:
                     yield from base._ls_volume(recursive=recursive)
                 else:
                     yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
         else:
@@ -1100,7 +1137,7 @@ class DatabricksPath:
                     yield from base._ls_volume(recursive=recursive)
                 else:
                     yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
         else:
@@ -1121,7 +1158,7 @@ class DatabricksPath:
                     yield from base._ls_volume(recursive=recursive)
                 else:
                     yield base
-            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+            except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
                 if not allow_not_found:
                     raise
 
@@ -1140,7 +1177,7 @@ class DatabricksPath:
                 _is_dir=is_dir,
                 _size=info.size,
             )
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
 
@@ -1158,7 +1195,7 @@ class DatabricksPath:
                 _is_dir=info.is_dir,
                 _size=info.file_size,
            )
-        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+        except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
             if not allow_not_found:
                 raise
 
@@ -1225,13 +1262,6 @@ class DatabricksPath:
         else:
             raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")
 
-    def write_bytes(self, data: bytes):
-        if hasattr(data, "read"):
-            data = data.read()
-
-        with self.open("wb") as f:
-            f.write_all_bytes(data=data)
-
     def temporary_credentials(
         self,
         operation: Optional["PathOperation"] = None
@@ -1248,6 +1278,14 @@ class DatabricksPath:
             operation=operation or PathOperation.PATH_READ,
         )
 
+    def read_bytes(self, use_cache: bool = False):
+        with self.open("rb") as f:
+            return f.read_all_bytes(use_cache=use_cache)
+
+    def write_bytes(self, data: Union[bytes, IO[bytes]]):
+        with self.open("wb") as f:
+            f.write_all_bytes(data=data)
+
     # -------------------------
     # Data ops (Arrow / Pandas / Polars)
     # -------------------------
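A small usage sketch of the new byte-level helpers (not part of the diff); `src` and `dst` are assumed to be valid paths on a reachable workspace, and read_bytes() is assumed to return the raw bytes of the file.

import io

from yggdrasil.databricks.workspaces.path import DatabricksPath

def copy_payload(src: DatabricksPath, dst: DatabricksPath) -> None:
    # read_bytes() wraps open("rb") + read_all_bytes(); caching stays off by default.
    payload = src.read_bytes(use_cache=False)

    # write_bytes() now accepts either raw bytes or a binary file-like object.
    dst.write_bytes(data=io.BytesIO(payload))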
@@ -1356,7 +1394,6 @@ class DatabricksPath:
         table: pa.Table,
         file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
-        **kwargs
     ):
         """Write an Arrow table to the path, sharding if needed.
 
@@ -1364,7 +1401,6 @@ class DatabricksPath:
             table: Arrow table to write.
             file_format: Optional file format override.
             batch_size: Optional batch size for writes.
-            **kwargs: Format-specific options.
 
         Returns:
             The DatabricksPath instance.
@@ -1377,7 +1413,11 @@ class DatabricksPath:
                 part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
 
                 with part_path.open(mode="wb") as f:
-                    f.write_arrow_batch(batch, file_format=file_format)
+                    f.write_arrow_batch(
+                        batch,
+                        file_format=file_format,
+                        batch_size=batch_size,
+                    )
 
             return connected
 
@@ -1387,7 +1427,6 @@ class DatabricksPath:
                     table,
                     file_format=file_format,
                     batch_size=batch_size,
-                    **kwargs
                 )
 
         return self
@@ -1397,7 +1436,6 @@ class DatabricksPath:
         file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         concat: bool = True,
-        **kwargs
     ):
         """Read the path into a pandas DataFrame.
 
@@ -1405,7 +1443,6 @@ class DatabricksPath:
             file_format: Optional file format override.
             batch_size: Optional batch size for reads.
             concat: Whether to concatenate results for directories.
-            **kwargs: Format-specific options.
 
         Returns:
             A pandas DataFrame or list of DataFrames if concat=False.
@@ -1415,14 +1452,12 @@ class DatabricksPath:
                 file_format=file_format,
                 batch_size=batch_size,
                 concat=True,
-                **kwargs
             ).to_pandas()
 
         tables = self.read_arrow_table(
             batch_size=batch_size,
             file_format=file_format,
             concat=False,
-            **kwargs
         )
 
         return [t.to_pandas() for t in tables]  # type: ignore[arg-type]
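Since the `**kwargs` passthrough has been dropped, a pandas read now looks like the sketch below; the helper name is illustrative, and whether `path` points at a single file or a directory of parts is an assumption.

from yggdrasil.databricks.workspaces.path import DatabricksPath

def load_frames(path: DatabricksPath):
    # concat=True (the default) merges every file under a directory path into
    # one pandas DataFrame via read_arrow_table(); concat=False keeps one
    # DataFrame per underlying file.
    combined = path.read_pandas(file_format=None, batch_size=None, concat=True)
    per_file = path.read_pandas(concat=False)
    return combined, per_file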
@@ -1432,7 +1467,6 @@ class DatabricksPath:
         df: PandasDataFrame,
         file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
-        **kwargs
     ):
         """Write a pandas DataFrame to the path.
 
@@ -1440,7 +1474,6 @@ class DatabricksPath:
             df: pandas DataFrame to write.
             file_format: Optional file format override.
             batch_size: Optional batch size for writes.
-            **kwargs: Format-specific options.
 
         Returns:
             The DatabricksPath instance.
@@ -1461,7 +1494,6 @@ class DatabricksPath:
                         batch,
                         file_format=file_format,
                         batch_size=batch_size,
-                        **kwargs
                     )
             else:
                 with connected.open(mode="wb", clone=False) as f:
@@ -1469,7 +1501,6 @@ class DatabricksPath:
                         df,
                         file_format=file_format,
                         batch_size=batch_size,
-                        **kwargs
                     )
 
         return self
@@ -1521,7 +1552,6 @@ class DatabricksPath:
         df,
         file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
-        **kwargs
     ):
         """
         Write Polars to a DatabricksPath.
@@ -1536,7 +1566,6 @@ class DatabricksPath:
             df: polars DataFrame or LazyFrame to write.
             file_format: Optional file format override.
             batch_size: Optional rows per part for directory sinks.
-            **kwargs: Format-specific options.
 
         Returns:
             The DatabricksPath instance.
@@ -1550,7 +1579,7 @@ class DatabricksPath:
         with self.connect() as connected:
             if connected.is_dir_sink():
                 seed = int(time.time() * 1000)
-                rows_per_part = batch_size or 1_000_000
+                rows_per_part = batch_size or 1024 * 1024
 
                 # Always parquet for directory sinks (lake layout standard)
                 for i, chunk in enumerate(df.iter_slices(n_rows=rows_per_part)):
@@ -1561,7 +1590,6 @@ class DatabricksPath:
                         df,
                         file_format=file_format,
                         batch_size=batch_size,
-                        **kwargs
                     )
             else:
                 with connected.open(mode="wb", clone=False) as f:
@@ -1569,11 +1597,33 @@ class DatabricksPath:
                         df,
                         file_format=file_format,
                         batch_size=batch_size,
-                        **kwargs
                     )
 
         return self
 
+    def read_pickle(
+        self,
+    ) -> Any:
+        content = self.read_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_pickle(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_bytes(data=buffer.getvalue())
+
+
     def sql(
         self,
         query: str,
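A minimal round-trip sketch for the new pickle helpers (not part of the diff); the dictionary payload and the helper name are purely illustrative.

from yggdrasil.databricks.workspaces.path import DatabricksPath

def roundtrip(path: DatabricksPath) -> None:
    state = {"model": "demo", "threshold": 0.5}

    # write_pickle() serialises arbitrary objects with dill (pandas DataFrames
    # are routed through DataFrame.to_pickle) and uploads the buffer via write_bytes().
    path.write_pickle(state)

    # read_pickle() pulls the raw bytes back and dill.loads() them.
    restored = path.read_pickle()
    assert restored == state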
@@ -1635,19 +1685,7 @@
                 "Invalid engine %s, must be in duckdb, polars" % engine
             )
 
-
 if databricks is not None:
-    @register_converter(DatabricksPath, pa.Table)
-    def databricks_path_to_arrow_table(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> pa.Table:
-        return cast_arrow_tabular(
-            data.read_arrow_table(),
-            options
-        )
-
-
     @register_converter(DatabricksPath, ds.Dataset)
     def databricks_path_to_arrow_table(
         data: DatabricksPath,
@@ -1656,6 +1694,16 @@ if databricks is not None:
         return data.arrow_dataset()
 
 
+    @pandas_converter(DatabricksPath, PandasDataFrame)
+    def databricks_path_to_pandas(
+        data: DatabricksPath,
+        options: Optional[CastOptions] = None,
+    ) -> PolarsDataFrame:
+        return cast_pandas_dataframe(
+            data.read_pandas(),
+            options
+        )
+
     @polars_converter(DatabricksPath, PolarsDataFrame)
     def databricks_path_to_polars(
         data: DatabricksPath,
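To illustrate the registration pattern used in these hunks, here is a hypothetical extra converter: the `list` target type and the `databricks_path_to_records` name are inventions for illustration only, while the decorator shape and the import paths mirror what the diff shows.

from typing import Optional

from yggdrasil.databricks.workspaces.path import DatabricksPath
from yggdrasil.types.cast.cast_options import CastOptions
from yggdrasil.types.cast.registry import register_converter

@register_converter(DatabricksPath, list)
def databricks_path_to_records(
    data: DatabricksPath,
    options: Optional[CastOptions] = None,
) -> list:
    # Reuse the pandas reader registered above and hand back plain row dicts.
    return data.read_pandas().to_dict(orient="records")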