ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
  4. yggdrasil/ai/__init__.py +2 -0
  5. yggdrasil/ai/session.py +89 -0
  6. yggdrasil/ai/sql_session.py +310 -0
  7. yggdrasil/databricks/__init__.py +0 -3
  8. yggdrasil/databricks/compute/cluster.py +68 -113
  9. yggdrasil/databricks/compute/command_execution.py +674 -0
  10. yggdrasil/databricks/compute/exceptions.py +7 -2
  11. yggdrasil/databricks/compute/execution_context.py +465 -277
  12. yggdrasil/databricks/compute/remote.py +4 -14
  13. yggdrasil/databricks/exceptions.py +10 -0
  14. yggdrasil/databricks/sql/__init__.py +0 -4
  15. yggdrasil/databricks/sql/engine.py +161 -173
  16. yggdrasil/databricks/sql/exceptions.py +9 -1
  17. yggdrasil/databricks/sql/statement_result.py +108 -120
  18. yggdrasil/databricks/sql/warehouse.py +331 -92
  19. yggdrasil/databricks/workspaces/io.py +92 -9
  20. yggdrasil/databricks/workspaces/path.py +120 -74
  21. yggdrasil/databricks/workspaces/workspace.py +212 -68
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -0
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/mimetypes.py +0 -0
  30. yggdrasil/pyutils/python_env.py +13 -12
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/types/cast/arrow_cast.py +3 -0
  33. yggdrasil/types/cast/pandas_cast.py +157 -169
  34. yggdrasil/types/cast/polars_cast.py +11 -43
  35. yggdrasil/types/dummy_class.py +81 -0
  36. yggdrasil/version.py +1 -1
  37. ygg-0.1.56.dist-info/RECORD +0 -68
  38. yggdrasil/databricks/ai/__init__.py +0 -1
  39. yggdrasil/databricks/ai/loki.py +0 -374
  40. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  41. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  42. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,15 @@ from __future__ import annotations
 
  import dataclasses
  import datetime as dt
+ import io
  import random
  import string
  import time
  from pathlib import PurePosixPath
- from typing import Optional, Tuple, Union, TYPE_CHECKING, List
+ from threading import Thread
+ from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
 
+ import dill
  import pyarrow as pa
  import pyarrow.dataset as ds
  from pyarrow import ArrowInvalid
@@ -23,14 +26,15 @@ from .volumes_path import get_volume_status, get_volume_metadata
  from ...libs.databrickslib import databricks
  from ...libs.pandaslib import PandasDataFrame
  from ...libs.polarslib import polars, PolarsDataFrame
- from ...types.cast.arrow_cast import cast_arrow_tabular
  from ...types.cast.cast_options import CastOptions
+ from ...types.cast.pandas_cast import pandas_converter, cast_pandas_dataframe
  from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
  from ...types.cast.registry import convert, register_converter
  from ...types.file_format import ExcelFileFormat
 
  if databricks is not None:
- from databricks.sdk.service.catalog import VolumeType, VolumeInfo
+ from databricks.sdk.errors import InternalError
+ from databricks.sdk.service.catalog import VolumeType, VolumeInfo, PathOperation
  from databricks.sdk.service.workspace import ObjectType
  from databricks.sdk.errors.platform import (
  NotFound,
@@ -176,6 +180,8 @@ class DatabricksPath:
  if not obj:
  return cls.empty_instance(workspace=workspace)
 
+ if isinstance(obj, str):
+ obj = [obj]
  if not isinstance(obj, (str, list)):
  if isinstance(obj, DatabricksPath):
  if workspace is not None and obj._workspace is None:
@@ -191,6 +197,7 @@ class DatabricksPath:
  obj = str(obj)
 
 
+
  obj = _flatten_parts(obj)
 
  if obj and not obj[0]:
@@ -246,16 +253,23 @@ class DatabricksPath:
  if self._workspace is not None:
  self._workspace.__exit__(exc_type, exc_val, exc_tb)
 
+ self.close(wait=False)
+
  def __str__(self):
  return self.full_path()
 
  def __repr__(self):
  return self.url()
 
+ def __del__(self):
+ self.close(wait=False)
+
  def __fspath__(self):
  return self.full_path()
 
  def url(self):
+ if self._workspace is not None:
+ return self._workspace.safe_host + self.full_path()
  return "dbfs://%s" % self.full_path()
 
  def full_path(self) -> str:
@@ -282,7 +296,7 @@ class DatabricksPath:
  Returns:
  A PyArrow FileSystem instance.
  """
- return self.workspace.filesytem(workspace=workspace)
+ return self.workspace.filesystem(workspace=workspace)
 
  @property
  def parent(self):
@@ -496,9 +510,15 @@ class DatabricksPath:
 
  return self
 
- def close(self):
+ def close(self, wait: bool = True):
  if self.temporary:
- self.remove(recursive=True)
+ if wait:
+ self.remove(recursive=True)
+ else:
+ Thread(
+ target=self.remove,
+ kwargs={"recursive": True}
+ ).start()
 
  def storage_location(self) -> str:
  info = self.volume_info()
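In practical terms, the close() change above means that context-manager exit and garbage collection (the new __exit__/__del__ hooks) no longer block on deleting a temporary path: cleanup is handed to a background thread. A minimal sketch of the two modes, assuming p is an already-constructed temporary DatabricksPath (construction omitted; the calls are illustrative, not taken from the diff):

    p.close(wait=True)    # synchronous: remove(recursive=True) completes before returning
    p.close(wait=False)   # non-blocking: remove(recursive=True) is started on a new Thread
    # __exit__ and __del__ now call close(wait=False), so leaving a `with` block or
    # dropping the last reference starts the delete instead of waiting for it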
@@ -586,7 +606,7 @@ class DatabricksPath:
  mtime = float(info.modified_at) / 1000.0 if info.modified_at is not None else None
 
  return self.reset_metadata(is_file=is_file, is_dir=is_dir, size=size, mtime=mtime)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  pass
 
  found = next(self.ls(fetch_size=1, recursive=False, allow_not_found=True), None)
@@ -730,7 +750,7 @@ class DatabricksPath:
  properties=default_tags,
  comment="Catalog auto generated by yggdrasil"
  )
- except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+ except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
  if not exist_ok:
  raise
 
@@ -742,7 +762,7 @@ class DatabricksPath:
  properties=default_tags,
  comment="Schema auto generated by yggdrasil"
  )
- except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
+ except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest, InternalError):
  if not exist_ok:
  raise
 
@@ -808,37 +828,54 @@ class DatabricksPath:
 
  def remove(
  self,
- recursive: bool = True
+ recursive: bool = True,
+ allow_not_found: bool = True
  ):
  """Remove the path as a file or directory.
 
  Args:
  recursive: Whether to delete directories recursively.
+ allow_not_found: Allow not found path
 
  Returns:
  The DatabricksPath instance.
  """
  if self.kind == DatabricksPathKind.VOLUME:
- return self._remove_volume_obj(recursive=recursive)
+ return self._remove_volume_obj(recursive=recursive, allow_not_found=allow_not_found)
  elif self.kind == DatabricksPathKind.WORKSPACE:
- return self._remove_workspace_obj(recursive=recursive)
+ return self._remove_workspace_obj(recursive=recursive, allow_not_found=allow_not_found)
  elif self.kind == DatabricksPathKind.DBFS:
- return self._remove_dbfs_obj(recursive=recursive)
+ return self._remove_dbfs_obj(recursive=recursive, allow_not_found=allow_not_found)
 
- def _remove_volume_obj(self, recursive: bool = True):
+ def _remove_volume_obj(
+ self,
+ recursive: bool = True,
+ allow_not_found: bool = True
+ ):
  if self.is_file():
- return self._remove_volume_file()
- return self._remove_volume_dir(recursive=recursive)
+ return self._remove_volume_file(allow_not_found=allow_not_found)
+ elif self.is_dir():
+ return self._remove_volume_dir(recursive=recursive, allow_not_found=allow_not_found)
 
- def _remove_workspace_obj(self, recursive: bool = True):
+ def _remove_workspace_obj(
+ self,
+ recursive: bool = True,
+ allow_not_found: bool = True
+ ):
  if self.is_file():
- return self._remove_workspace_file()
- return self._remove_workspace_dir(recursive=recursive)
+ return self._remove_workspace_file(allow_not_found=allow_not_found)
+ elif self.is_dir():
+ return self._remove_workspace_dir(recursive=recursive, allow_not_found=allow_not_found)
 
- def _remove_dbfs_obj(self, recursive: bool = True):
+ def _remove_dbfs_obj(
+ self,
+ recursive: bool = True,
+ allow_not_found: bool = True
+ ):
  if self.is_file():
- return self._remove_dbfs_file()
- return self._remove_dbfs_dir(recursive=recursive)
+ return self._remove_dbfs_file(allow_not_found=allow_not_found)
+ elif self.is_dir():
+ return self._remove_dbfs_dir(recursive=recursive, allow_not_found=allow_not_found)
 
  def rmfile(self, allow_not_found: bool = True):
  """Remove the path as a file.
@@ -859,7 +896,7 @@ class DatabricksPath:
  sdk = self.workspace.sdk()
  try:
  sdk.files.delete(self.files_full_path())
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  finally:
@@ -871,7 +908,7 @@ class DatabricksPath:
  sdk = self.workspace.sdk()
  try:
  sdk.workspace.delete(self.workspace_full_path(), recursive=True)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  finally:
@@ -883,7 +920,7 @@ class DatabricksPath:
  sdk = self.workspace.sdk()
  try:
  sdk.dbfs.delete(self.dbfs_full_path(), recursive=True)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  finally:
@@ -940,7 +977,7 @@ class DatabricksPath:
 
  if not with_root:
  sdk.workspace.mkdirs(full_path)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  finally:
@@ -962,7 +999,7 @@ class DatabricksPath:
 
  if not with_root:
  sdk.dbfs.mkdirs(full_path)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  finally:
@@ -983,7 +1020,7 @@ class DatabricksPath:
  if rel:
  try:
  sdk.files.delete_directory(full_path)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied) as e:
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError) as e:
  message = str(e)
 
  if recursive and "directory is not empty" in message:
@@ -998,13 +1035,13 @@ class DatabricksPath:
  elif volume_name:
  try:
  sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  elif schema_name:
  try:
  sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
 
@@ -1064,7 +1101,7 @@ class DatabricksPath:
  yield from base._ls_volume(recursive=recursive)
  else:
  yield base
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  elif schema_name is None:
@@ -1082,7 +1119,7 @@ class DatabricksPath:
  yield from base._ls_volume(recursive=recursive)
  else:
  yield base
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  else:
@@ -1100,7 +1137,7 @@ class DatabricksPath:
  yield from base._ls_volume(recursive=recursive)
  else:
  yield base
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
  else:
@@ -1121,7 +1158,7 @@ class DatabricksPath:
  yield from base._ls_volume(recursive=recursive)
  else:
  yield base
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
 
@@ -1140,7 +1177,7 @@ class DatabricksPath:
  _is_dir=is_dir,
  _size=info.size,
  )
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
 
@@ -1158,7 +1195,7 @@ class DatabricksPath:
  _is_dir=info.is_dir,
  _size=info.file_size,
  )
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, InternalError):
  if not allow_not_found:
  raise
 
@@ -1225,19 +1262,10 @@ class DatabricksPath:
  else:
  raise FileNotFoundError(f"Path {self} does not exist, or dest is not same file or folder type")
 
- def write_bytes(self, data: bytes):
- if hasattr(data, "read"):
- data = data.read()
-
- with self.open("wb") as f:
- f.write_all_bytes(data=data)
-
  def temporary_credentials(
  self,
  operation: Optional["PathOperation"] = None
  ):
- from databricks.sdk.service.catalog import PathOperation
-
  if self.kind != DatabricksPathKind.VOLUME:
  raise ValueError(f"Cannot generate temporary credentials for {repr(self)}")
 
@@ -1250,6 +1278,14 @@ class DatabricksPath:
  operation=operation or PathOperation.PATH_READ,
  )
 
+ def read_bytes(self, use_cache: bool = False):
+ with self.open("rb") as f:
+ return f.read_all_bytes(use_cache=use_cache)
+
+ def write_bytes(self, data: Union[bytes, IO[bytes]]):
+ with self.open("wb") as f:
+ f.write_all_bytes(data=data)
+
  # -------------------------
  # Data ops (Arrow / Pandas / Polars)
  # -------------------------
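The read_bytes/write_bytes pair added here replaces the old write_bytes and wraps open() directly; per the new Union[bytes, IO[bytes]] signature, write_bytes accepts either raw bytes or a binary file-like object. A minimal usage sketch, assuming p is a connected DatabricksPath (file names and payloads are illustrative):

    p.write_bytes(b"hello world")              # raw bytes
    with open("payload.bin", "rb") as fh:
        p.write_bytes(fh)                      # binary file-like object, per the type hint

    data = p.read_bytes()                      # read the object back in full
    cached = p.read_bytes(use_cache=True)      # optionally reuse a cached read, per the new flag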
@@ -1358,7 +1394,6 @@ class DatabricksPath:
  table: pa.Table,
  file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
- **kwargs
  ):
  """Write an Arrow table to the path, sharding if needed.
 
@@ -1366,7 +1401,6 @@ class DatabricksPath:
  table: Arrow table to write.
  file_format: Optional file format override.
  batch_size: Optional batch size for writes.
- **kwargs: Format-specific options.
 
  Returns:
  The DatabricksPath instance.
@@ -1379,7 +1413,11 @@ class DatabricksPath:
  part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
 
  with part_path.open(mode="wb") as f:
- f.write_arrow_batch(batch, file_format=file_format)
+ f.write_arrow_batch(
+ batch,
+ file_format=file_format,
+ batch_size=batch_size,
+ )
 
  return connected
 
@@ -1389,7 +1427,6 @@ class DatabricksPath:
  table,
  file_format=file_format,
  batch_size=batch_size,
- **kwargs
  )
 
  return self
@@ -1399,7 +1436,6 @@ class DatabricksPath:
  file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
  concat: bool = True,
- **kwargs
  ):
  """Read the path into a pandas DataFrame.
 
@@ -1407,7 +1443,6 @@ class DatabricksPath:
  file_format: Optional file format override.
  batch_size: Optional batch size for reads.
  concat: Whether to concatenate results for directories.
- **kwargs: Format-specific options.
 
  Returns:
  A pandas DataFrame or list of DataFrames if concat=False.
@@ -1417,14 +1452,12 @@ class DatabricksPath:
  file_format=file_format,
  batch_size=batch_size,
  concat=True,
- **kwargs
  ).to_pandas()
 
  tables = self.read_arrow_table(
  batch_size=batch_size,
  file_format=file_format,
  concat=False,
- **kwargs
  )
 
  return [t.to_pandas() for t in tables] # type: ignore[arg-type]
@@ -1434,7 +1467,6 @@ class DatabricksPath:
  df: PandasDataFrame,
  file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
- **kwargs
  ):
  """Write a pandas DataFrame to the path.
 
@@ -1442,7 +1474,6 @@ class DatabricksPath:
  df: pandas DataFrame to write.
  file_format: Optional file format override.
  batch_size: Optional batch size for writes.
- **kwargs: Format-specific options.
 
  Returns:
  The DatabricksPath instance.
@@ -1463,7 +1494,6 @@ class DatabricksPath:
  batch,
  file_format=file_format,
  batch_size=batch_size,
- **kwargs
  )
  else:
  with connected.open(mode="wb", clone=False) as f:
@@ -1471,7 +1501,6 @@ class DatabricksPath:
  df,
  file_format=file_format,
  batch_size=batch_size,
- **kwargs
  )
 
  return self
@@ -1523,7 +1552,6 @@ class DatabricksPath:
  df,
  file_format: Optional[FileFormat] = None,
  batch_size: Optional[int] = None,
- **kwargs
  ):
  """
  Write Polars to a DatabricksPath.
@@ -1538,7 +1566,6 @@ class DatabricksPath:
  df: polars DataFrame or LazyFrame to write.
  file_format: Optional file format override.
  batch_size: Optional rows per part for directory sinks.
- **kwargs: Format-specific options.
 
  Returns:
  The DatabricksPath instance.
@@ -1552,7 +1579,7 @@ class DatabricksPath:
  with self.connect() as connected:
  if connected.is_dir_sink():
  seed = int(time.time() * 1000)
- rows_per_part = batch_size or 1_000_000
+ rows_per_part = batch_size or 1024 * 1024
 
  # Always parquet for directory sinks (lake layout standard)
  for i, chunk in enumerate(df.iter_slices(n_rows=rows_per_part)):
@@ -1563,7 +1590,6 @@ class DatabricksPath:
  df,
  file_format=file_format,
  batch_size=batch_size,
- **kwargs
  )
  else:
  with connected.open(mode="wb", clone=False) as f:
@@ -1571,11 +1597,33 @@ class DatabricksPath:
  df,
  file_format=file_format,
  batch_size=batch_size,
- **kwargs
  )
 
  return self
 
+ def read_pickle(
+ self,
+ ) -> Any:
+ content = self.read_bytes()
+ obj = dill.loads(content)
+
+ return obj
+
+ def write_pickle(
+ self,
+ obj: Any,
+ file_format: Optional[FileFormat] = None,
+ ):
+ buffer = io.BytesIO()
+
+ if isinstance(obj, PandasDataFrame):
+ obj.to_pickle(buffer)
+ else:
+ buffer.write(dill.dumps(obj))
+
+ self.write_bytes(data=buffer.getvalue())
+
+
  def sql(
  self,
  query: str,
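The new pickle helpers layer on the byte helpers above: write_pickle serializes arbitrary objects with dill (pandas DataFrames go through DataFrame.to_pickle instead) and hands the bytes to write_bytes, while read_pickle reads the bytes back and calls dill.loads. A minimal round-trip sketch, assuming p is a writable DatabricksPath (the payload is illustrative):

    state = {"run_id": 42, "status": "ok"}
    p.write_pickle(state)          # dill.dumps -> write_bytes
    restored = p.read_pickle()     # read_bytes -> dill.loads
    assert restored == state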
@@ -1637,19 +1685,7 @@ class DatabricksPath:
  "Invalid engine %s, must be in duckdb, polars" % engine
  )
 
-
  if databricks is not None:
- @register_converter(DatabricksPath, pa.Table)
- def databricks_path_to_arrow_table(
- data: DatabricksPath,
- options: Optional[CastOptions] = None,
- ) -> pa.Table:
- return cast_arrow_tabular(
- data.read_arrow_table(),
- options
- )
-
-
  @register_converter(DatabricksPath, ds.Dataset)
  def databricks_path_to_arrow_table(
  data: DatabricksPath,
@@ -1658,6 +1694,16 @@ if databricks is not None:
  return data.arrow_dataset()
 
 
+ @pandas_converter(DatabricksPath, PandasDataFrame)
+ def databricks_path_to_pandas(
+ data: DatabricksPath,
+ options: Optional[CastOptions] = None,
+ ) -> PolarsDataFrame:
+ return cast_pandas_dataframe(
+ data.read_pandas(),
+ options
+ )
+
  @polars_converter(DatabricksPath, PolarsDataFrame)
  def databricks_path_to_polars(
  data: DatabricksPath,