ygg 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/METADATA +1 -1
  2. ygg-0.1.33.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +244 -3
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +24 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/path.py
@@ -1,3 +1,5 @@
+"""Databricks path abstraction spanning DBFS, workspace, and volumes."""
+
 # src/yggdrasil/databricks/workspaces/databricks_path.py
 from __future__ import annotations
 
@@ -49,6 +51,14 @@ __all__ = [
 
 
 def _flatten_parts(parts: Union[list[str], str]) -> list[str]:
+    """Normalize path parts by splitting on '/' and removing empties.
+
+    Args:
+        parts: String or list of path parts.
+
+    Returns:
+        A flattened list of path components.
+    """
     if isinstance(parts, str):
         parts = [parts]
 
@@ -64,12 +74,21 @@ def _flatten_parts(parts: Union[list[str], str]) -> list[str]:
 
 
 def _rand_str(n: int) -> str:
+    """Return a random alphanumeric string of length ``n``.
+
+    Args:
+        n: Length of the random string.
+
+    Returns:
+        Random alphanumeric string.
+    """
     alphabet = string.ascii_letters + string.digits
     return "".join(random.choices(alphabet, k=n))
 
 
 @dataclasses.dataclass
 class DatabricksPath:
+    """Path wrapper for Databricks workspace, volumes, and DBFS objects."""
     kind: DatabricksPathKind
     parts: List[str]
 
@@ -113,6 +132,15 @@ class DatabricksPath:
         obj: Union["DatabricksPath", str, List[str]],
         workspace: Optional["Workspace"] = None,
     ) -> "DatabricksPath":
+        """Parse input into a DatabricksPath instance.
+
+        Args:
+            obj: Input path, DatabricksPath, or path parts list.
+            workspace: Optional Workspace to bind to the path.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         if not obj:
             return DatabricksPath(kind=DatabricksPathKind.DBFS, parts=[], _workspace=workspace)
 
@@ -194,6 +222,11 @@ class DatabricksPath:
         return "dbfs://%s" % self.full_path()
 
     def full_path(self) -> str:
+        """Return the fully qualified path for this namespace.
+
+        Returns:
+            The fully qualified path string.
+        """
         if self.kind == DatabricksPathKind.DBFS:
             return self.dbfs_full_path()
         elif self.kind == DatabricksPathKind.WORKSPACE:
@@ -204,10 +237,23 @@ class DatabricksPath:
         raise ValueError(f"Unknown DatabricksPath kind: {self.kind!r}")
 
     def filesystem(self, workspace: Optional["Workspace"] = None):
+        """Return a PyArrow filesystem adapter for this workspace.
+
+        Args:
+            workspace: Optional workspace override.
+
+        Returns:
+            A PyArrow FileSystem instance.
+        """
         return self.workspace.filesytem(workspace=workspace)
 
     @property
     def parent(self):
+        """Return the parent path.
+
+        Returns:
+            A DatabricksPath representing the parent.
+        """
         if not self.parts:
             return self
 
@@ -226,6 +272,11 @@ class DatabricksPath:
 
     @property
     def workspace(self):
+        """Return the associated Workspace instance.
+
+        Returns:
+            The Workspace associated with this path.
+        """
         if self._workspace is None:
             from .workspace import Workspace
 
@@ -238,6 +289,11 @@ class DatabricksPath:
 
     @property
     def name(self) -> str:
+        """Return the final path component.
+
+        Returns:
+            The final path name component.
+        """
         if not self.parts:
             return ""
 
@@ -248,6 +304,11 @@ class DatabricksPath:
 
     @property
     def extension(self) -> str:
+        """Return the file extension for the path, if any.
+
+        Returns:
+            The file extension without leading dot.
+        """
         name = self.name
         if "." in name:
             return name.split(".")[-1]
@@ -255,6 +316,11 @@ class DatabricksPath:
 
     @property
     def file_format(self) -> FileFormat:
+        """Infer the file format from the file extension.
+
+        Returns:
+            A PyArrow FileFormat instance.
+        """
         ext = self.extension
 
         if ext == "parquet":
@@ -270,6 +336,11 @@ class DatabricksPath:
 
     @property
     def content_length(self):
+        """Return the size of the path in bytes if known.
+
+        Returns:
+            The size in bytes.
+        """
         if self._size is None:
             self.refresh_status()
         return self._size
@@ -280,6 +351,11 @@ class DatabricksPath:
 
     @property
     def mtime(self) -> Optional[float]:
+        """Return the last-modified time for the path.
+
+        Returns:
+            Last-modified timestamp in seconds.
+        """
         if self._mtime is None:
             self.refresh_status()
         return self._mtime
@@ -314,16 +390,31 @@ class DatabricksPath:
         )
 
     def is_file(self):
+        """Return True when the path is a file.
+
+        Returns:
+            True if the path is a file.
+        """
         if self._is_file is None:
             self.refresh_status()
         return self._is_file
 
     def is_dir(self):
+        """Return True when the path is a directory.
+
+        Returns:
+            True if the path is a directory.
+        """
         if self._is_dir is None:
             self.refresh_status()
         return self._is_dir
 
     def is_dir_sink(self):
+        """Return True if the path represents a directory sink.
+
+        Returns:
+            True if the path represents a directory sink.
+        """
         return self.is_dir() or (self.parts and self.parts[-1] == "")
 
     @property
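The three accessors above share the lazy-status pattern used throughout this class: each cached flag (`_is_file`, `_is_dir`, `_size`, `_mtime`) starts as None, and the first accessor that needs one triggers a single refresh_status() call. A minimal standalone sketch of that pattern (illustrative only, not the package's code):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class LazyStatus:
        # None means "not fetched yet"; one refresh populates every field.
        _is_file: Optional[bool] = None
        _is_dir: Optional[bool] = None

        def refresh_status(self) -> "LazyStatus":
            # Stand-in for the single metadata call made against the remote API.
            self._is_file, self._is_dir = True, False
            return self

        def is_file(self) -> bool:
            if self._is_file is None:  # fetch only on first access
                self.refresh_status()
            return self._is_file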
@@ -331,6 +422,14 @@ class DatabricksPath:
         return self._workspace is not None and self._workspace.connected
 
     def connect(self, clone: bool = False) -> "DatabricksPath":
+        """Connect the path to its workspace, optionally returning a clone.
+
+        Args:
+            clone: Whether to return a cloned instance.
+
+        Returns:
+            The connected DatabricksPath.
+        """
         workspace = self.workspace.connect(clone=clone)
 
         if clone:
@@ -346,6 +445,11 @@ class DatabricksPath:
            pass
 
     def volume_parts(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[PurePosixPath]]:
+        """Return (catalog, schema, volume, rel_path) for volume paths.
+
+        Returns:
+            Tuple of (catalog, schema, volume, rel_path).
+        """
         if self.kind != DatabricksPathKind.VOLUME:
             return None, None, None, None
 
@@ -358,6 +462,11 @@ class DatabricksPath:
         return catalog, schema, volume, self.parts[3:] # type: ignore[return-value]
 
     def refresh_status(self) -> "DatabricksPath":
+        """Refresh cached metadata for the path.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         if self.kind == DatabricksPathKind.VOLUME:
             self._refresh_volume_status()
         elif self.kind == DatabricksPathKind.WORKSPACE:
@@ -445,6 +554,17 @@ class DatabricksPath:
         size: Optional[int] = None,
         mtime: Optional[float] = None,
     ):
+        """Update cached metadata fields.
+
+        Args:
+            is_file: Optional file flag.
+            is_dir: Optional directory flag.
+            size: Optional size in bytes.
+            mtime: Optional modification time in seconds.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         self._is_file = is_file
         self._is_dir = is_dir
         self._size = size
@@ -455,6 +575,11 @@ class DatabricksPath:
     # ---- API path normalization helpers ----
 
     def workspace_full_path(self) -> str:
+        """Return the full workspace path string.
+
+        Returns:
+            Workspace path string.
+        """
         if not self.parts:
             return "/Workspace"
 
@@ -463,6 +588,11 @@ class DatabricksPath:
         return "/Workspace/%s" % "/".join(parts)
 
     def dbfs_full_path(self) -> str:
+        """Return the full DBFS path string.
+
+        Returns:
+            DBFS path string.
+        """
         if not self.parts:
             return "/dbfs"
 
@@ -471,6 +601,11 @@ class DatabricksPath:
         return "/dbfs/%s" % "/".join(parts)
 
     def files_full_path(self) -> str:
+        """Return the full files (volume) path string.
+
+        Returns:
+            Volume path string.
+        """
         if not self.parts:
             return "/Volumes"
 
@@ -479,9 +614,27 @@ class DatabricksPath:
         return "/Volumes/%s" % "/".join(parts)
 
     def exists(self, *, follow_symlinks=True) -> bool:
+        """Return True if the path exists.
+
+        Args:
+            follow_symlinks: Unused; for compatibility.
+
+        Returns:
+            True if the path exists.
+        """
         return bool(self.is_file() or self.is_dir())
 
     def mkdir(self, mode=None, parents=True, exist_ok=True):
+        """Create a directory for the path.
+
+        Args:
+            mode: Optional mode (unused).
+            parents: Whether to create parent directories.
+            exist_ok: Whether to ignore existing directories.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         try:
             if self.kind == DatabricksPathKind.WORKSPACE:
                 self.make_workspace_dir(parents=parents, exist_ok=exist_ok)
@@ -577,6 +730,14 @@ class DatabricksPath:
         return self.reset_metadata(is_file=False, is_dir=True, size=0, mtime=time.time())
 
     def remove(self, recursive: bool = True):
+        """Remove the path as a file or directory.
+
+        Args:
+            recursive: Whether to delete directories recursively.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         if self.kind == DatabricksPathKind.VOLUME:
             return self._remove_volume_obj(recursive=recursive)
         elif self.kind == DatabricksPathKind.WORKSPACE:
@@ -600,6 +761,11 @@ class DatabricksPath:
             return self._remove_dbfs_dir(recursive=recursive)
 
     def rmfile(self):
+        """Remove the path as a file.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         try:
             if self.kind == DatabricksPathKind.VOLUME:
                 return self._remove_volume_file()
@@ -636,6 +802,14 @@ class DatabricksPath:
         return self
 
     def rmdir(self, recursive: bool = True):
+        """Remove the path as a directory.
+
+        Args:
+            recursive: Whether to delete directories recursively.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         if self.kind == DatabricksPathKind.VOLUME:
             return self._remove_volume_dir(recursive=recursive)
         elif self.kind == DatabricksPathKind.WORKSPACE:
@@ -691,6 +865,16 @@ class DatabricksPath:
         return self.reset_metadata()
 
     def ls(self, recursive: bool = False, fetch_size: int = None, allow_not_found: bool = True):
+        """List directory contents for the path.
+
+        Args:
+            recursive: Whether to recurse into subdirectories.
+            fetch_size: Optional page size for listings.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Yields:
+            DatabricksPath entries.
+        """
         if self.kind == DatabricksPathKind.VOLUME:
             yield from self._ls_volume(recursive=recursive, fetch_size=fetch_size, allow_not_found=allow_not_found)
         elif self.kind == DatabricksPathKind.WORKSPACE:
@@ -822,6 +1006,16 @@ class DatabricksPath:
         encoding=None,
         clone: bool = False,
     ) -> DatabricksIO:
+        """Open the path as a DatabricksIO instance.
+
+        Args:
+            mode: File mode string.
+            encoding: Optional text encoding.
+            clone: Whether to return a cloned path instance.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         path = self.connect(clone=clone)
 
         return (
@@ -835,6 +1029,15 @@ class DatabricksPath:
         dest: Union["DatabricksIO", "DatabricksPath", str],
         allow_not_found: bool = True,
     ) -> None:
+        """Copy this path to another path or IO destination.
+
+        Args:
+            dest: Destination IO, DatabricksPath, or path string.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            None.
+        """
         if self.is_file() and dest.is_file():
             with self.open(mode="rb") as src:
                 src.copy_to(dest=dest)
@@ -869,6 +1072,16 @@ class DatabricksPath:
         filesystem: Optional[FileSystem] = None,
         **kwargs
     ):
+        """Return a PyArrow dataset referencing this path.
+
+        Args:
+            workspace: Optional workspace override.
+            filesystem: Optional filesystem override.
+            **kwargs: Dataset options.
+
+        Returns:
+            A PyArrow Dataset instance.
+        """
         filesystem = self.filesystem(workspace=workspace) if filesystem is None else filesystem
 
         return ds.dataset(
@@ -883,6 +1096,16 @@ class DatabricksPath:
         concat: bool = True,
         **kwargs
     ) -> pa.Table:
+        """Read the path into an Arrow table.
+
+        Args:
+            batch_size: Optional batch size for reads.
+            concat: Whether to concatenate tables for directories.
+            **kwargs: Format-specific options.
+
+        Returns:
+            An Arrow Table (or list of tables if concat=False).
+        """
         if self.is_file():
             with self.open("rb") as f:
                 return f.read_arrow_table(batch_size=batch_size, **kwargs)
@@ -923,6 +1146,16 @@ class DatabricksPath:
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write Arrow data to the path.
+
+        Args:
+            table: Arrow table or record batch to write.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         if not isinstance(table, pa.Table):
             table = convert(table, pa.Table)
 
@@ -935,9 +1168,21 @@ class DatabricksPath:
     def write_arrow_table(
         self,
         table: pa.Table,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write an Arrow table to the path, sharding if needed.
+
+        Args:
+            table: Arrow table to write.
+            file_format: Optional file format override.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         with self.connect(clone=False) as connected:
             if connected.is_dir_sink():
                 seed = int(time.time() * 1000)
@@ -946,12 +1191,13 @@ class DatabricksPath:
                     part_path = connected / f"{seed}-{i:05d}-{_rand_str(4)}.parquet"
 
                     with part_path.open(mode="wb") as f:
-                        f.write_arrow_batch(batch)
+                        f.write_arrow_batch(batch, file_format=file_format)
 
                 return connected
 
             connected.open(mode="wb", clone=False).write_arrow_table(
                 table,
+                file_format=file_format,
                 batch_size=batch_size,
                 **kwargs
             )
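Taken together, the two hunks above thread an optional file format through both write branches: directory sinks shard the table into `<seed>-<index>-<random>.parquet` part files, while plain file paths stream the whole table through a single writer. A minimal usage sketch, assuming `dir_path` and `file_path` are DatabricksPath objects obtained elsewhere (hypothetical names), and assuming the annotation's pyarrow FileFormat type is what the argument accepts:

    import pyarrow as pa
    import pyarrow.dataset as pads

    def write_both_ways(dir_path, file_path) -> None:
        # dir_path / file_path: DatabricksPath instances (hypothetical names).
        table = pa.table({"id": [1, 2, 3], "value": [0.1, 0.2, 0.3]})

        # Directory sink: each record batch becomes its own parquet part file.
        dir_path.write_arrow_table(table)

        # Single file: the format comes from the extension unless an explicit
        # format is passed via the new file_format argument (assumption: a
        # pyarrow dataset FileFormat such as ParquetFileFormat is accepted).
        file_path.write_arrow_table(table, file_format=pads.ParquetFileFormat())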
@@ -960,10 +1206,20 @@ class DatabricksPath:
 
     def read_pandas(
         self,
-        batch_size: int = 0,
+        batch_size: Optional[int] = None,
         concat: bool = True,
         **kwargs
     ):
+        """Read the path into a pandas DataFrame.
+
+        Args:
+            batch_size: Optional batch size for reads.
+            concat: Whether to concatenate results for directories.
+            **kwargs: Format-specific options.
+
+        Returns:
+            A pandas DataFrame or list of DataFrames if concat=False.
+        """
         if concat:
             return self.read_arrow_table(batch_size=batch_size, concat=True, **kwargs).to_pandas()
 
@@ -976,6 +1232,16 @@ class DatabricksPath:
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write a pandas DataFrame to the path.
+
+        Args:
+            df: pandas DataFrame to write.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            The DatabricksPath instance.
+        """
         return self.write_arrow_table(pa.table(df), batch_size=batch_size, **kwargs)
 
     def read_polars(
@@ -986,6 +1252,18 @@ class DatabricksPath:
         concat: bool = True,
         **kwargs
     ):
+        """Read the path into a polars DataFrame.
+
+        Args:
+            batch_size: Optional batch size for reads.
+            how: Polars concat strategy.
+            rechunk: Whether to rechunk after concat.
+            concat: Whether to concatenate results for directories.
+            **kwargs: Format-specific options.
+
+        Returns:
+            A polars DataFrame or list of DataFrames if concat=False.
+        """
         import polars as pl
 
         if self.is_file():
@@ -1023,6 +1301,14 @@ class DatabricksPath:
         - If path is a file: write using DatabricksIO.write_polars which is extension-driven
           (parquet/csv/ipc/json/ndjson etc.).
 
+        Args:
+            df: polars DataFrame or LazyFrame to write.
+            batch_size: Optional rows per part for directory sinks.
+            **kwargs: Format-specific options.
+
+        Returns:
+            The DatabricksPath instance.
+
         Notes:
             - If `df` is a LazyFrame, we collect it first (optionally streaming).
         """
@@ -1057,6 +1343,15 @@ class DatabricksPath:
         query: str,
         engine: str = "auto"
     ):
+        """Run a local SQL query against data at this path.
+
+        Args:
+            query: SQL query string referencing the path.
+            engine: Query engine ("duckdb", "polars", or "auto").
+
+        Returns:
+            An Arrow Table with the query results.
+        """
         if engine == "auto":
             try:
                 import duckdb
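This last hunk documents the local `sql()` helper, whose `engine="auto"` branch probes for DuckDB before falling back to Polars, as the docstring's engine choices suggest. A small sketch of that selection logic on its own, mirroring the visible control flow rather than the package's exact implementation:

    def pick_sql_engine(engine: str = "auto") -> str:
        # Mirrors the fallback visible above: prefer duckdb when importable,
        # otherwise fall back to polars (illustrative, not the package's code).
        if engine != "auto":
            return engine
        try:
            import duckdb  # noqa: F401
            return "duckdb"
        except ImportError:
            return "polars"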
yggdrasil/databricks/workspaces/path_kind.py
@@ -1,3 +1,5 @@
+"""Enumerations for Databricks path namespaces."""
+
 from enum import Enum
 
 
@@ -5,6 +7,7 @@ __all__ = ["DatabricksPathKind"]
 
 
 class DatabricksPathKind(str, Enum):
+    """Supported Databricks path kinds for workspace, volumes, and DBFS."""
     WORKSPACE = "workspace"
     VOLUME = "volume"
     DBFS = "dbfs"
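For reference, the enum added above uses a `str` mixin, so its members compare equal to their string values and can key the per-namespace roots that the `*_full_path()` helpers in path.py return. A short sketch (the ROOTS mapping and the asserts are illustrative, not part of the package):

    from enum import Enum

    class DatabricksPathKind(str, Enum):
        WORKSPACE = "workspace"
        VOLUME = "volume"
        DBFS = "dbfs"

    # Root prefixes matching workspace_full_path / files_full_path / dbfs_full_path.
    ROOTS = {
        DatabricksPathKind.WORKSPACE: "/Workspace",
        DatabricksPathKind.VOLUME: "/Volumes",
        DatabricksPathKind.DBFS: "/dbfs",
    }

    assert DatabricksPathKind.DBFS == "dbfs"          # str mixin: compares to strings
    assert ROOTS[DatabricksPathKind("volume")] == "/Volumes"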