ygg 0.1.31__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/METADATA +1 -1
  2. ygg-0.1.33.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +244 -3
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +24 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.33.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/io.py
@@ -1,3 +1,5 @@
+"""File-like IO abstractions for Databricks paths."""
+
 import base64
 import io
 import time
@@ -33,6 +35,7 @@ __all__ = [
 
 
 class DatabricksIO(ABC, IO):
+    """File-like interface for Databricks workspace, volume, or DBFS paths."""
 
     def __init__(
         self,
@@ -57,15 +60,18 @@ class DatabricksIO(ABC, IO):
         self._write_flag = False
 
     def __enter__(self) -> "DatabricksIO":
+        """Enter a context manager and connect the underlying path."""
         return self.connect(clone=False)
 
     def __exit__(self, exc_type, exc_value, traceback):
+        """Exit the context manager and close the buffer."""
        self.close()
 
     def __del__(self):
         self.close()
 
     def __next__(self):
+        """Iterate over lines in the file."""
         line = self.readline()
         if not line:
             raise StopIteration
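Since DatabricksIO implements both the context-manager and iterator protocols, an opened file can be consumed line by line. A minimal usage sketch, assuming `path` is a DatabricksPath whose open() call yields a DatabricksIO (the exact entry point is outside this hunk):

    # Hypothetical usage; path.open(...) is an assumption based on the
    # factory shown in the next hunk.
    with path.open("r", encoding="utf-8") as f:  # __enter__ connects the path
        for line in f:                           # __next__ delegates to readline()
            print(line.rstrip())
    # on exit, __exit__ closes (and thereby flushes) the buffer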
@@ -87,6 +93,19 @@ class DatabricksIO(ABC, IO):
         position: int = 0,
         buffer: Optional[io.BytesIO] = None,
     ) -> "DatabricksIO":
+        """Create the appropriate IO subclass for the given path kind.
+
+        Args:
+            path: DatabricksPath to open.
+            mode: File mode string.
+            encoding: Optional text encoding for text mode.
+            compression: Optional compression mode.
+            position: Initial file cursor position.
+            buffer: Optional pre-seeded buffer.
+
+        Returns:
+            A DatabricksIO subclass instance.
+        """
         if path.kind == DatabricksPathKind.VOLUME:
             return DatabricksVolumeIO(
                 path=path,
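The factory dispatches on path.kind alone, so callers never pick a subclass by hand. A sketch of a call, with the factory's name assumed (it is truncated out of this hunk) and the kind-to-class mapping taken from the surrounding code (VOLUME -> DatabricksVolumeIO, WORKSPACE -> DatabricksWorkspaceIO, DBFS -> DatabricksDBFSIO):

    # Hypothetical call; only the parameter list above is confirmed by the diff.
    f = DatabricksIO.open(path, mode="rb", position=0)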
@@ -119,10 +138,20 @@ class DatabricksIO(ABC, IO):
 
     @property
     def workspace(self):
+        """Return the associated Workspace instance.
+
+        Returns:
+            The Workspace bound to the path.
+        """
         return self.path.workspace
 
     @property
     def name(self):
+        """Return the name of the underlying path.
+
+        Returns:
+            The path name component.
+        """
         return self.path.name
 
     @property
@@ -147,6 +176,11 @@ class DatabricksIO(ABC, IO):
         return self.path.content_length
 
     def size(self):
+        """Return the size of the file in bytes.
+
+        Returns:
+            The file size in bytes.
+        """
         return self.content_length
 
     @content_length.setter
@@ -155,6 +189,11 @@ class DatabricksIO(ABC, IO):
 
     @property
     def buffer(self):
+        """Return the in-memory buffer, creating it if necessary.
+
+        Returns:
+            A BytesIO buffer for the file contents.
+        """
         if self._buffer is None:
             self._buffer = io.BytesIO()
             self._buffer.seek(self.position, io.SEEK_SET)
@@ -165,11 +204,24 @@ class DatabricksIO(ABC, IO):
         self._buffer = value
 
     def clear_buffer(self):
+        """Clear any cached in-memory buffer.
+
+        Returns:
+            None.
+        """
         if self._buffer is not None:
             self._buffer.close()
             self._buffer = None
 
     def clone_instance(self, **kwargs):
+        """Clone this IO instance with optional overrides.
+
+        Args:
+            **kwargs: Field overrides for the new instance.
+
+        Returns:
+            A cloned DatabricksIO instance.
+        """
         return self.__class__(
             path=kwargs.get("path", self.path),
             mode=kwargs.get("mode", self.mode),
@@ -181,9 +233,22 @@ class DatabricksIO(ABC, IO):
 
     @property
     def connected(self):
+        """Return True if the underlying path is connected.
+
+        Returns:
+            True if connected, otherwise False.
+        """
         return self.path.connected
 
     def connect(self, clone: bool = False) -> "DatabricksIO":
+        """Connect the underlying path and optionally return a clone.
+
+        Args:
+            clone: Whether to return a cloned instance.
+
+        Returns:
+            The connected DatabricksIO instance.
+        """
         path = self.path.connect(clone=clone)
 
         if clone:
@@ -193,23 +258,52 @@ class DatabricksIO(ABC, IO):
         return self
 
     def close(self):
+        """Flush pending writes and close the buffer.
+
+        Returns:
+            None.
+        """
         self.flush()
         if self._buffer is not None:
             self._buffer.close()
 
     def fileno(self):
+        """Return a pseudo file descriptor based on object hash.
+
+        Returns:
+            An integer file descriptor-like value.
+        """
         return hash(self)
 
     def isatty(self):
         return False
 
     def tell(self):
+        """Return the current cursor position.
+
+        Returns:
+            The current position in bytes.
+        """
         return self.position
 
     def seekable(self):
+        """Return True to indicate seek support.
+
+        Returns:
+            True.
+        """
         return True
 
     def seek(self, offset, whence=0, /):
+        """Move the cursor to a new position.
+
+        Args:
+            offset: Offset in bytes.
+            whence: Reference point (start, current, end).
+
+        Returns:
+            The new position in bytes.
+        """
         if whence == io.SEEK_SET:
             new_position = offset
         elif whence == io.SEEK_CUR:
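seek() follows the io module's whence semantics. A short sketch (the SEEK_END branch is cut off by this hunk, so the arithmetic relative to content_length is an assumption):

    import io

    f.seek(0, io.SEEK_SET)   # absolute: new_position = offset
    f.seek(10, io.SEEK_CUR)  # relative: new_position = position + offset
    f.seek(-4, io.SEEK_END)  # assumed: new_position = content_length + offset
    pos = f.tell()           # returns the tracked position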
@@ -230,21 +324,55 @@ class DatabricksIO(ABC, IO):
         return self.position
 
     def readable(self):
+        """Return True to indicate read support.
+
+        Returns:
+            True.
+        """
         return True
 
     def getvalue(self):
+        """Return the buffer contents, reading from remote if needed.
+
+        Returns:
+            File contents as bytes or str depending on mode.
+        """
         if self._buffer is not None:
             return self._buffer.getvalue()
         return self.read_all_bytes()
 
     def getbuffer(self):
+        """Return the underlying BytesIO buffer.
+
+        Returns:
+            The BytesIO buffer instance.
+        """
         return self.buffer
 
     @abstractmethod
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
+        """Read a byte range from the remote path.
+
+        Args:
+            start: Starting byte offset.
+            length: Number of bytes to read.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            The bytes read from the remote path.
+        """
         pass
 
     def read_all_bytes(self, use_cache: bool = True, allow_not_found: bool = False) -> bytes:
+        """Read the full contents into memory, optionally caching.
+
+        Args:
+            use_cache: Whether to cache contents in memory.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            File contents as bytes.
+        """
         if use_cache and self._buffer is not None:
             buffer_value = self._buffer.getvalue()
 
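read_all_bytes() consults the in-memory buffer before touching the remote path, so repeated reads are cheap. A sketch of the intended round trip:

    data = f.read_all_bytes()                   # first call fetches remotely
    cached = f.read_all_bytes()                 # served from the BytesIO cache
    fresh = f.read_all_bytes(use_cache=False)   # bypasses the cache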
@@ -266,6 +394,15 @@ class DatabricksIO(ABC, IO):
         return data
 
     def read(self, n=-1, use_cache: bool = True):
+        """Read up to ``n`` bytes/characters from the file.
+
+        Args:
+            n: Number of bytes/characters to read; -1 for all.
+            use_cache: Whether to use cached contents.
+
+        Returns:
+            The read bytes or string depending on mode.
+        """
         if not self.readable():
             raise IOError("File not open for reading")
 
@@ -285,6 +422,15 @@ class DatabricksIO(ABC, IO):
         return data
 
     def readline(self, limit=-1, use_cache: bool = True):
+        """Read a single line from the file.
+
+        Args:
+            limit: Max characters/bytes to read; -1 for no limit.
+            use_cache: Whether to use cached contents.
+
+        Returns:
+            The next line as bytes or string.
+        """
         if not self.readable():
             raise IOError("File not open for reading")
 
@@ -320,6 +466,15 @@ class DatabricksIO(ABC, IO):
         return bytes(line_bytes)
 
     def readlines(self, hint=-1, use_cache: bool = True):
+        """Read all lines from the file.
+
+        Args:
+            hint: Optional byte/char count hint; -1 for no hint.
+            use_cache: Whether to use cached contents.
+
+        Returns:
+            A list of lines.
+        """
         if not self.readable():
             raise IOError("File not open for reading")
 
@@ -338,16 +493,42 @@ class DatabricksIO(ABC, IO):
         return lines
 
     def appendable(self):
+        """Return True when the file is open in append mode.
+
+        Returns:
+            True if in append mode.
+        """
         return "a" in self.mode
 
     def writable(self):
+        """Return True to indicate write support.
+
+        Returns:
+            True.
+        """
         return True
 
     @abstractmethod
     def write_all_bytes(self, data: bytes):
+        """Write raw bytes to the remote path.
+
+        Args:
+            data: Bytes to write.
+
+        Returns:
+            None.
+        """
         pass
 
     def truncate(self, size=None, /):
+        """Resize the file to ``size`` bytes.
+
+        Args:
+            size: Target size in bytes (defaults to current position).
+
+        Returns:
+            The new size in bytes.
+        """
         if size is None:
             size = self.position
 
@@ -362,11 +543,24 @@ class DatabricksIO(ABC, IO):
         return size
 
     def flush(self):
+        """Flush buffered data to the remote path.
+
+        Returns:
+            None.
+        """
         if self._write_flag and self._buffer is not None:
             self.write_all_bytes(data=self._buffer.getvalue())
             self._write_flag = False
 
     def write(self, data: AnyStr) -> int:
+        """Write data to the buffer and mark for flush.
+
+        Args:
+            data: String or bytes to write.
+
+        Returns:
+            The number of bytes written.
+        """
         if not self.writable():
             raise IOError("File not open for writing")
 
@@ -382,6 +576,14 @@ class DatabricksIO(ABC, IO):
         return written
 
     def writelines(self, lines) -> None:
+        """Write multiple lines to the buffer.
+
+        Args:
+            lines: Iterable of lines to write.
+
+        Returns:
+            None.
+        """
         for line in lines:
             if isinstance(line, str):
                 line = line.encode(self.encoding or "utf-8")
@@ -394,12 +596,25 @@ class DatabricksIO(ABC, IO):
             self.write(data)
 
     def get_output_stream(self, *args, **kwargs):
+        """Return this instance for compatibility with Arrow APIs.
+
+        Returns:
+            The current DatabricksIO instance.
+        """
         return self
 
     def copy_to(
         self,
         dest: Union["DatabricksIO", "DatabricksPath", str]
     ) -> None:
+        """Copy the file contents to another Databricks IO/path.
+
+        Args:
+            dest: Destination IO, DatabricksPath, or path string.
+
+        Returns:
+            None.
+        """
         if not isinstance(dest, DatabricksIO):
             from .path import DatabricksPath
 
@@ -426,17 +641,29 @@ class DatabricksIO(ABC, IO):
     def write_table(
         self,
         table: Union[pa.Table, pa.RecordBatch, PolarsDataFrame, PandasDataFrame],
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write a table-like object to the path using an inferred format.
+
+        Args:
+            table: Table-like object to write.
+            file_format: Optional file format override.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            The result of the specific write implementation.
+        """
         if isinstance(table, pa.Table):
-            return self.write_arrow_table(table, batch_size=batch_size, **kwargs)
+            return self.write_arrow_table(table, file_format=file_format, batch_size=batch_size, **kwargs)
         elif isinstance(table, pa.RecordBatch):
-            return self.write_arrow_batch(table, batch_size=batch_size, **kwargs)
+            return self.write_arrow_batch(table, file_format=file_format, batch_size=batch_size, **kwargs)
         elif isinstance(table, PolarsDataFrame):
-            return self.write_polars(table, batch_size=batch_size, **kwargs)
+            return self.write_polars(table, file_format=file_format, batch_size=batch_size, **kwargs)
         elif isinstance(table, PandasDataFrame):
-            return self.write_pandas(table, batch_size=batch_size, **kwargs)
+            return self.write_pandas(table, file_format=file_format, batch_size=batch_size, **kwargs)
         else:
             raise ValueError(f"Cannot write {type(table)} to {self.path}")
 
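Before this change an explicit file_format was dropped on every branch; it now reaches each specialized writer. A usage sketch (ParquetFileFormat is the same class the isinstance checks use later in this file; its import path is not shown in this diff):

    import pyarrow as pa

    table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

    # `f` is a writable DatabricksIO; the override now applies to
    # pa.Table input instead of being silently ignored.
    f.write_table(table, file_format=ParquetFileFormat(), batch_size=1024)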
@@ -448,6 +675,16 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ) -> pa.Table:
+        """Read the file as an Arrow table.
+
+        Args:
+            file_format: Optional file format override.
+            batch_size: Optional batch size for reads.
+            **kwargs: Format-specific options.
+
+        Returns:
+            An Arrow Table with the file contents.
+        """
         file_format = self.path.file_format if file_format is None else file_format
         self.seek(0)
 
@@ -465,6 +702,16 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write an Arrow table or record batch to the path.
+
+        Args:
+            table: Arrow table or batch to write.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            None.
+        """
         if not isinstance(table, pa.Table):
             table = convert(table, pa.Table)
 
@@ -481,11 +728,22 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write an Arrow table using the selected file format.
+
+        Args:
+            table: Arrow table to write.
+            file_format: Optional file format override.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            None.
+        """
         file_format = self.path.file_format if file_format is None else file_format
         buffer = io.BytesIO()
 
         if isinstance(file_format, ParquetFileFormat):
-            pq.write_table(table, buffer, **kwargs)
+            pq.write_table(table, buffer, write_batch_size=batch_size, **kwargs)
 
         elif isinstance(file_format, CsvFileFormat):
             pcsv.write_csv(table, buffer, **kwargs)
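batch_size is now forwarded to pyarrow.parquet.write_table as write_batch_size rather than being ignored on the Parquet path. The equivalent standalone call:

    import io
    import pyarrow as pa
    import pyarrow.parquet as pq

    buf = io.BytesIO()
    table = pa.table({"x": list(range(10_000))})

    # write_batch_size bounds how many values PyArrow writes per batch
    # within each row group; None keeps PyArrow's default.
    pq.write_table(table, buf, write_batch_size=1024)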
@@ -498,17 +756,38 @@ class DatabricksIO(ABC, IO):
     def write_arrow_batch(
         self,
         batch: pa.RecordBatch,
+        file_format: Optional[FileFormat] = None,
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write a single Arrow record batch.
+
+        Args:
+            batch: RecordBatch to write.
+            file_format: Optional file format override.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            None.
+        """
         table = pa.Table.from_batches([batch])
-        self.write_arrow_table(table, batch_size=batch_size, **kwargs)
+        self.write_arrow_table(table, file_format=file_format, batch_size=batch_size, **kwargs)
 
     def read_arrow_batches(
         self,
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Yield Arrow record batches from the file.
+
+        Args:
+            batch_size: Optional batch size for reads.
+            **kwargs: Format-specific options.
+
+        Returns:
+            An iterator over Arrow RecordBatch objects.
+        """
         return (
             self
             .read_arrow_table(batch_size=batch_size, **kwargs)
@@ -522,6 +801,15 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Read the file into a pandas DataFrame.
+
+        Args:
+            batch_size: Optional batch size for reads.
+            **kwargs: Format-specific options.
+
+        Returns:
+            A pandas DataFrame with the file contents.
+        """
         return self.read_arrow_table(batch_size=batch_size, **kwargs).to_pandas()
 
     def write_pandas(
  def write_pandas(
@@ -530,6 +818,16 @@ class DatabricksIO(ABC, IO):
530
818
  batch_size: Optional[int] = None,
531
819
  **kwargs
532
820
  ):
821
+ """Write a pandas DataFrame to the file.
822
+
823
+ Args:
824
+ df: pandas DataFrame to write.
825
+ batch_size: Optional batch size for writes.
826
+ **kwargs: Format-specific options.
827
+
828
+ Returns:
829
+ None.
830
+ """
533
831
  self.write_arrow_table(pa.table(df), batch_size=batch_size, **kwargs)
534
832
 
535
833
  # ---- Polars ----
@@ -540,6 +838,16 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Read the file into a polars DataFrame.
+
+        Args:
+            file_format: Optional file format override.
+            batch_size: Optional batch size for reads.
+            **kwargs: Format-specific options.
+
+        Returns:
+            A polars DataFrame with the file contents.
+        """
         import polars as pl
 
         file_format = self.path.file_format if file_format is None else file_format
@@ -560,22 +868,46 @@ class DatabricksIO(ABC, IO):
         batch_size: Optional[int] = None,
         **kwargs
     ):
+        """Write a polars DataFrame to the file.
+
+        Args:
+            df: polars DataFrame to write.
+            file_format: Optional file format override.
+            batch_size: Optional batch size for writes.
+            **kwargs: Format-specific options.
+
+        Returns:
+            None.
+        """
         file_format = self.path.file_format if file_format is None else FileFormat
-        self._reset_for_write()
+        buffer = io.BytesIO()
 
         if isinstance(file_format, ParquetFileFormat):
-            df.write_parquet(self, **kwargs)
+            df.write_parquet(buffer, **kwargs)
 
         elif isinstance(file_format, CsvFileFormat):
-            df.write_csv(self, **kwargs)
+            df.write_csv(buffer, **kwargs)
 
         else:
             raise ValueError(f"Unsupported file format for Polars DataFrame: {file_format}")
 
+        self.write_all_bytes(data=buffer.getvalue())
+
 
 class DatabricksWorkspaceIO(DatabricksIO):
+    """IO adapter for Workspace files."""
 
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
+        """Read bytes from a Workspace file.
+
+        Args:
+            start: Starting byte offset.
+            length: Number of bytes to read.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            Bytes read from the file.
+        """
         if length == 0:
             return b""
 
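write_polars no longer hands the DatabricksIO object itself to polars; it stages the serialized bytes in a local BytesIO and uploads them in a single write_all_bytes call. (Note that the format-resolution line above reads `else FileFormat`, naming the class, where the parallel Arrow methods use `else file_format`.) The same staging pattern, reproduced standalone; obtaining a writable DatabricksIO `f` is outside this diff:

    import io
    import polars as pl

    df = pl.DataFrame({"id": [1, 2], "name": ["a", "b"]})

    buffer = io.BytesIO()          # stage the serialized bytes locally...
    df.write_parquet(buffer)       # polars writes into the in-memory buffer
    f.write_all_bytes(data=buffer.getvalue())  # ...then upload in one call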
@@ -597,6 +929,14 @@ class DatabricksWorkspaceIO(DatabricksIO):
         return data[start:end]
 
     def write_all_bytes(self, data: bytes):
+        """Write bytes to a Workspace file.
+
+        Args:
+            data: Bytes to write.
+
+        Returns:
+            The DatabricksWorkspaceIO instance.
+        """
         sdk = self.workspace.sdk()
         workspace_client = sdk.workspace
         full_path = self.path.workspace_full_path()
@@ -629,8 +969,19 @@ class DatabricksWorkspaceIO(DatabricksIO):
 
 
 class DatabricksVolumeIO(DatabricksIO):
+    """IO adapter for Unity Catalog volume files."""
 
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
+        """Read bytes from a volume file.
+
+        Args:
+            start: Starting byte offset.
+            length: Number of bytes to read.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            Bytes read from the file.
+        """
         if length == 0:
             return b""
 
@@ -648,6 +999,14 @@ class DatabricksVolumeIO(DatabricksIO):
         return result
 
     def write_all_bytes(self, data: bytes):
+        """Write bytes to a volume file.
+
+        Args:
+            data: Bytes to write.
+
+        Returns:
+            The DatabricksVolumeIO instance.
+        """
         sdk = self.workspace.sdk()
         client = sdk.files
         full_path = self.path.files_full_path()
@@ -678,8 +1037,19 @@ class DatabricksVolumeIO(DatabricksIO):
 
 
 class DatabricksDBFSIO(DatabricksIO):
+    """IO adapter for DBFS files."""
 
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
+        """Read bytes from a DBFS file.
+
+        Args:
+            start: Starting byte offset.
+            length: Number of bytes to read.
+            allow_not_found: Whether to suppress missing-path errors.
+
+        Returns:
+            Bytes read from the file.
+        """
         if length == 0:
             return b""
 
@@ -714,6 +1084,14 @@ class DatabricksDBFSIO(DatabricksIO):
         return bytes(read_bytes)
 
     def write_all_bytes(self, data: bytes):
+        """Write bytes to a DBFS file.
+
+        Args:
+            data: Bytes to write.
+
+        Returns:
+            The DatabricksDBFSIO instance.
+        """
         sdk = self.workspace.sdk()
         client = sdk.dbfs
         full_path = self.path.dbfs_full_path()