ygg 0.1.31__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/sql/statement_result.py
@@ -1,3 +1,5 @@
+"""Result wrapper for Databricks SQL statement execution."""
+
 import dataclasses
 import threading
 import time
@@ -49,6 +51,7 @@ __all__ = [
 
 @dataclasses.dataclass
 class StatementResult:
+    """Container for statement responses, data extraction, and conversions."""
     engine: "SQLEngine"
     statement_id: str
     disposition: "Disposition"
@@ -60,6 +63,11 @@ class StatementResult:
     _arrow_table: Optional[pa.Table] = dataclasses.field(default=None, repr=False)
 
     def __getstate__(self):
+        """Serialize statement results, converting Spark dataframes to Arrow.
+
+        Returns:
+            A pickle-ready state dictionary.
+        """
         state = self.__dict__.copy()
 
         _spark_df = state.pop("_spark_df", None)
@@ -70,29 +78,54 @@ class StatementResult:
         return state
 
     def __setstate__(self, state):
+        """Restore statement result state, rehydrating cached data.
+
+        Args:
+            state: Serialized state dictionary.
+        """
         _spark_df = state.pop("_spark_df")
 
     def __iter__(self):
+        """Iterate over Arrow record batches."""
         return self.to_arrow_batches()
 
     @property
     def is_spark_sql(self):
+        """Return True when this result was produced by Spark SQL."""
         return self._spark_df is not None
 
     @property
     def response(self):
+        """Return the latest statement response, refreshing when needed.
+
+        Returns:
+            The current StatementResponse object.
+        """
         if self._response is None and not self.is_spark_sql:
             self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
         return self._response
 
     @response.setter
     def response(self, value: "StatementResponse"):
+        """Update the cached response and refresh timestamp.
+
+        Args:
+            value: StatementResponse to cache.
+        """
         self._response = value
         self._response_refresh_time = time.time()
 
         self.statement_id = self._response.statement_id
 
     def fresh_response(self, delay: float):
+        """Refresh the response if it is older than ``delay`` seconds.
+
+        Args:
+            delay: Minimum age in seconds before refreshing.
+
+        Returns:
+            The refreshed StatementResponse object.
+        """
         if self.is_spark_sql:
             return self._response
 
@@ -102,6 +135,14 @@ class StatementResult:
         return self._response
 
     def result_data_at(self, chunk_index: int):
+        """Fetch a specific result chunk by index.
+
+        Args:
+            chunk_index: Result chunk index to retrieve.
+
+        Returns:
+            The SDK result chunk response.
+        """
         sdk = self.workspace.sdk()
 
         return sdk.statement_execution.get_statement_result_chunk_n(
@@ -111,10 +152,20 @@ class StatementResult:
 
     @property
    def workspace(self):
+        """Expose the underlying workspace from the engine.
+
+        Returns:
+            The Workspace instance backing this statement.
+        """
         return self.engine.workspace
 
     @property
     def status(self):
+        """Return the statement status, handling persisted data.
+
+        Returns:
+            A StatementStatus object.
+        """
         if self.persisted:
             return StatementStatus(
                 state=StatementState.SUCCEEDED
@@ -129,20 +180,40 @@ class StatementResult:
 
     @property
     def state(self):
+        """Return the statement state.
+
+        Returns:
+            The StatementState enum value.
+        """
         return self.status.state
 
     @property
     def manifest(self):
+        """Return the SQL result manifest, if available.
+
+        Returns:
+            The result manifest or None for Spark SQL results.
+        """
         if self.is_spark_sql:
             return None
         return self.response.manifest
 
     @property
     def result(self):
+        """Return the raw statement result object.
+
+        Returns:
+            The statement result payload from the API.
+        """
         return self.response.result
 
     @property
     def done(self):
+        """Return True when the statement is in a terminal state.
+
+        Returns:
+            True if the statement is done, otherwise False.
+        """
         if self.persisted:
             return True
 
@@ -155,6 +226,11 @@ class StatementResult:
 
     @property
     def failed(self):
+        """Return True when the statement failed or was cancelled.
+
+        Returns:
+            True if the statement failed or was cancelled.
+        """
         if self.persisted:
             return True
 
@@ -165,14 +241,29 @@ class StatementResult:
 
     @property
     def persisted(self):
+        """Return True when data is cached locally.
+
+        Returns:
+            True when cached Arrow or Spark data is present.
+        """
         return self._spark_df is not None or self._arrow_table is not None
 
     def persist(self):
+        """Cache the statement result locally as Arrow data.
+
+        Returns:
+            The current StatementResult instance.
+        """
         if not self.persisted:
             self._arrow_table = self.to_arrow_table()
         return self
 
     def external_links(self):
+        """Yield external result links for EXTERNAL_LINKS dispositions.
+
+        Yields:
+            External link objects in result order.
+        """
         assert self.disposition == Disposition.EXTERNAL_LINKS, "Cannot get from %s, disposition %s != %s" % (
             self, self.disposition, Disposition.EXTERNAL_LINKS
         )
@@ -222,6 +313,11 @@ class StatementResult:
         )
 
     def raise_for_status(self):
+        """Raise a ValueError if the statement failed.
+
+        Returns:
+            None.
+        """
         if self.failed:
             # grab error info if present
             err = self.status.error
@@ -244,6 +340,15 @@ class StatementResult:
         timeout: Optional[int] = None,
         poll_interval: Optional[float] = None
     ):
+        """Wait for statement completion with optional timeout.
+
+        Args:
+            timeout: Maximum seconds to wait.
+            poll_interval: Initial poll interval in seconds.
+
+        Returns:
+            The current StatementResult instance.
+        """
         if self.done:
             return self
 
@@ -265,6 +370,11 @@ class StatementResult:
         return current
 
     def arrow_schema(self):
+        """Return the Arrow schema for the result.
+
+        Returns:
+            An Arrow Schema instance.
+        """
         if self.persisted:
             if self._arrow_table is not None:
                 return self._arrow_table.schema
@@ -277,6 +387,14 @@ class StatementResult:
         return pa.schema(fields)
 
     def to_arrow_table(self, parallel_pool: Optional[int] = 4) -> pa.Table:
+        """Collect the statement result into a single Arrow table.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            An Arrow Table containing all rows.
+        """
         if self.persisted:
             if self._arrow_table:
                 return self._arrow_table
@@ -295,6 +413,14 @@ class StatementResult:
         self,
         parallel_pool: Optional[int] = 4
     ) -> Iterator[pa.RecordBatch]:
+        """Stream the result as Arrow record batches.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Yields:
+            Arrow RecordBatch objects.
+        """
         if self.persisted:
             if self._arrow_table is not None:
                 for batch in self._arrow_table.to_batches(max_chunksize=64 * 1024):
@@ -379,15 +505,36 @@ class StatementResult:
         self,
         parallel_pool: Optional[int] = 4
     ) -> "pandas.DataFrame":
+        """Return the result as a pandas DataFrame.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            A pandas DataFrame with the result rows.
+        """
         return self.to_arrow_table(parallel_pool=parallel_pool).to_pandas()
 
     def to_polars(
         self,
         parallel_pool: Optional[int] = 4
     ) -> "polars.DataFrame":
+        """Return the result as a polars DataFrame.
+
+        Args:
+            parallel_pool: Maximum parallel fetch workers.
+
+        Returns:
+            A polars DataFrame with the result rows.
+        """
         return polars.from_arrow(self.to_arrow_table(parallel_pool=parallel_pool))
 
     def to_spark(self):
+        """Return the result as a Spark DataFrame, caching it locally.
+
+        Returns:
+            A Spark DataFrame with the result rows.
+        """
         if self._spark_df:
             return self._spark_df
 
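For orientation (not part of the diff): a minimal sketch of how a StatementResult might be consumed, based only on the methods documented above. How the `result` object is obtained from yggdrasil's SQLEngine is assumed here and not shown in this diff.

    # Hypothetical usage sketch; `result` is assumed to be a StatementResult
    # produced by yggdrasil's SQLEngine (not shown in this diff).
    result.wait_for_completion(timeout=300, poll_interval=1.0)  # block until a terminal state
    result.raise_for_status()                                   # ValueError if the statement failed

    table = result.to_arrow_table(parallel_pool=4)   # single pyarrow.Table
    pdf = result.to_pandas()                         # pandas.DataFrame via Arrow
    pldf = result.to_polars()                        # polars.DataFrame via Arrow

    # Stream large results instead of materializing them all at once.
    for batch in result.to_arrow_batches(parallel_pool=4):
        print(batch.num_rows)

    result.persist()   # cache the Arrow table locally so the object can be pickled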
yggdrasil/databricks/sql/types.py
@@ -1,3 +1,5 @@
+"""Type utilities for Databricks SQL metadata and Arrow."""
+
 import json
 import re
 from typing import Union
@@ -86,6 +88,14 @@ _struct_re = re.compile(r"^STRUCT\s*<\s*(.+)\s*>$", re.IGNORECASE)
 
 
 def _split_top_level_commas(s: str):
+    """Split a type string by commas, respecting nested angle brackets.
+
+    Args:
+        s: Type string to split.
+
+    Returns:
+        A list of top-level comma-separated parts.
+    """
     parts, cur, depth = [], [], 0
     for ch in s:
         if ch == '<':
@@ -103,6 +113,14 @@ def _split_top_level_commas(s: str):
 
 
 def _safe_bytes(obj):
+    """Convert an object to UTF-8 bytes, with safe handling for None.
+
+    Args:
+        obj: Value to convert.
+
+    Returns:
+        UTF-8 encoded bytes.
+    """
     if not isinstance(obj, bytes):
         if not obj:
             return b""
@@ -120,6 +138,12 @@ def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
     - looks up base types in STRING_TYPE_MAP (expects uppercase keys)
     - supports DECIMAL(p,s), ARRAY<...>, MAP<k,v>, STRUCT<...> recursively
     - raises ValueError if it cannot map the provided type string
+
+    Args:
+        type_str: SQL type string to parse.
+
+    Returns:
+        The corresponding Arrow DataType.
     """
     if not type_str:
         raise ValueError("Empty type string")
@@ -177,6 +201,14 @@ def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
 
 
 def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
+    """Convert Databricks SQL/Catalog column info into an Arrow field.
+
+    Args:
+        col: ColumnInfo from SQL or Catalog APIs.
+
+    Returns:
+        An Arrow Field for the column.
+    """
     arrow_type = parse_sql_type_to_pa(col.type_text)
 
     if isinstance(col, CatalogColumnInfo):
@@ -198,4 +230,4 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
         arrow_type,
         nullable=nullable,
         metadata=md
-    )
+    )
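For illustration (not part of the diff): a small sketch of the mapping that parse_sql_type_to_pa documents above, with nested ARRAY/MAP/STRUCT and DECIMAL(p,s) strings resolved recursively. The example type strings and the Arrow types noted in the comments are assumptions; the diff only shows the parsing entry point and its documented behavior.

    from yggdrasil.databricks.sql.types import parse_sql_type_to_pa

    # Nested types are parsed recursively; DECIMAL precision/scale are honored.
    print(parse_sql_type_to_pa("DECIMAL(18,4)"))                            # e.g. decimal128(18, 4)
    print(parse_sql_type_to_pa("ARRAY<STRING>"))                            # e.g. list<item: string>
    print(parse_sql_type_to_pa("MAP<STRING, BIGINT>"))                      # e.g. map<string, int64>
    print(parse_sql_type_to_pa("STRUCT<id: BIGINT, tags: ARRAY<STRING>>"))  # e.g. struct<id, tags>

    parse_sql_type_to_pa("")   # raises ValueError("Empty type string")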
yggdrasil/databricks/workspaces/__init__.py
@@ -1,4 +1,5 @@
+"""Workspace, filesystem, and path utilities for Databricks."""
+
 from .workspace import *
 from .path import *
 from .io import *
-
yggdrasil/databricks/workspaces/filesytem.py
@@ -1,3 +1,5 @@
+"""PyArrow filesystem wrappers for Databricks paths."""
+
 __all__ = [
     "DatabricksFileSystem",
     "DatabricksFileSystemHandler"
@@ -14,26 +16,60 @@ if TYPE_CHECKING:
 
 
 class DatabricksFileSystemHandler(FileSystemHandler):
+    """PyArrow FileSystemHandler backed by Databricks paths."""
 
     def __init__(
         self,
         workspace: "Workspace",
     ):
+        """Create a handler bound to a Workspace.
+
+        Args:
+            workspace: Workspace instance to use.
+        """
         super().__init__()
         self.workspace = workspace
 
     def __enter__(self):
+        """Enter a context manager and connect to the workspace.
+
+        Returns:
+            A connected DatabricksFileSystemHandler instance.
+        """
         return self.connect(clone=True)
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and close the workspace.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+        """
         self.workspace.__exit__(exc_type, exc_val, exc_tb)
 
     def _parse_path(self, obj: Any) -> "DatabricksPath":
+        """Parse a path-like object into a DatabricksPath.
+
+        Args:
+            obj: Path-like object to parse.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         from .path import DatabricksPath
 
         return DatabricksPath.parse(obj, workspace=self.workspace)
 
     def connect(self, clone: bool = True):
+        """Connect the workspace and optionally return a cloned handler.
+
+        Args:
+            clone: Whether to return a cloned handler.
+
+        Returns:
+            A connected handler.
+        """
         workspace = self.connect(clone=clone)
 
         if clone:
@@ -45,9 +81,21 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         return self
 
     def close(self):
+        """Close the underlying workspace client.
+
+        Returns:
+            None.
+        """
         self.workspace.close()
 
     def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
+        """Copy a file between Databricks paths.
+
+        Args:
+            src: Source path.
+            dest: Destination path.
+            chunk_size: Chunk size in bytes.
+        """
         src = self._parse_path(src)
         dest = self._parse_path(dest)
 
@@ -59,24 +107,66 @@ class DatabricksFileSystemHandler(FileSystemHandler):
                 w.write(chunk)
 
     def create_dir(self, path, *args, recursive: bool = True, **kwargs):
+        """Create a directory at the given path.
+
+        Args:
+            path: Directory path to create.
+            recursive: Whether to create parents.
+
+        Returns:
+            The created DatabricksPath instance.
+        """
         return self._parse_path(path).mkdir(parents=recursive)
 
     def delete_dir(self, path):
+        """Delete a directory recursively.
+
+        Args:
+            path: Directory path to delete.
+        """
         return self._parse_path(path).rmdir(recursive=True)
 
     def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
+        """Delete the contents of a directory.
+
+        Args:
+            path: Directory path whose contents should be removed.
+            accept_root_dir: Whether to allow deleting root contents.
+        """
         return self._parse_path(path).rmdir(recursive=True)
 
     def delete_root_dir_contents(self):
+        """Delete the contents of the root directory."""
         return self.delete_dir_contents("/", accept_root_dir=True)
 
     def delete_file(self, path):
+        """Delete a single file.
+
+        Args:
+            path: File path to delete.
+        """
         return self._parse_path(path).rmfile()
 
     def equals(self, other: FileSystem):
+        """Return True if the filesystem handler matches another.
+
+        Args:
+            other: Another FileSystem instance.
+
+        Returns:
+            True if equal, otherwise False.
+        """
         return self == other
 
     def from_uri(self, uri):
+        """Return a handler for the workspace in the provided URI.
+
+        Args:
+            uri: URI or path to parse.
+
+        Returns:
+            A DatabricksFileSystemHandler for the URI.
+        """
         uri = self._parse_path(uri)
 
         return self.__class__(
@@ -87,6 +177,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         self,
         paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
     ) -> Union[FileInfo, List[FileInfo]]:
+        """Return FileInfo objects for paths or selectors.
+
+        Args:
+            paths_or_selector: Path(s) or a FileSelector.
+
+        Returns:
+            A FileInfo or list of FileInfo objects.
+        """
         from .path import DatabricksPath
 
         if isinstance(paths_or_selector, (str, DatabricksPath)):
@@ -106,6 +204,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         self,
         selector: FileSelector
     ):
+        """Return FileInfo entries for a FileSelector.
+
+        Args:
+            selector: FileSelector describing the listing.
+
+        Returns:
+            A list of FileInfo entries.
+        """
         base_dir = self._parse_path(selector.base_dir)
 
         return [
@@ -117,9 +223,20 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         ]
 
     def get_type_name(self):
+        """Return the filesystem type name.
+
+        Returns:
+            The filesystem type name string.
+        """
         return "dbfs"
 
     def move(self, src, dest):
+        """Move a file by copying then deleting.
+
+        Args:
+            src: Source path.
+            dest: Destination path.
+        """
         src = self._parse_path(src)
 
         src.copy_to(dest)
@@ -127,6 +244,14 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         src.remove(recursive=True)
 
     def normalize_path(self, path):
+        """Normalize a path to a full Databricks path string.
+
+        Args:
+            path: Path to normalize.
+
+        Returns:
+            The normalized full path string.
+        """
         return self._parse_path(path).full_path()
 
     def open(
@@ -135,12 +260,43 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         mode: str = "r+",
         encoding: Optional[str] = None,
     ):
+        """Open a file path as a Databricks IO stream.
+
+        Args:
+            path: Path to open.
+            mode: File mode string.
+            encoding: Optional text encoding.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
 
     def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        """Open an append stream.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+            metadata: Optional metadata.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="ab")
 
     def open_input_file(self, path, mode: str = "rb", **kwargs):
+        """Open an input file as a PyArrow PythonFile.
+
+        Args:
+            path: Path to open.
+            mode: File mode string.
+            **kwargs: Additional options.
+
+        Returns:
+            A PyArrow PythonFile instance.
+        """
         buf = self._parse_path(path).open(mode=mode).connect(clone=True)
 
         return PythonFile(
@@ -149,13 +305,40 @@ class DatabricksFileSystemHandler(FileSystemHandler):
         )
 
     def open_input_stream(self, path, compression='detect', buffer_size=None):
+        """Open an input stream for reading bytes.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="rb")
 
     def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        """Open an output stream for writing bytes.
+
+        Args:
+            path: Path to open.
+            compression: Optional compression hint.
+            buffer_size: Optional buffer size.
+            metadata: Optional metadata.
+
+        Returns:
+            A DatabricksIO instance.
+        """
         return self._parse_path(path).open(mode="wb")
 
 
 class DatabricksFileSystem(PyFileSystem):
+    """PyArrow filesystem wrapper for Databricks paths."""
 
     def __init__(self, handler): # real signature unknown; restored from __doc__
+        """Initialize the filesystem with a handler.
+
+        Args:
+            handler: FileSystemHandler instance.
+        """
         super().__init__(handler)
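For context (not part of the diff): DatabricksFileSystem is a PyFileSystem wrapper around the handler above, so it can plug into standard PyArrow I/O. A minimal sketch, assuming a connected yggdrasil Workspace object (its construction is not shown in this diff) and placeholder volume paths:

    import pyarrow.dataset as ds

    from yggdrasil.databricks.workspaces.filesytem import (
        DatabricksFileSystem,
        DatabricksFileSystemHandler,
    )

    # `workspace` is assumed to be a connected yggdrasil Workspace instance.
    fs = DatabricksFileSystem(DatabricksFileSystemHandler(workspace))

    # Standard pyarrow FileSystem calls delegate to the handler methods documented above.
    info = fs.get_file_info("/Volumes/main/default/vol/data.parquet")   # placeholder path
    with fs.open_input_stream("/Volumes/main/default/vol/data.parquet") as f:
        head = f.read(1024)

    # Because it is a real pyarrow FileSystem, it also plugs into pyarrow.dataset.
    table = ds.dataset("/Volumes/main/default/vol/", filesystem=fs, format="parquet").to_table()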