ygg 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

This diff shows the published contents of two package versions released to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.24
+ Version: 0.1.26
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -1,4 +1,4 @@
- ygg-0.1.24.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ ygg-0.1.26.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
  yggdrasil/__init__.py,sha256=6OPibApplA5TF4TeixkQO_qewpaAidYX-fSDvvKYcTI,91
  yggdrasil/databricks/__init__.py,sha256=aGVve5mpoQtxSK2nfzrexjRPoutCIyaOnKZijkG4_QE,92
  yggdrasil/databricks/compute/__init__.py,sha256=TVDwPmW2SOmHmnhzZhsvrWbrxZ_lEcgqe3l9BeB-oxM,218
@@ -8,12 +8,12 @@ yggdrasil/databricks/compute/remote.py,sha256=sVWBb_1YR-e33on6F5QYMLKwT6end6rolC
  yggdrasil/databricks/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  yggdrasil/databricks/jobs/config.py,sha256=8Slfw4Wl7vu0kIlaUUqVqjjOgPwuULoo0rroENCbC20,11494
  yggdrasil/databricks/sql/__init__.py,sha256=JZpQ9eCphDf1l4yzIZ7a7OLigxqXkqOgb0Mio7Rj09A,181
- yggdrasil/databricks/sql/engine.py,sha256=WRDrY-FpXMegF3qNmMkCsfIitJB9rY_lW8Pnk7oo-xE,30837
+ yggdrasil/databricks/sql/engine.py,sha256=bmkMoBij-8M-xohQK-sYb22OgBc1VIZRRVjw62CMWHk,30810
  yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  yggdrasil/databricks/sql/statement_result.py,sha256=L-hrK5MVnH3XG57BpGmaETtRzYjAtYGQthgAPGVj618,12610
  yggdrasil/databricks/sql/types.py,sha256=YgasSyq8sygk1h6ZOTcRwXAZWNKSuk-9g9VqlR8kJl4,5324
  yggdrasil/databricks/workspaces/__init__.py,sha256=tNNS3A_Pl9FYkQ8nGERhr4VF-hwKrvh8k1W8vTaR0uo,58
- yggdrasil/databricks/workspaces/databricks_path.py,sha256=ieaAEPo2lBmgqrdFwtXdy9uIDFyhFT8hjbO_KP5ayec,29547
+ yggdrasil/databricks/workspaces/databricks_path.py,sha256=Ryoza649WRIHzbpIGjhvefwNFdSpe0-Y2ZJX6nImerw,31101
  yggdrasil/databricks/workspaces/workspace.py,sha256=8T-d0DTq-s2zTkmMbaEeS_7AiZwcUDxorch9IbZACko,33333
  yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
  yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
@@ -49,8 +49,8 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
  yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
  yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
- ygg-0.1.24.dist-info/METADATA,sha256=w61_azInqpDmrls1mQUL-flgRhkNVmgLf1cbWJP0YUM,19204
- ygg-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ygg-0.1.24.dist-info/entry_points.txt,sha256=D0JDw2s-ZfU1GjXfniv6PvXwcyn5v9WBk4ya623Cti8,71
- ygg-0.1.24.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
- ygg-0.1.24.dist-info/RECORD,,
+ ygg-0.1.26.dist-info/METADATA,sha256=3htWnURwJn36QKT6AYSn9syodfAo3ZdLa6qEN4cuUO8,19204
+ ygg-0.1.26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ygg-0.1.26.dist-info/entry_points.txt,sha256=D0JDw2s-ZfU1GjXfniv6PvXwcyn5v9WBk4ya623Cti8,71
+ ygg-0.1.26.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ ygg-0.1.26.dist-info/RECORD,,
@@ -439,8 +439,7 @@ FROM parquet.`{databricks_tmp_folder}`"""
          try:
              databricks_tmp_folder.rmdir(recursive=True)
          except Exception as e:
-             raise e
-             logger.error(e)
+             logger.warning(e)
  
          # Optionally run OPTIMIZE / ZORDER / VACUUM if requested (Databricks SQL)
          if zorder_by:
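
Note on the hunk above (the only change to yggdrasil/databricks/sql/engine.py in this release, matching the small size drop recorded in RECORD): the removed pair of lines was dead code, since "raise e" left the handler before "logger.error(e)" could run, and it also made any temp-folder cleanup failure abort a load that had already succeeded. The new version logs a warning and continues. A minimal sketch of the two behaviors (function and argument names are hypothetical):

    import logging

    logger = logging.getLogger(__name__)

    def cleanup_0_1_24(tmp_folder):
        try:
            tmp_folder.rmdir(recursive=True)
        except Exception as e:
            raise e           # control leaves the handler here...
            logger.error(e)   # ...so this line could never execute

    def cleanup_0_1_26(tmp_folder):
        try:
            tmp_folder.rmdir(recursive=True)
        except Exception as e:
            logger.warning(e)  # log and continue; the data load already succeeded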
@@ -7,7 +7,7 @@ import urllib.parse as urlparse
  from contextlib import contextmanager
  from enum import Enum
  from pathlib import PurePosixPath, Path as SysPath
- from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
+ from typing import Any, BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
  
  from databricks.sdk.service.catalog import VolumeType
  
@@ -15,7 +15,14 @@ from ...libs.databrickslib import databricks
  
  if databricks is not None:
      from databricks.sdk.service.workspace import ImportFormat, ObjectType
-     from databricks.sdk.errors.platform import NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, AlreadyExists, ResourceAlreadyExists
+     from databricks.sdk.errors.platform import (
+         NotFound,
+         ResourceDoesNotExist,
+         BadRequest,
+         PermissionDenied,
+         AlreadyExists,
+         ResourceAlreadyExists,
+     )
  
      NOT_FOUND_ERRORS = NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied
      ALREADY_EXISTS_ERRORS = AlreadyExists, ResourceAlreadyExists, BadRequest
@@ -26,7 +33,7 @@ if TYPE_CHECKING:
  
  __all__ = [
      "DatabricksPathKind",
-     "DatabricksPath"
+     "DatabricksPath",
  ]
  
  
@@ -43,7 +50,11 @@ class DatabricksPathKind(str, Enum):
      DBFS = "dbfs"
  
      @classmethod
-     def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
+     def parse(
+         cls,
+         path: str,
+         workspace: Optional["Workspace"] = None,
+     ) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
          from .workspace import Workspace
  
          if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
@@ -52,13 +63,19 @@ class DatabricksPathKind(str, Enum):
              path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
  
              return cls.WORKSPACE, workspace, path
+ 
          if path.startswith("/Volumes"):
              return cls.VOLUME, workspace, path
  
          if path.startswith("dbfs://"):
              parsed = urlparse.urlparse(path)
-             kind, _, inner_path = cls.parse(parsed.path)
-             workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
+ 
+             # inner path is the URL path (e.g. /tmp/x or /Volumes/...)
+             kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
+ 
+             # hostname can be None for malformed/dbfs:// variants; fall back to default Workspace()
+             if workspace is None:
+                 workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
  
              return kind, workspace, inner_path
  
@@ -66,7 +83,7 @@ class DatabricksPathKind(str, Enum):
  
  
  class DatabricksPath(SysPath, PurePosixPath):
-     _kind: DatabricksPathKind
+     _kind: "DatabricksPathKind"
      _workspace: Optional["Workspace"]
  
      _is_file: Optional[bool]
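
Note: the remaining hunks all belong to yggdrasil/databricks/workspaces/databricks_path.py, the other module whose RECORD hash changed. In the dbfs:// branch of DatabricksPathKind.parse above, 0.1.24 always constructed Workspace(host=parsed.hostname), which breaks on host-less URLs; 0.1.26 threads the caller's workspace through the recursive parse call and only builds a Workspace from the URL host when one is actually present. The host-less case, demonstrated with the standard library (values illustrative):

    import urllib.parse as urlparse

    parsed = urlparse.urlparse("dbfs:///tmp/data")  # dbfs URL with no host component
    print(parsed.hostname)  # None -> 0.1.24 would have built Workspace(host=None)
    print(parsed.path)      # '/tmp/data' -> re-parsed and classified as DBFS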
@@ -75,48 +92,121 @@ class DatabricksPath(SysPath, PurePosixPath):
      _raw_status: Optional[dict]
      _raw_status_refresh_time: float
  
+     @staticmethod
+     def _join_segments(pathsegments: tuple[Any, ...]) -> str:
+         if not pathsegments:
+             return ""
+ 
+         first = _seg_to_str(pathsegments[0])
+ 
+         # Keep dbfs:// URL-ish paths URL-ish (don't let PurePosixPath normalize it)
+         if first.startswith("dbfs://"):
+             rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
+             first = first.rstrip("/")
+             tail = "/".join(rest)
+             return f"{first}/{tail}" if tail else first
+ 
+         return str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
+ 
+     def _init(self, template: Optional["DatabricksPath"] = None) -> None:
+         """
+         pathlib creates derived paths (parent, /, joinpath, with_name, etc.) via _from_parts
+         which bypasses __new__. _init(template=...) is the hook to carry our metadata forward.
+         """
+         if isinstance(template, DatabricksPath):
+             # Recompute kind for the NEW path string (don’t blindly copy _kind)
+             temp_ws = getattr(template, "_workspace", None)
+             kind, ws, _ = DatabricksPathKind.parse(str(self), workspace=temp_ws)
+ 
+             self._kind = kind
+             self._workspace = ws if ws is not None else temp_ws
+ 
+             # Never inherit caches from template
+             self._is_file = None
+             self._is_dir = None
+             self._raw_status = None
+             self._raw_status_refresh_time = 0.0
+         else:
+             kind, ws, _ = DatabricksPathKind.parse(str(self))
+             self._kind = kind
+             self._workspace = ws
+ 
+             self._is_file = None
+             self._is_dir = None
+             self._raw_status = None
+             self._raw_status_refresh_time = 0.0
+ 
      def __new__(
          cls,
-         *pathsegments,
+         *pathsegments: Any,
          workspace: Optional["Workspace"] = None,
          is_file: Optional[bool] = None,
          is_dir: Optional[bool] = None,
          raw_status: Optional[dict] = None,
-         raw_status_refresh_time: float = 0
-     ):
-         if not pathsegments:
-             joined = ""
-         else:
-             first = _seg_to_str(pathsegments[0])
- 
-             # Special case: if someone passes a dbfs://... URL segment, keep it URL-like
-             if first.startswith("dbfs://"):
-                 rest = [_seg_to_str(s).lstrip("/") for s in pathsegments[1:]]
-                 joined = first.rstrip("/")
-                 if rest:
-                     joined += "/" + "/".join(rest)
-             else:
-                 joined = str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
+         raw_status_refresh_time: float = 0.0,
+     ) -> "DatabricksPath":
+         joined = cls._join_segments(pathsegments)
+         kind, parsed_ws, pure_path = DatabricksPathKind.parse(joined, workspace=workspace)
+ 
+         self = cls._from_parts([pure_path])  # pathlib-style construction (calls _init)
+ 
+         # Override with constructor-provided metadata
+         self._kind = kind
+         self._workspace = parsed_ws if workspace is None else workspace
+         self._is_file = is_file
+         self._is_dir = is_dir
+         self._raw_status = raw_status
+         self._raw_status_refresh_time = float(raw_status_refresh_time)
+ 
+         return self
  
-         kind, w, p = DatabricksPathKind.parse(joined)
+     def __init__(
+         self,
+         *pathsegments: Any,
+         workspace: Optional["Workspace"] = None,
+         is_file: Optional[bool] = None,
+         is_dir: Optional[bool] = None,
+         raw_status: Optional[dict] = None,
+         raw_status_refresh_time: float = 0.0,
+     ) -> None:
+         # pathlib paths are effectively immutable; all init happens in __new__ / _init
+         pass
  
-         obj = super().__new__(cls, p)
+     def __truediv__(self, other):
+         if not other:
+             return self
  
-         obj._kind = kind
-         obj._workspace = w if workspace is None else workspace
-         obj._is_file = is_file
-         obj._is_dir = is_dir
-         obj._raw_status = raw_status
-         obj._raw_status_refresh_time = raw_status_refresh_time
+         built = super().__truediv__(other)
  
-         return obj
+         built._kind = self._kind
+         built._workspace = self._workspace
+ 
+         built._is_file = None
+         built._is_dir = None
+         built._raw_status = None
+         built._raw_status_refresh_time = 0.0
+ 
+         return built
  
      def __enter__(self):
          self.workspace.__enter__()
          return self
  
      def __exit__(self, exc_type, exc_val, exc_tb):
-         self.workspace.__exit__(exc_type, exc_val, exc_tb)
+         return self.workspace.__exit__(exc_type, exc_val, exc_tb)
+ 
+     def _from_parsed_parts(self, drv, root, parts):
+         built = super()._from_parsed_parts(drv, root, parts)
+ 
+         built._kind = self._kind
+         built._workspace = self._workspace
+ 
+         built._is_file = None
+         built._is_dir = None
+         built._raw_status = None
+         built._raw_status_refresh_time = 0.0
+ 
+         return built
  
      @property
      def workspace(self):
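
Note on the large hunk above: most of it exists because pathlib builds derived paths (parent, the / operator, with_name, and so on) through internal constructors rather than through __new__, so subclass attributes silently disappear on derived paths. The _init, __truediv__, and _from_parsed_parts overrides re-attach _kind and _workspace and deliberately reset the _is_file/_is_dir/_raw_status caches; __exit__ now also returns the delegated result, so the workspace's exception-suppression decision propagates. The underlying pitfall, reproduced on plain pathlib (a standalone sketch, independent of this package):

    from pathlib import PurePosixPath

    class TaggedPath(PurePosixPath):
        """A PurePosixPath subclass that tries to carry extra metadata."""

    p = TaggedPath("/Volumes/cat/sch/vol/file.txt")
    p._tag = "volume"

    parent = p.parent  # built by pathlib internals, not by our code
    print(getattr(parent, "_tag", "lost"))  # prints 'lost': the metadata did not survive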
@@ -126,22 +216,30 @@ class DatabricksPath(SysPath, PurePosixPath):
              self._workspace = Workspace()
          return self._workspace
  
+     @workspace.setter
+     def workspace(self, value):
+         self._workspace = value
+ 
      @property
      def kind(self):
          return self._kind
  
-     def is_file(self, *, follow_symlinks = True):
+     @kind.setter
+     def kind(self, value: DatabricksPathKind):
+         self._kind = value
+ 
+     def is_file(self, *, follow_symlinks=True):
          if self._is_file is None:
              self.refresh_status()
          return self._is_file
  
-     def is_dir(self, *, follow_symlinks = True):
+     def is_dir(self, *, follow_symlinks=True):
          if self._is_dir is None:
              self.refresh_status()
          return self._is_dir
  
      def volume_parts(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[PurePosixPath]]:
-         if self._kind != DatabricksPathKind.VOLUME:
+         if self.kind != DatabricksPathKind.VOLUME:
              return None, None, None, None
  
          s = str(self)
@@ -169,12 +267,12 @@ class DatabricksPath(SysPath, PurePosixPath):
          sdk = connected.workspace.sdk()
  
          try:
-             if connected._kind == DatabricksPathKind.VOLUME:
+             if connected.kind == DatabricksPathKind.VOLUME:
                  info = sdk.files.get_metadata(connected.as_files_api_path())
  
                  connected._raw_status = info
                  connected._is_file, connected._is_dir = True, False
-             elif connected._kind == DatabricksPathKind.WORKSPACE:
+             elif connected.kind == DatabricksPathKind.WORKSPACE:
                  info = sdk.workspace.get_status(connected.as_workspace_api_path())
  
                  is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
@@ -184,7 +282,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                  info = sdk.dbfs.get_status(connected.as_dbfs_api_path())
  
                  connected._raw_status = info
-                 connected._is_file, connected._is_dir = not info.is_dir, info.is_dir
+                 connected._is_file, connected._is_dir = (not info.is_dir), info.is_dir
  
              connected._raw_status_refresh_time = time.time()
          except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
@@ -204,7 +302,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          self._is_file = None
          self._is_dir = None
  
- 
      # ---- API path normalization helpers ----
  
      def as_workspace_api_path(self) -> str:
@@ -243,7 +340,7 @@ class DatabricksPath(SysPath, PurePosixPath):
              return True
          return False
  
-     def mkdir(self, mode = 0o777, parents = True, exist_ok = True):
+     def mkdir(self, mode=0o777, parents=True, exist_ok=True):
          """
          Create a new directory at this given path.
          """
@@ -251,9 +348,9 @@ class DatabricksPath(SysPath, PurePosixPath):
              connected.clear_cache()
  
              try:
-                 if connected._kind == DatabricksPathKind.WORKSPACE:
+                 if connected.kind == DatabricksPathKind.WORKSPACE:
                      connected.workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
-                 elif connected._kind == DatabricksPathKind.VOLUME:
+                 elif connected.kind == DatabricksPathKind.VOLUME:
                      return connected._create_volume_dir(mode=mode, parents=parents, exist_ok=exist_ok)
                  elif connected._kind == DatabricksPathKind.DBFS:
                      connected.workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
@@ -266,8 +363,6 @@ class DatabricksPath(SysPath, PurePosixPath):
                  connected.parent.mkdir(parents=True, exist_ok=True)
                  connected.mkdir(mode, parents=False, exist_ok=exist_ok)
              except (AlreadyExists, ResourceAlreadyExists):
-                 # Cannot rely on checking for EEXIST, since the operating system
-                 # could give priority to other errors like EACCES or EROFS
                  if not exist_ok:
                      raise
  
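Note: the comment deleted at the end of the hunk above, and again in the four creation hunks that follow, was inherited from CPython's Path.mkdir, where it explains OS errno priorities (EEXIST vs EACCES/EROFS); it never applied here, because duplicates surface as typed SDK exceptions rather than errno values. The idiom that remains, sketched generically (the exception class is a stand-in for databricks.sdk.errors.platform.AlreadyExists):

    class AlreadyExistsError(Exception):
        """Stand-in for the SDK's AlreadyExists error."""

    def ensure_created(create, exist_ok=True):
        # Attempt the create call; treat "already exists" as success when exist_ok is set.
        try:
            create()
        except AlreadyExistsError:
            if not exist_ok:
                raise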
@@ -279,8 +374,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          try:
              sdk.catalogs.create(name=catalog_name)
          except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
-             # Cannot rely on checking for EEXIST, since the operating system
-             # could give priority to other errors like EACCES or EROFS
              if not exist_ok:
                  raise
  
@@ -288,8 +381,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          try:
              sdk.schemas.create(catalog_name=catalog_name, name=schema_name)
          except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
-             # Cannot rely on checking for EEXIST, since the operating system
-             # could give priority to other errors like EACCES or EROFS
              if not exist_ok:
                  raise
  
@@ -299,15 +390,13 @@ class DatabricksPath(SysPath, PurePosixPath):
                  catalog_name=catalog_name,
                  schema_name=schema_name,
                  name=volume_name,
-                 volume_type=VolumeType.MANAGED
+                 volume_type=VolumeType.MANAGED,
              )
          except (AlreadyExists, ResourceAlreadyExists, BadRequest):
-             # Cannot rely on checking for EEXIST, since the operating system
-             # could give priority to other errors like EACCES or EROFS
              if not exist_ok:
                  raise
  
-     def _create_volume_dir(self, mode = 0o777, parents = True, exist_ok = True):
+     def _create_volume_dir(self, mode=0o777, parents=True, exist_ok=True):
          path = self.as_files_api_path()
          sdk = self.workspace.sdk()
  
@@ -324,8 +413,6 @@ class DatabricksPath(SysPath, PurePosixPath):
  
              sdk.files.create_directory(path)
          except (AlreadyExists, ResourceAlreadyExists, BadRequest):
-             # Cannot rely on checking for EEXIST, since the operating system
-             # could give priority to other errors like EACCES or EROFS
              if not exist_ok:
                  raise
  
@@ -340,11 +427,11 @@ class DatabricksPath(SysPath, PurePosixPath):
  
      def rmfile(self):
          try:
-             if self._kind == DatabricksPathKind.VOLUME:
+             if self.kind == DatabricksPathKind.VOLUME:
                  return self._remove_volume_file()
-             elif self._kind == DatabricksPathKind.WORKSPACE:
+             elif self.kind == DatabricksPathKind.WORKSPACE:
                  return self._remove_workspace_file()
-             elif self._kind == DatabricksPathKind.DBFS:
+             elif self.kind == DatabricksPathKind.DBFS:
                  return self._remove_dbfs_file()
          finally:
              self.clear_cache()
@@ -376,17 +463,17 @@ class DatabricksPath(SysPath, PurePosixPath):
      def rmdir(self, recursive: bool = True):
          with self as connected:
              try:
-                 if connected._kind == DatabricksPathKind.WORKSPACE:
+                 if connected.kind == DatabricksPathKind.WORKSPACE:
                      connected.workspace.sdk().workspace.delete(
                          self.as_workspace_api_path(),
-                         recursive=recursive
+                         recursive=recursive,
                      )
-                 elif connected._kind == DatabricksPathKind.VOLUME:
+                 elif connected.kind == DatabricksPathKind.VOLUME:
                      return self._remove_volume_dir(recursive=recursive)
                  else:
                      connected.workspace.sdk().dbfs.delete(
                          self.as_dbfs_api_path(),
-                         recursive=recursive
+                         recursive=recursive,
                      )
              except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
                  pass
@@ -425,13 +512,13 @@ class DatabricksPath(SysPath, PurePosixPath):
          self.clear_cache()
  
      def ls(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
-         if self._kind == DatabricksPathKind.VOLUME:
+         if self.kind == DatabricksPathKind.VOLUME:
              for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                  yield _
-         elif self._kind == DatabricksPathKind.WORKSPACE:
+         elif self.kind == DatabricksPathKind.WORKSPACE:
              for _ in self._ls_workspace(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                  yield _
-         elif self._kind == DatabricksPathKind.DBFS:
+         elif self.kind == DatabricksPathKind.DBFS:
              for _ in self._ls_dbfs(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                  yield _
  
@@ -444,13 +531,13 @@ class DatabricksPath(SysPath, PurePosixPath):
              try:
                  for info in sdk.volumes.list(
                      catalog_name=catalog_name,
-                     schema_name=schema_name
+                     schema_name=schema_name,
                  ):
                      base = DatabricksPath(
                          f"/Volumes/{info.catalog_name}/{info.schema_name}/{info.name}",
                          workspace=self.workspace,
                          is_file=False,
-                         is_dir=True
+                         is_dir=True,
                      )
  
                      if recursive:
@@ -463,12 +550,12 @@ class DatabricksPath(SysPath, PurePosixPath):
                      raise
          elif schema_name is None:
              try:
-                 for info in sdk.schemas.list(catalog_name=catalog_name,):
+                 for info in sdk.schemas.list(catalog_name=catalog_name):
                      base = DatabricksPath(
                          f"/Volumes/{info.catalog_name}/{info.name}",
                          workspace=self.workspace,
                          is_file=False,
-                         is_dir=True
+                         is_dir=True,
                      )
  
                      if recursive:
@@ -486,7 +573,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                      f"/Volumes/{info.name}",
                      workspace=self.workspace,
                      is_file=False,
-                     is_dir=True
+                     is_dir=True,
                  )
  
                  if recursive:
@@ -504,7 +591,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                      info.path,
                      workspace=self.workspace,
                      is_file=not info.is_directory,
-                     is_dir=info.is_directory
+                     is_dir=info.is_directory,
                  )
  
                  if recursive and info.is_directory:
@@ -526,9 +613,8 @@ class DatabricksPath(SysPath, PurePosixPath):
                      info.path,
                      workspace=self.workspace,
                      is_file=not is_dir,
-                     is_dir=is_dir
+                     is_dir=is_dir,
                  )
- 
                  yield base
          except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
              if raise_error:
@@ -538,14 +624,15 @@ class DatabricksPath(SysPath, PurePosixPath):
          sdk = self.workspace.sdk()
  
          try:
-             for info in sdk.dbfs.list(self.as_workspace_api_path(), recursive=recursive):
+             # FIX: DBFS listing should use DBFS-normalized path, not workspace path
+             p = "/dbfs/" + self.as_dbfs_api_path() + "/"
+             for info in sdk.dbfs.list(p, recursive=recursive):
                  base = DatabricksPath(
                      info.path,
                      workspace=self.workspace,
                      is_file=not info.is_dir,
-                     is_dir=info.is_dir
+                     is_dir=info.is_dir,
                  )
- 
                  yield base
          except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
              if raise_error:
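
Note on the _ls_dbfs hunk above: 0.1.24 fed a workspace-normalized path into the DBFS listing endpoint; the fix derives the argument from the DBFS normalizer instead. For reference, a hedged sketch of the SDK call shape the loop consumes (assuming the databricks-sdk Python client; the listed path is a placeholder):

    from databricks.sdk import WorkspaceClient

    w = WorkspaceClient()  # host and credentials resolved from env or ~/.databrickscfg
    for info in w.dbfs.list("/tmp", recursive=False):
        # each FileInfo carries .path and .is_dir, which _ls_dbfs maps onto DatabricksPath
        print(info.path, "dir" if info.is_dir else "file")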
@@ -554,7 +641,7 @@ class DatabricksPath(SysPath, PurePosixPath):
      @contextmanager
      def open(
          self,
-         mode='r',
+         mode="r",
          buffering=-1,
          encoding=None,
          errors=None,
@@ -569,11 +656,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          Supported:
          - read: "rb", "r"
          - write: "wb", "w" (buffered; uploads on close for WORKSPACE/VOLUME)
- 
-         Notes:
-         - VOLUME: uses w.files.download/upload (Files API). :contentReference[oaicite:5]{index=5}
-         - DBFS: uses w.dbfs.open when possible. :contentReference[oaicite:6]{index=6}
-         - WORKSPACE: uses w.workspace.download/upload. :contentReference[oaicite:7]{index=7}
          """
          if mode not in {"rb", "r", "wb", "w"}:
              raise ValueError(f"Unsupported mode {mode!r}. Use r/rb/w/wb.")
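
Note on the docstring hunk above: the deleted Notes block carried stray :contentReference[oaicite:...] citation markers (the same debris is removed from several comments below), and the remaining docstring now matches the enforced mode whitelist. A hypothetical usage sketch of the resulting API (the volume path is illustrative):

    path = DatabricksPath("/Volumes/main/default/my_vol/notes.txt")

    with path.open("w") as f:   # buffered locally; uploaded when the context closes
        f.write("hello")

    with path.open("r") as f:   # downloaded, then wrapped for text reading
        print(f.read())

    # entering open("a") raises ValueError: only r/rb/w/wb are supported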
@@ -592,10 +674,10 @@ class DatabricksPath(SysPath, PurePosixPath):
      @contextmanager
      def open_read(self, encoding: str | None = None):
          with self as connected:
-             if connected._kind == DatabricksPathKind.VOLUME:
+             if connected.kind == DatabricksPathKind.VOLUME:
                  with connected._open_read_volume(encoding=encoding) as f:
                      yield f
-             elif connected._kind == DatabricksPathKind.WORKSPACE:
+             elif connected.kind == DatabricksPathKind.WORKSPACE:
                  with connected._open_read_workspace(encoding=encoding) as f:
                      yield f
              else:
@@ -607,7 +689,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          workspace_client = self.workspace.sdk()
          path = self.as_files_api_path()
  
-         # Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
          resp = workspace_client.files.download(path)
          raw = io.BytesIO(resp.contents.read())
  
@@ -623,12 +704,10 @@ class DatabricksPath(SysPath, PurePosixPath):
          workspace_client = self.workspace.sdk()
          path = self.as_workspace_api_path()
  
-         # Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
-         raw = workspace_client.workspace.download(path)  # returns BinaryIO :contentReference[oaicite:10]{index=10}
+         raw = workspace_client.workspace.download(path)  # returns BinaryIO
  
          if encoding is not None:
              raw = io.BytesIO(raw.read())
- 
              with io.TextIOWrapper(raw, encoding=encoding) as f:
                  yield f
          else:
@@ -640,7 +719,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          workspace_client = self.workspace.sdk()
          path = self.as_dbfs_api_path()
  
-         # dbfs.open gives BinaryIO for streaming reads :contentReference[oaicite:12]{index=12}
          raw = workspace_client.dbfs.open(path, read=True)
  
          if encoding is not None:
@@ -653,10 +731,10 @@ class DatabricksPath(SysPath, PurePosixPath):
      @contextmanager
      def open_write(self, encoding: str | None = None):
          with self as connected:
-             if connected._kind == DatabricksPathKind.VOLUME:
+             if connected.kind == DatabricksPathKind.VOLUME:
                  with connected._open_write_volume(encoding=encoding) as f:
                      yield f
-             elif connected._kind == DatabricksPathKind.WORKSPACE:
+             elif connected.kind == DatabricksPathKind.WORKSPACE:
                  with connected._open_write_workspace(encoding=encoding) as f:
                      yield f
              else:
@@ -668,7 +746,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          workspace_client = self.workspace.sdk()
          path = self.as_files_api_path()
  
-         # Buffer locally then upload stream on exit. :contentReference[oaicite:9]{index=9}
          buf = io.BytesIO()
  
          if encoding is not None:
@@ -703,7 +780,6 @@ class DatabricksPath(SysPath, PurePosixPath):
          workspace_client = self.workspace.sdk()
          path = self.as_workspace_api_path()
  
-         # Buffer then upload (AUTO works for workspace files) :contentReference[oaicite:11]{index=11}
          buf = io.BytesIO()
  
          if encoding is not None:
@@ -727,7 +803,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                      path, buf, format=ImportFormat.AUTO, overwrite=overwrite
                  )
              else:
-                 raise e
+                 raise
  
          tw.detach()
      else:
@@ -749,14 +825,14 @@ class DatabricksPath(SysPath, PurePosixPath):
                      path, buf, format=ImportFormat.AUTO, overwrite=overwrite
                  )
              else:
-                 raise e
+                 raise
  
      @contextmanager
      def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
          workspace_client = self.workspace.sdk()
          path = self.as_dbfs_api_path()
  
-         raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)  # :contentReference[oaicite:13]{index=13}
+         raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
  
          if encoding is not None:
              with io.TextIOWrapper(raw, encoding=encoding) as f:
@@ -766,4 +842,4 @@ class DatabricksPath(SysPath, PurePosixPath):
                  yield f
  
          self.clear_cache()
-         self._is_file, self._is_dir = True, False
+         self._is_file, self._is_dir = True, False