ygg 0.1.28__py3-none-any.whl → 0.1.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  # src/yggdrasil/databricks/workspaces/databricks_path.py
2
2
  from __future__ import annotations
3
3
 
4
+ import dataclasses
4
5
  import io
5
6
  import time
6
- import urllib.parse as urlparse
7
7
  from contextlib import contextmanager
8
8
  from enum import Enum
9
- from pathlib import PurePosixPath, Path as SysPath
10
- from typing import Any, BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
9
+ from pathlib import PurePosixPath
10
+ from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING, List
11
11
 
12
12
  from databricks.sdk.service.catalog import VolumeType
13
13
 
@@ -37,11 +37,19 @@ __all__ = [
37
37
  ]
38
38
 
39
39
 
40
- def _seg_to_str(s) -> str:
41
- # Handles DatabricksPath, PurePosixPath, Windows Path, etc.
42
- if isinstance(s, SysPath):
43
- return s.as_posix()
44
- return str(s)
40
+ def _flatten_parts(parts: Union[list[str], str]) -> list[str]:
41
+ if isinstance(parts, str):
42
+ parts = [parts]
43
+
44
+ if any("/" in part for part in parts):
45
+ # flatten parts with slashes
46
+ new_parts = []
47
+ for part in parts:
48
+ split_parts = part.split("/")
49
+ new_parts.extend(split_parts)
50
+ parts = new_parts
51
+
52
+ return parts
45
53
 
46
54
 
47
55
  class DatabricksPathKind(str, Enum):
@@ -49,221 +57,143 @@ class DatabricksPathKind(str, Enum):
49
57
  VOLUME = "volume"
50
58
  DBFS = "dbfs"
51
59
 
52
- @classmethod
53
- def parse(
54
- cls,
55
- path: str,
56
- workspace: Optional["Workspace"] = None,
57
- ) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
58
- from .workspace import Workspace
59
-
60
- if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
61
- if path.startswith("/Users/me"):
62
- workspace = Workspace() if workspace is None else workspace
63
- path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
64
-
65
- return cls.WORKSPACE, workspace, path
66
-
67
- if path.startswith("/Volumes"):
68
- return cls.VOLUME, workspace, path
69
-
70
- if path.startswith("dbfs://"):
71
- parsed = urlparse.urlparse(path)
72
-
73
- # inner path is the URL path (e.g. /tmp/x or /Volumes/...)
74
- kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
75
-
76
- # hostname can be None for malformed/dbfs:// variants; fall back to default Workspace()
77
- if workspace is None:
78
- workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
79
-
80
- return kind, workspace, inner_path
81
-
82
- return cls.DBFS, workspace, path
83
-
84
-
85
- class DatabricksPath(SysPath, PurePosixPath):
86
- _kind: "DatabricksPathKind"
87
- _workspace: Optional["Workspace"]
88
-
89
- _is_file: Optional[bool]
90
- _is_dir: Optional[bool]
91
-
92
- _raw_status: Optional[dict]
93
- _raw_status_refresh_time: float
94
-
95
- @staticmethod
96
- def _join_segments(pathsegments: tuple[Any, ...]) -> str:
97
- if not pathsegments:
98
- return ""
99
60
 
100
- first = _seg_to_str(pathsegments[0])
61
+ @dataclasses.dataclass
62
+ class DatabricksPath:
63
+ kind: "DatabricksPathKind"
64
+ parts: List[str]
65
+ workspace: Optional["Workspace"] = None
101
66
 
102
- # Keep dbfs:// URL-ish paths URL-ish (don't let PurePosixPath normalize it)
103
- if first.startswith("dbfs://"):
104
- rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
105
- first = first.rstrip("/")
106
- tail = "/".join(rest)
107
- return f"{first}/{tail}" if tail else first
67
+ _is_file: Optional[bool] = None
68
+ _is_dir: Optional[bool] = None
108
69
 
109
- return str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
70
+ _raw_status: Optional[dict] = None
71
+ _raw_status_refresh_time: float = 0.0
110
72
 
111
- def __new__(
73
+ @classmethod
74
+ def parse(
112
75
  cls,
113
- *pathsegments: Any,
76
+ parts: Union[List[str], str],
114
77
  workspace: Optional["Workspace"] = None,
115
- is_file: Optional[bool] = None,
116
- is_dir: Optional[bool] = None,
117
- raw_status: Optional[dict] = None,
118
- raw_status_refresh_time: float = 0.0,
119
78
  ) -> "DatabricksPath":
120
- joined = cls._join_segments(pathsegments)
121
- kind, parsed_ws, pure_path = DatabricksPathKind.parse(joined, workspace=workspace)
122
-
123
- self = cls._from_parts([pure_path]) # pathlib-style construction (calls _init)
79
+ if not parts:
80
+ return DatabricksPath(
81
+ kind=DatabricksPathKind.DBFS,
82
+ parts=[],
83
+ workspace=workspace,
84
+ )
85
+
86
+ parts = _flatten_parts(parts)
87
+
88
+ if not parts[0]:
89
+ parts = parts[1:]
90
+
91
+ if not parts:
92
+ return DatabricksPath(
93
+ kind=DatabricksPathKind.DBFS,
94
+ parts=[],
95
+ workspace=workspace,
96
+ )
97
+
98
+ head, *tail = parts
99
+
100
+ if head == "dbfs":
101
+ kind = DatabricksPathKind.DBFS
102
+ elif head == "Workspace":
103
+ kind = DatabricksPathKind.WORKSPACE
104
+ elif head == "Volumes":
105
+ kind = DatabricksPathKind.VOLUME
106
+ else:
107
+ raise ValueError(f"Invalid DatabricksPath prefix: {parts!r}")
124
108
 
125
- # Override with constructor-provided metadata
126
- self._kind = kind
127
- self._workspace = parsed_ws if workspace is None else workspace
128
- self._is_file = is_file
129
- self._is_dir = is_dir
130
- self._raw_status = raw_status
131
- self._raw_status_refresh_time = float(raw_status_refresh_time)
109
+ return DatabricksPath(
110
+ kind=kind,
111
+ parts=tail,
112
+ workspace=workspace,
113
+ )
132
114
 
133
- return self
115
+ def __hash__(self):
116
+ return hash((self.kind, tuple(self.parts)))
134
117
 
135
- def __init__(
136
- self,
137
- *pathsegments: Any,
138
- workspace: Optional["Workspace"] = None,
139
- is_file: Optional[bool] = None,
140
- is_dir: Optional[bool] = None,
141
- raw_status: Optional[dict] = None,
142
- raw_status_refresh_time: float = 0.0,
143
- ) -> None:
144
- # pathlib paths are effectively immutable; all init happens in __new__ / _init
145
- pass
118
+ def __eq__(self, other):
119
+ if not isinstance(other, DatabricksPath):
120
+ if isinstance(other, str):
121
+ return str(self) == other
122
+ return False
123
+ return self.kind == other.kind and self.parts == other.parts
146
124
 
147
125
  def __truediv__(self, other):
148
126
  if not other:
149
127
  return self
150
128
 
151
- built = super().__truediv__(other)
152
-
153
- built._kind = self._kind
154
- built._workspace = self._workspace
129
+ other_parts = _flatten_parts(other)
155
130
 
156
- built._is_file = None
157
- built._is_dir = None
158
- built._raw_status = None
159
- built._raw_status_refresh_time = 0.0
131
+ built = DatabricksPath(
132
+ kind=self.kind,
133
+ parts=self.parts + other_parts,
134
+ workspace=self.workspace,
135
+ )
160
136
 
161
137
  return built
162
138
 
163
139
  def __enter__(self):
164
- self.workspace.__enter__()
140
+ self.safe_workspace.__enter__()
165
141
  return self
166
142
 
167
143
  def __exit__(self, exc_type, exc_val, exc_tb):
168
- return self.workspace.__exit__(exc_type, exc_val, exc_tb)
144
+ return self.safe_workspace.__exit__(exc_type, exc_val, exc_tb)
169
145
 
170
- def _clone_meta_from(self, template: "DatabricksPath") -> None:
171
- """
172
- Copy *connection/meta* state, but never copy caches.
173
- Centralizes the logic so every creation path stays consistent.
174
- """
175
- # Keep workspace threading; kind should match the NEW path string.
176
- kind, ws, _ = DatabricksPathKind.parse(str(self), workspace=getattr(template, "_workspace", None))
177
- self._kind = kind
178
- self._workspace = ws if ws is not None else getattr(template, "_workspace", None)
146
+ def __str__(self):
147
+ if self.kind == DatabricksPathKind.DBFS:
148
+ return self.as_dbfs_api_path()
149
+ elif self.kind == DatabricksPathKind.WORKSPACE:
150
+ return self.as_workspace_api_path()
151
+ elif self.kind == DatabricksPathKind.VOLUME:
152
+ return self.as_files_api_path()
153
+ else:
154
+ raise ValueError(f"Unknown DatabricksPath kind: {self.kind!r}")
179
155
 
180
- # Reset caches
181
- self._is_file = None
182
- self._is_dir = None
183
- self._raw_status = None
184
- self._raw_status_refresh_time = 0.0
156
+ def __repr__(self):
157
+ return "dbfs://%s" % self.__str__()
185
158
 
186
159
  @property
187
160
  def parent(self):
188
- built = super().parent
189
-
190
- built._clone_meta_from(self)
191
-
192
- return built
161
+ if not self.parts:
162
+ return self
193
163
 
194
- @classmethod
195
- def _from_parsed_parts(cls, drv, root, parts):
196
- """
197
- pathlib internal factory. It may pass a template in some Python versions,
198
- but if not, we still return a valid DatabricksPath with initialized state.
199
- """
200
- built = super()._from_parsed_parts(drv, root, parts) # type: ignore[misc]
201
-
202
- # Best effort: if pathlib gave us a template on the object, use it.
203
- # Otherwise ensure we at least have valid defaults.
204
- if isinstance(built, DatabricksPath) and isinstance(getattr(built, "_workspace", None), object):
205
- # If the object already has workspace/kind via _init, don't stomp it.
206
- # But if it's missing _kind (common failure), derive it.
207
- if not hasattr(built, "_kind"):
208
- kind, ws, _ = DatabricksPathKind.parse(str(built), workspace=getattr(built, "_workspace", None))
209
- built._kind = kind
210
- built._workspace = ws if ws is not None else getattr(built, "_workspace", None)
211
-
212
- # Always reset caches (derived path => cache invalid)
213
- built._is_file = None
214
- built._is_dir = None
215
- built._raw_status = None
216
- built._raw_status_refresh_time = 0.0
164
+ if self._is_file is not None or self._is_dir is not None:
165
+ _is_file, _is_dir = False, True
217
166
  else:
218
- # Safety defaults (should be rare)
219
- kind, ws, _ = DatabricksPathKind.parse(str(built))
220
- built._kind = kind
221
- built._workspace = ws
222
- built._is_file = None
223
- built._is_dir = None
224
- built._raw_status = None
225
- built._raw_status_refresh_time = 0.0
226
-
227
- return built
167
+ _is_file, _is_dir = None, None
228
168
 
229
- def _make_child(self, args):
230
- built = super()._make_child(args) # type: ignore[misc]
231
-
232
- # Ensure type + meta carryover
233
- if isinstance(built, DatabricksPath):
234
- built._clone_meta_from(self)
235
- else:
236
- # if for some reason super didn't return our type, try to coerce
237
- built = type(self)(built, workspace=getattr(self, "_workspace", None))
169
+ built = DatabricksPath(
170
+ kind=self.kind,
171
+ parts=self.parts[:-1],
172
+ workspace=self.workspace,
173
+ _is_file=_is_file,
174
+ _is_dir=_is_dir,
175
+ )
238
176
 
239
177
  return built
240
178
 
241
179
  @property
242
- def workspace(self):
243
- if self._workspace is None:
180
+ def safe_workspace(self):
181
+ if self.workspace is None:
244
182
  from .workspace import Workspace
245
183
 
246
- self._workspace = Workspace()
247
- return self._workspace
248
-
249
- @workspace.setter
250
- def workspace(self, value):
251
- self._workspace = value
252
-
253
- @property
254
- def kind(self):
255
- return self._kind
184
+ self.workspace = Workspace()
185
+ return self.workspace
256
186
 
257
- @kind.setter
258
- def kind(self, value: DatabricksPathKind):
259
- self._kind = value
187
+ @safe_workspace.setter
188
+ def safe_workspace(self, value):
189
+ self.workspace = value
260
190
 
261
- def is_file(self, *, follow_symlinks=True):
191
+ def is_file(self):
262
192
  if self._is_file is None:
263
193
  self.refresh_status()
264
194
  return self._is_file
265
195
 
266
- def is_dir(self, *, follow_symlinks=True):
196
+ def is_dir(self):
267
197
  if self._is_dir is None:
268
198
  self.refresh_status()
269
199
  return self._is_dir
@@ -272,29 +202,15 @@ class DatabricksPath(SysPath, PurePosixPath):
272
202
  if self.kind != DatabricksPathKind.VOLUME:
273
203
  return None, None, None, None
274
204
 
275
- s = str(self)
276
- segs = s.split("/") # ['', 'Volumes', catalog?, schema?, volume?, ...]
205
+ catalog = self.parts[0] if len(self.parts) > 0 and self.parts[0] else None
206
+ schema = self.parts[1] if len(self.parts) > 1 and self.parts[1] else None
207
+ volume = self.parts[2] if len(self.parts) > 2 and self.parts[2] else None
277
208
 
278
- # still keep the basic sanity check
279
- if len(segs) < 2 or segs[1] != "Volumes":
280
- raise ValueError(f"Invalid volume path: {s!r}")
281
-
282
- catalog = segs[2] if len(segs) > 2 and segs[2] else None
283
- schema = segs[3] if len(segs) > 3 and segs[3] else None
284
- volume = segs[4] if len(segs) > 4 and segs[4] else None
285
-
286
- # rel path only makes sense after /Volumes/<catalog>/<schema>/<volume>
287
- if len(segs) > 5:
288
- rel = "/".join(segs[5:])
289
- rel_path = PurePosixPath(rel) if rel else PurePosixPath(".")
290
- else:
291
- rel_path = None
292
-
293
- return catalog, schema, volume, rel_path
209
+ return catalog, schema, volume, self.parts[3:]
294
210
 
295
211
  def refresh_status(self):
296
212
  with self as connected:
297
- sdk = connected.workspace.sdk()
213
+ sdk = connected.safe_workspace.sdk()
298
214
 
299
215
  try:
300
216
  if connected.kind == DatabricksPathKind.VOLUME:
@@ -339,38 +255,29 @@ class DatabricksPath(SysPath, PurePosixPath):
339
255
  Workspace API typically uses paths like /Users/... (not /Workspace/Users/...)
340
256
  so we strip the leading /Workspace when present.
341
257
  """
342
- s = str(self)
343
- return s[len("/Workspace") :] if s.startswith("/Workspace") else s
258
+ return "/Workspace/%s" % "/".join(self.parts) if self.parts else "/Workspace"
344
259
 
345
260
  def as_dbfs_api_path(self) -> str:
346
261
  """
347
262
  DBFS REST wants absolute DBFS paths like /tmp/x.
348
263
  If the user passes /dbfs/tmp/x (FUSE-style), strip the /dbfs prefix.
349
264
  """
350
- s = str(self)
351
- return s[len("/dbfs") :] if s.startswith("/dbfs") else s
265
+ return "/dbfs/%s" % "/".join(self.parts) if self.parts else "/dbfs"
352
266
 
353
267
  def as_files_api_path(self) -> str:
354
268
  """
355
269
  Files API takes absolute paths, e.g. /Volumes/<...>/file
356
270
  """
357
- return str(self)
271
+ return "/Volumes/%s" % "/".join(self.parts) if self.parts else "/Volumes"
358
272
 
359
- def with_segments(self, *pathsegments):
360
- """Construct a new path object from any number of path-like objects.
361
- Subclasses may override this method to customize how new path objects
362
- are created from methods like `iterdir()`.
363
- """
364
- return type(self)(*pathsegments, workspace=self._workspace)
365
-
366
- def exists(self, *, follow_symlinks=True) -> bool:
273
+ def exists(self) -> bool:
367
274
  if self.is_file():
368
275
  return True
369
276
  if self.is_dir():
370
277
  return True
371
278
  return False
372
279
 
373
- def mkdir(self, mode=0o777, parents=True, exist_ok=True):
280
+ def mkdir(self, parents=True, exist_ok=True):
374
281
  """
375
282
  Create a new directory at this given path.
376
283
  """
@@ -379,11 +286,11 @@ class DatabricksPath(SysPath, PurePosixPath):
379
286
 
380
287
  try:
381
288
  if connected.kind == DatabricksPathKind.WORKSPACE:
382
- connected.workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
289
+ connected.safe_workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
383
290
  elif connected.kind == DatabricksPathKind.VOLUME:
384
- return connected._create_volume_dir(mode=mode, parents=parents, exist_ok=exist_ok)
385
- elif connected._kind == DatabricksPathKind.DBFS:
386
- connected.workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
291
+ return connected._create_volume_dir(parents=parents, exist_ok=exist_ok)
292
+ elif connected.kind == DatabricksPathKind.DBFS:
293
+ connected.safe_workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
387
294
 
388
295
  connected._is_file, connected._is_dir = False, True
389
296
  except (NotFound, ResourceDoesNotExist):
@@ -391,14 +298,14 @@ class DatabricksPath(SysPath, PurePosixPath):
391
298
  raise
392
299
 
393
300
  connected.parent.mkdir(parents=True, exist_ok=True)
394
- connected.mkdir(mode, parents=False, exist_ok=exist_ok)
301
+ connected.mkdir(parents=False, exist_ok=exist_ok)
395
302
  except (AlreadyExists, ResourceAlreadyExists):
396
303
  if not exist_ok:
397
304
  raise
398
305
 
399
306
  def _ensure_volume(self, exist_ok: bool = True):
400
307
  catalog_name, schema_name, volume_name, rel = self.volume_parts()
401
- sdk = self.workspace.sdk()
308
+ sdk = self.safe_workspace.sdk()
402
309
 
403
310
  if catalog_name:
404
311
  try:
@@ -426,9 +333,9 @@ class DatabricksPath(SysPath, PurePosixPath):
426
333
  if not exist_ok:
427
334
  raise
428
335
 
429
- def _create_volume_dir(self, mode=0o777, parents=True, exist_ok=True):
336
+ def _create_volume_dir(self, parents=True, exist_ok=True):
430
337
  path = self.as_files_api_path()
431
- sdk = self.workspace.sdk()
338
+ sdk = self.safe_workspace.sdk()
432
339
 
433
340
  try:
434
341
  sdk.files.create_directory(path)
@@ -438,7 +345,7 @@ class DatabricksPath(SysPath, PurePosixPath):
438
345
 
439
346
  message = str(e)
440
347
 
441
- if "not exist" in message:
348
+ if "olume" in message and "not exist" in message:
442
349
  self._ensure_volume()
443
350
 
444
351
  sdk.files.create_directory(path)
@@ -467,7 +374,7 @@ class DatabricksPath(SysPath, PurePosixPath):
467
374
  self.clear_cache()
468
375
 
469
376
  def _remove_volume_file(self):
470
- sdk = self.workspace.sdk()
377
+ sdk = self.safe_workspace.sdk()
471
378
 
472
379
  try:
473
380
  sdk.files.delete(self.as_files_api_path())
@@ -475,7 +382,7 @@ class DatabricksPath(SysPath, PurePosixPath):
475
382
  pass
476
383
 
477
384
  def _remove_workspace_file(self):
478
- sdk = self.workspace.sdk()
385
+ sdk = self.safe_workspace.sdk()
479
386
 
480
387
  try:
481
388
  sdk.workspace.delete(self.as_workspace_api_path(), recursive=True)
@@ -483,7 +390,7 @@ class DatabricksPath(SysPath, PurePosixPath):
483
390
  pass
484
391
 
485
392
  def _remove_dbfs_file(self):
486
- sdk = self.workspace.sdk()
393
+ sdk = self.safe_workspace.sdk()
487
394
 
488
395
  try:
489
396
  sdk.dbfs.delete(self.as_dbfs_api_path(), recursive=True)
@@ -494,14 +401,14 @@ class DatabricksPath(SysPath, PurePosixPath):
494
401
  with self as connected:
495
402
  try:
496
403
  if connected.kind == DatabricksPathKind.WORKSPACE:
497
- connected.workspace.sdk().workspace.delete(
404
+ connected.safe_workspace.sdk().workspace.delete(
498
405
  self.as_workspace_api_path(),
499
406
  recursive=recursive,
500
407
  )
501
408
  elif connected.kind == DatabricksPathKind.VOLUME:
502
409
  return self._remove_volume_dir(recursive=recursive)
503
410
  else:
504
- connected.workspace.sdk().dbfs.delete(
411
+ connected.safe_workspace.sdk().dbfs.delete(
505
412
  self.as_dbfs_api_path(),
506
413
  recursive=recursive,
507
414
  )
@@ -514,19 +421,9 @@ class DatabricksPath(SysPath, PurePosixPath):
514
421
  root_path = self.as_files_api_path()
515
422
  catalog_name, schema_name, volume_name, rel = self.volume_parts()
516
423
 
517
- sdk = self.workspace.sdk()
424
+ sdk = self.safe_workspace.sdk()
518
425
 
519
- if rel is None:
520
- try:
521
- sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
522
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
523
- pass
524
- elif volume_name is None:
525
- try:
526
- sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
527
- except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
528
- pass
529
- else:
426
+ if rel:
530
427
  try:
531
428
  sdk.files.delete_directory(root_path)
532
429
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied) as e:
@@ -538,6 +435,16 @@ class DatabricksPath(SysPath, PurePosixPath):
538
435
  sdk.files.delete_directory(root_path)
539
436
  else:
540
437
  pass
438
+ elif volume_name:
439
+ try:
440
+ sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
441
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
442
+ pass
443
+ elif schema_name:
444
+ try:
445
+ sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
446
+ except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
447
+ pass
541
448
 
542
449
  self.clear_cache()
543
450
 
@@ -546,15 +453,15 @@ class DatabricksPath(SysPath, PurePosixPath):
546
453
  for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
547
454
  yield _
548
455
  elif self.kind == DatabricksPathKind.WORKSPACE:
549
- for _ in self._ls_workspace(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
456
+ for _ in self._ls_workspace(recursive=recursive, raise_error=raise_error):
550
457
  yield _
551
458
  elif self.kind == DatabricksPathKind.DBFS:
552
- for _ in self._ls_dbfs(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
459
+ for _ in self._ls_dbfs(recursive=recursive, raise_error=raise_error):
553
460
  yield _
554
461
 
555
462
  def _ls_volume(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
556
463
  catalog_name, schema_name, volume_name, rel = self.volume_parts()
557
- sdk = self.workspace.sdk()
464
+ sdk = self.safe_workspace.sdk()
558
465
 
559
466
  if rel is None:
560
467
  if volume_name is None:
@@ -564,10 +471,11 @@ class DatabricksPath(SysPath, PurePosixPath):
564
471
  schema_name=schema_name,
565
472
  ):
566
473
  base = DatabricksPath(
567
- f"/Volumes/{info.catalog_name}/{info.schema_name}/{info.name}",
568
- workspace=self.workspace,
569
- is_file=False,
570
- is_dir=True,
474
+ kind=DatabricksPathKind.VOLUME,
475
+ parts = [info.catalog_name, info.schema_name, info.name],
476
+ workspace=self.safe_workspace,
477
+ _is_file=False,
478
+ _is_dir=True,
571
479
  )
572
480
 
573
481
  if recursive:
@@ -582,10 +490,11 @@ class DatabricksPath(SysPath, PurePosixPath):
582
490
  try:
583
491
  for info in sdk.schemas.list(catalog_name=catalog_name):
584
492
  base = DatabricksPath(
585
- f"/Volumes/{info.catalog_name}/{info.name}",
586
- workspace=self.workspace,
587
- is_file=False,
588
- is_dir=True,
493
+ kind=DatabricksPathKind.VOLUME,
494
+ parts=[info.catalog_name, info.name],
495
+ workspace=self.safe_workspace,
496
+ _is_file=False,
497
+ _is_dir=True,
589
498
  )
590
499
 
591
500
  if recursive:
@@ -600,10 +509,11 @@ class DatabricksPath(SysPath, PurePosixPath):
600
509
  try:
601
510
  for info in sdk.catalogs.list():
602
511
  base = DatabricksPath(
603
- f"/Volumes/{info.name}",
604
- workspace=self.workspace,
605
- is_file=False,
606
- is_dir=True,
512
+ kind=DatabricksPathKind.VOLUME,
513
+ parts=[info.name],
514
+ workspace=self.safe_workspace,
515
+ _is_file=False,
516
+ _is_dir=True,
607
517
  )
608
518
 
609
519
  if recursive:
@@ -618,10 +528,11 @@ class DatabricksPath(SysPath, PurePosixPath):
618
528
  try:
619
529
  for info in sdk.files.list_directory_contents(self.as_files_api_path(), page_size=fetch_size):
620
530
  base = DatabricksPath(
621
- info.path,
622
- workspace=self.workspace,
623
- is_file=not info.is_directory,
624
- is_dir=info.is_directory,
531
+ kind=DatabricksPathKind.VOLUME,
532
+ parts=info.path.split("/")[2:],
533
+ workspace=self.safe_workspace,
534
+ _is_file=not info.is_directory,
535
+ _is_dir=info.is_directory,
625
536
  )
626
537
 
627
538
  if recursive and info.is_directory:
@@ -633,36 +544,40 @@ class DatabricksPath(SysPath, PurePosixPath):
633
544
  if raise_error:
634
545
  raise
635
546
 
636
- def _ls_workspace(self, recursive: bool = True, fetch_size: int = None, raise_error: bool = True):
637
- sdk = self.workspace.sdk()
547
+ def _ls_workspace(self, recursive: bool = True, raise_error: bool = True):
548
+ sdk = self.safe_workspace.sdk()
638
549
 
639
550
  try:
640
551
  for info in sdk.workspace.list(self.as_workspace_api_path(), recursive=recursive):
641
552
  is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
642
553
  base = DatabricksPath(
643
- info.path,
644
- workspace=self.workspace,
645
- is_file=not is_dir,
646
- is_dir=is_dir,
554
+ kind=DatabricksPathKind.WORKSPACE,
555
+ parts=info.path.split("/")[2:],
556
+ workspace=self.safe_workspace,
557
+ _is_file=not is_dir,
558
+ _is_dir=is_dir,
647
559
  )
648
560
  yield base
649
561
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
650
562
  if raise_error:
651
563
  raise
652
564
 
653
- def _ls_dbfs(self, recursive: bool = True, fetch_size: int = None, raise_error: bool = True):
654
- sdk = self.workspace.sdk()
565
+ def _ls_dbfs(self, recursive: bool = True, raise_error: bool = True):
566
+ sdk = self.safe_workspace.sdk()
655
567
 
656
568
  try:
657
569
  # FIX: DBFS listing should use DBFS-normalized path, not workspace path
658
- p = "/dbfs/" + self.as_dbfs_api_path() + "/"
570
+ p = self.as_dbfs_api_path()
571
+
659
572
  for info in sdk.dbfs.list(p, recursive=recursive):
660
573
  base = DatabricksPath(
661
- info.path,
662
- workspace=self.workspace,
663
- is_file=not info.is_dir,
664
- is_dir=info.is_dir,
574
+ kind=DatabricksPathKind.DBFS,
575
+ parts=info.path.split("/")[2:],
576
+ workspace=self.safe_workspace,
577
+ _is_file=not info.is_dir,
578
+ _is_dir=info.is_dir,
665
579
  )
580
+
666
581
  yield base
667
582
  except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
668
583
  if raise_error:
@@ -672,13 +587,7 @@ class DatabricksPath(SysPath, PurePosixPath):
672
587
  def open(
673
588
  self,
674
589
  mode="r",
675
- buffering=-1,
676
590
  encoding=None,
677
- errors=None,
678
- newline=None,
679
- *,
680
- workspace: Optional["Workspace"] = None,
681
- overwrite: bool = True,
682
591
  ) -> Iterator[Union[BinaryIO, io.TextIOBase]]:
683
592
  """
684
593
  Open this Databricks path using databricks-sdk's WorkspaceClient.
@@ -716,7 +625,7 @@ class DatabricksPath(SysPath, PurePosixPath):
716
625
 
717
626
  @contextmanager
718
627
  def _open_read_volume(self, encoding: str | None = None):
719
- workspace_client = self.workspace.sdk()
628
+ workspace_client = self.safe_workspace.sdk()
720
629
  path = self.as_files_api_path()
721
630
 
722
631
  resp = workspace_client.files.download(path)
@@ -731,7 +640,7 @@ class DatabricksPath(SysPath, PurePosixPath):
731
640
 
732
641
  @contextmanager
733
642
  def _open_read_workspace(self, encoding: str | None = None):
734
- workspace_client = self.workspace.sdk()
643
+ workspace_client = self.safe_workspace.sdk()
735
644
  path = self.as_workspace_api_path()
736
645
 
737
646
  raw = workspace_client.workspace.download(path) # returns BinaryIO
@@ -746,7 +655,7 @@ class DatabricksPath(SysPath, PurePosixPath):
746
655
 
747
656
  @contextmanager
748
657
  def _open_read_dbfs(self, encoding: str | None = None):
749
- workspace_client = self.workspace.sdk()
658
+ workspace_client = self.safe_workspace.sdk()
750
659
  path = self.as_dbfs_api_path()
751
660
 
752
661
  raw = workspace_client.dbfs.open(path, read=True)
@@ -773,7 +682,7 @@ class DatabricksPath(SysPath, PurePosixPath):
773
682
 
774
683
  @contextmanager
775
684
  def _open_write_volume(self, encoding: str | None = None, overwrite: bool = True):
776
- workspace_client = self.workspace.sdk()
685
+ workspace_client = self.safe_workspace.sdk()
777
686
  path = self.as_files_api_path()
778
687
 
779
688
  buf = io.BytesIO()
@@ -807,7 +716,7 @@ class DatabricksPath(SysPath, PurePosixPath):
807
716
 
808
717
  @contextmanager
809
718
  def _open_write_workspace(self, encoding: str | None = None, overwrite: bool = True):
810
- workspace_client = self.workspace.sdk()
719
+ workspace_client = self.safe_workspace.sdk()
811
720
  path = self.as_workspace_api_path()
812
721
 
813
722
  buf = io.BytesIO()
@@ -859,7 +768,7 @@ class DatabricksPath(SysPath, PurePosixPath):
859
768
 
860
769
  @contextmanager
861
770
  def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
862
- workspace_client = self.workspace.sdk()
771
+ workspace_client = self.safe_workspace.sdk()
863
772
  path = self.as_dbfs_api_path()
864
773
 
865
774
  raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)