ygg 0.1.28__py3-none-any.whl → 0.1.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/METADATA +1 -1
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/RECORD +16 -16
- yggdrasil/databricks/compute/cluster.py +41 -25
- yggdrasil/databricks/compute/execution_context.py +9 -10
- yggdrasil/databricks/compute/remote.py +10 -6
- yggdrasil/databricks/sql/engine.py +4 -2
- yggdrasil/databricks/sql/statement_result.py +17 -2
- yggdrasil/databricks/workspaces/databricks_path.py +192 -283
- yggdrasil/databricks/workspaces/workspace.py +53 -416
- yggdrasil/pyutils/callable_serde.py +2 -28
- yggdrasil/pyutils/modules.py +1 -1
- yggdrasil/pyutils/python_env.py +81 -264
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/WHEEL +0 -0
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.28.dist-info → ygg-0.1.30.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# src/yggdrasil/databricks/workspaces/databricks_path.py
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import dataclasses
|
|
4
5
|
import io
|
|
5
6
|
import time
|
|
6
|
-
import urllib.parse as urlparse
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from enum import Enum
|
|
9
|
-
from pathlib import PurePosixPath
|
|
10
|
-
from typing import
|
|
9
|
+
from pathlib import PurePosixPath
|
|
10
|
+
from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING, List
|
|
11
11
|
|
|
12
12
|
from databricks.sdk.service.catalog import VolumeType
|
|
13
13
|
|
|
@@ -37,11 +37,19 @@ __all__ = [
|
|
|
37
37
|
]
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
40
|
+
def _flatten_parts(parts: Union[list[str], str]) -> list[str]:
|
|
41
|
+
if isinstance(parts, str):
|
|
42
|
+
parts = [parts]
|
|
43
|
+
|
|
44
|
+
if any("/" in part for part in parts):
|
|
45
|
+
# flatten parts with slashes
|
|
46
|
+
new_parts = []
|
|
47
|
+
for part in parts:
|
|
48
|
+
split_parts = part.split("/")
|
|
49
|
+
new_parts.extend(split_parts)
|
|
50
|
+
parts = new_parts
|
|
51
|
+
|
|
52
|
+
return parts
|
|
45
53
|
|
|
46
54
|
|
|
47
55
|
class DatabricksPathKind(str, Enum):
|
|
@@ -49,221 +57,143 @@ class DatabricksPathKind(str, Enum):
|
|
|
49
57
|
VOLUME = "volume"
|
|
50
58
|
DBFS = "dbfs"
|
|
51
59
|
|
|
52
|
-
@classmethod
|
|
53
|
-
def parse(
|
|
54
|
-
cls,
|
|
55
|
-
path: str,
|
|
56
|
-
workspace: Optional["Workspace"] = None,
|
|
57
|
-
) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
|
|
58
|
-
from .workspace import Workspace
|
|
59
|
-
|
|
60
|
-
if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
|
|
61
|
-
if path.startswith("/Users/me"):
|
|
62
|
-
workspace = Workspace() if workspace is None else workspace
|
|
63
|
-
path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
|
|
64
|
-
|
|
65
|
-
return cls.WORKSPACE, workspace, path
|
|
66
|
-
|
|
67
|
-
if path.startswith("/Volumes"):
|
|
68
|
-
return cls.VOLUME, workspace, path
|
|
69
|
-
|
|
70
|
-
if path.startswith("dbfs://"):
|
|
71
|
-
parsed = urlparse.urlparse(path)
|
|
72
|
-
|
|
73
|
-
# inner path is the URL path (e.g. /tmp/x or /Volumes/...)
|
|
74
|
-
kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
|
|
75
|
-
|
|
76
|
-
# hostname can be None for malformed/dbfs:// variants; fall back to default Workspace()
|
|
77
|
-
if workspace is None:
|
|
78
|
-
workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
|
|
79
|
-
|
|
80
|
-
return kind, workspace, inner_path
|
|
81
|
-
|
|
82
|
-
return cls.DBFS, workspace, path
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
class DatabricksPath(SysPath, PurePosixPath):
|
|
86
|
-
_kind: "DatabricksPathKind"
|
|
87
|
-
_workspace: Optional["Workspace"]
|
|
88
|
-
|
|
89
|
-
_is_file: Optional[bool]
|
|
90
|
-
_is_dir: Optional[bool]
|
|
91
|
-
|
|
92
|
-
_raw_status: Optional[dict]
|
|
93
|
-
_raw_status_refresh_time: float
|
|
94
|
-
|
|
95
|
-
@staticmethod
|
|
96
|
-
def _join_segments(pathsegments: tuple[Any, ...]) -> str:
|
|
97
|
-
if not pathsegments:
|
|
98
|
-
return ""
|
|
99
60
|
|
|
100
|
-
|
|
61
|
+
@dataclasses.dataclass
|
|
62
|
+
class DatabricksPath:
|
|
63
|
+
kind: "DatabricksPathKind"
|
|
64
|
+
parts: List[str]
|
|
65
|
+
workspace: Optional["Workspace"] = None
|
|
101
66
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
|
|
105
|
-
first = first.rstrip("/")
|
|
106
|
-
tail = "/".join(rest)
|
|
107
|
-
return f"{first}/{tail}" if tail else first
|
|
67
|
+
_is_file: Optional[bool] = None
|
|
68
|
+
_is_dir: Optional[bool] = None
|
|
108
69
|
|
|
109
|
-
|
|
70
|
+
_raw_status: Optional[dict] = None
|
|
71
|
+
_raw_status_refresh_time: float = 0.0
|
|
110
72
|
|
|
111
|
-
|
|
73
|
+
@classmethod
|
|
74
|
+
def parse(
|
|
112
75
|
cls,
|
|
113
|
-
|
|
76
|
+
parts: Union[List[str], str],
|
|
114
77
|
workspace: Optional["Workspace"] = None,
|
|
115
|
-
is_file: Optional[bool] = None,
|
|
116
|
-
is_dir: Optional[bool] = None,
|
|
117
|
-
raw_status: Optional[dict] = None,
|
|
118
|
-
raw_status_refresh_time: float = 0.0,
|
|
119
78
|
) -> "DatabricksPath":
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
79
|
+
if not parts:
|
|
80
|
+
return DatabricksPath(
|
|
81
|
+
kind=DatabricksPathKind.DBFS,
|
|
82
|
+
parts=[],
|
|
83
|
+
workspace=workspace,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
parts = _flatten_parts(parts)
|
|
87
|
+
|
|
88
|
+
if not parts[0]:
|
|
89
|
+
parts = parts[1:]
|
|
90
|
+
|
|
91
|
+
if not parts:
|
|
92
|
+
return DatabricksPath(
|
|
93
|
+
kind=DatabricksPathKind.DBFS,
|
|
94
|
+
parts=[],
|
|
95
|
+
workspace=workspace,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
head, *tail = parts
|
|
99
|
+
|
|
100
|
+
if head == "dbfs":
|
|
101
|
+
kind = DatabricksPathKind.DBFS
|
|
102
|
+
elif head == "Workspace":
|
|
103
|
+
kind = DatabricksPathKind.WORKSPACE
|
|
104
|
+
elif head == "Volumes":
|
|
105
|
+
kind = DatabricksPathKind.VOLUME
|
|
106
|
+
else:
|
|
107
|
+
raise ValueError(f"Invalid DatabricksPath prefix: {parts!r}")
|
|
124
108
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
self._raw_status = raw_status
|
|
131
|
-
self._raw_status_refresh_time = float(raw_status_refresh_time)
|
|
109
|
+
return DatabricksPath(
|
|
110
|
+
kind=kind,
|
|
111
|
+
parts=tail,
|
|
112
|
+
workspace=workspace,
|
|
113
|
+
)
|
|
132
114
|
|
|
133
|
-
|
|
115
|
+
def __hash__(self):
|
|
116
|
+
return hash((self.kind, tuple(self.parts)))
|
|
134
117
|
|
|
135
|
-
def
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
raw_status: Optional[dict] = None,
|
|
142
|
-
raw_status_refresh_time: float = 0.0,
|
|
143
|
-
) -> None:
|
|
144
|
-
# pathlib paths are effectively immutable; all init happens in __new__ / _init
|
|
145
|
-
pass
|
|
118
|
+
def __eq__(self, other):
|
|
119
|
+
if not isinstance(other, DatabricksPath):
|
|
120
|
+
if isinstance(other, str):
|
|
121
|
+
return str(self) == other
|
|
122
|
+
return False
|
|
123
|
+
return self.kind == other.kind and self.parts == other.parts
|
|
146
124
|
|
|
147
125
|
def __truediv__(self, other):
|
|
148
126
|
if not other:
|
|
149
127
|
return self
|
|
150
128
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
built._kind = self._kind
|
|
154
|
-
built._workspace = self._workspace
|
|
129
|
+
other_parts = _flatten_parts(other)
|
|
155
130
|
|
|
156
|
-
built
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
131
|
+
built = DatabricksPath(
|
|
132
|
+
kind=self.kind,
|
|
133
|
+
parts=self.parts + other_parts,
|
|
134
|
+
workspace=self.workspace,
|
|
135
|
+
)
|
|
160
136
|
|
|
161
137
|
return built
|
|
162
138
|
|
|
163
139
|
def __enter__(self):
|
|
164
|
-
self.
|
|
140
|
+
self.safe_workspace.__enter__()
|
|
165
141
|
return self
|
|
166
142
|
|
|
167
143
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
168
|
-
return self.
|
|
144
|
+
return self.safe_workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
169
145
|
|
|
170
|
-
def
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
146
|
+
def __str__(self):
|
|
147
|
+
if self.kind == DatabricksPathKind.DBFS:
|
|
148
|
+
return self.as_dbfs_api_path()
|
|
149
|
+
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
150
|
+
return self.as_workspace_api_path()
|
|
151
|
+
elif self.kind == DatabricksPathKind.VOLUME:
|
|
152
|
+
return self.as_files_api_path()
|
|
153
|
+
else:
|
|
154
|
+
raise ValueError(f"Unknown DatabricksPath kind: {self.kind!r}")
|
|
179
155
|
|
|
180
|
-
|
|
181
|
-
self.
|
|
182
|
-
self._is_dir = None
|
|
183
|
-
self._raw_status = None
|
|
184
|
-
self._raw_status_refresh_time = 0.0
|
|
156
|
+
def __repr__(self):
|
|
157
|
+
return "dbfs://%s" % self.__str__()
|
|
185
158
|
|
|
186
159
|
@property
|
|
187
160
|
def parent(self):
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
built._clone_meta_from(self)
|
|
191
|
-
|
|
192
|
-
return built
|
|
161
|
+
if not self.parts:
|
|
162
|
+
return self
|
|
193
163
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
"""
|
|
197
|
-
pathlib internal factory. It may pass a template in some Python versions,
|
|
198
|
-
but if not, we still return a valid DatabricksPath with initialized state.
|
|
199
|
-
"""
|
|
200
|
-
built = super()._from_parsed_parts(drv, root, parts) # type: ignore[misc]
|
|
201
|
-
|
|
202
|
-
# Best effort: if pathlib gave us a template on the object, use it.
|
|
203
|
-
# Otherwise ensure we at least have valid defaults.
|
|
204
|
-
if isinstance(built, DatabricksPath) and isinstance(getattr(built, "_workspace", None), object):
|
|
205
|
-
# If the object already has workspace/kind via _init, don't stomp it.
|
|
206
|
-
# But if it's missing _kind (common failure), derive it.
|
|
207
|
-
if not hasattr(built, "_kind"):
|
|
208
|
-
kind, ws, _ = DatabricksPathKind.parse(str(built), workspace=getattr(built, "_workspace", None))
|
|
209
|
-
built._kind = kind
|
|
210
|
-
built._workspace = ws if ws is not None else getattr(built, "_workspace", None)
|
|
211
|
-
|
|
212
|
-
# Always reset caches (derived path => cache invalid)
|
|
213
|
-
built._is_file = None
|
|
214
|
-
built._is_dir = None
|
|
215
|
-
built._raw_status = None
|
|
216
|
-
built._raw_status_refresh_time = 0.0
|
|
164
|
+
if self._is_file is not None or self._is_dir is not None:
|
|
165
|
+
_is_file, _is_dir = False, True
|
|
217
166
|
else:
|
|
218
|
-
|
|
219
|
-
kind, ws, _ = DatabricksPathKind.parse(str(built))
|
|
220
|
-
built._kind = kind
|
|
221
|
-
built._workspace = ws
|
|
222
|
-
built._is_file = None
|
|
223
|
-
built._is_dir = None
|
|
224
|
-
built._raw_status = None
|
|
225
|
-
built._raw_status_refresh_time = 0.0
|
|
226
|
-
|
|
227
|
-
return built
|
|
167
|
+
_is_file, _is_dir = None, None
|
|
228
168
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
# if for some reason super didn't return our type, try to coerce
|
|
237
|
-
built = type(self)(built, workspace=getattr(self, "_workspace", None))
|
|
169
|
+
built = DatabricksPath(
|
|
170
|
+
kind=self.kind,
|
|
171
|
+
parts=self.parts[:-1],
|
|
172
|
+
workspace=self.workspace,
|
|
173
|
+
_is_file=_is_file,
|
|
174
|
+
_is_dir=_is_dir,
|
|
175
|
+
)
|
|
238
176
|
|
|
239
177
|
return built
|
|
240
178
|
|
|
241
179
|
@property
|
|
242
|
-
def
|
|
243
|
-
if self.
|
|
180
|
+
def safe_workspace(self):
|
|
181
|
+
if self.workspace is None:
|
|
244
182
|
from .workspace import Workspace
|
|
245
183
|
|
|
246
|
-
self.
|
|
247
|
-
return self.
|
|
248
|
-
|
|
249
|
-
@workspace.setter
|
|
250
|
-
def workspace(self, value):
|
|
251
|
-
self._workspace = value
|
|
252
|
-
|
|
253
|
-
@property
|
|
254
|
-
def kind(self):
|
|
255
|
-
return self._kind
|
|
184
|
+
self.workspace = Workspace()
|
|
185
|
+
return self.workspace
|
|
256
186
|
|
|
257
|
-
@
|
|
258
|
-
def
|
|
259
|
-
self.
|
|
187
|
+
@safe_workspace.setter
|
|
188
|
+
def safe_workspace(self, value):
|
|
189
|
+
self.workspace = value
|
|
260
190
|
|
|
261
|
-
def is_file(self
|
|
191
|
+
def is_file(self):
|
|
262
192
|
if self._is_file is None:
|
|
263
193
|
self.refresh_status()
|
|
264
194
|
return self._is_file
|
|
265
195
|
|
|
266
|
-
def is_dir(self
|
|
196
|
+
def is_dir(self):
|
|
267
197
|
if self._is_dir is None:
|
|
268
198
|
self.refresh_status()
|
|
269
199
|
return self._is_dir
|
|
@@ -272,29 +202,15 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
272
202
|
if self.kind != DatabricksPathKind.VOLUME:
|
|
273
203
|
return None, None, None, None
|
|
274
204
|
|
|
275
|
-
|
|
276
|
-
|
|
205
|
+
catalog = self.parts[0] if len(self.parts) > 0 and self.parts[0] else None
|
|
206
|
+
schema = self.parts[1] if len(self.parts) > 1 and self.parts[1] else None
|
|
207
|
+
volume = self.parts[2] if len(self.parts) > 2 and self.parts[2] else None
|
|
277
208
|
|
|
278
|
-
|
|
279
|
-
if len(segs) < 2 or segs[1] != "Volumes":
|
|
280
|
-
raise ValueError(f"Invalid volume path: {s!r}")
|
|
281
|
-
|
|
282
|
-
catalog = segs[2] if len(segs) > 2 and segs[2] else None
|
|
283
|
-
schema = segs[3] if len(segs) > 3 and segs[3] else None
|
|
284
|
-
volume = segs[4] if len(segs) > 4 and segs[4] else None
|
|
285
|
-
|
|
286
|
-
# rel path only makes sense after /Volumes/<catalog>/<schema>/<volume>
|
|
287
|
-
if len(segs) > 5:
|
|
288
|
-
rel = "/".join(segs[5:])
|
|
289
|
-
rel_path = PurePosixPath(rel) if rel else PurePosixPath(".")
|
|
290
|
-
else:
|
|
291
|
-
rel_path = None
|
|
292
|
-
|
|
293
|
-
return catalog, schema, volume, rel_path
|
|
209
|
+
return catalog, schema, volume, self.parts[3:]
|
|
294
210
|
|
|
295
211
|
def refresh_status(self):
|
|
296
212
|
with self as connected:
|
|
297
|
-
sdk = connected.
|
|
213
|
+
sdk = connected.safe_workspace.sdk()
|
|
298
214
|
|
|
299
215
|
try:
|
|
300
216
|
if connected.kind == DatabricksPathKind.VOLUME:
|
|
@@ -339,38 +255,29 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
339
255
|
Workspace API typically uses paths like /Users/... (not /Workspace/Users/...)
|
|
340
256
|
so we strip the leading /Workspace when present.
|
|
341
257
|
"""
|
|
342
|
-
s
|
|
343
|
-
return s[len("/Workspace") :] if s.startswith("/Workspace") else s
|
|
258
|
+
return "/Workspace/%s" % "/".join(self.parts) if self.parts else "/Workspace"
|
|
344
259
|
|
|
345
260
|
def as_dbfs_api_path(self) -> str:
|
|
346
261
|
"""
|
|
347
262
|
DBFS REST wants absolute DBFS paths like /tmp/x.
|
|
348
263
|
If the user passes /dbfs/tmp/x (FUSE-style), strip the /dbfs prefix.
|
|
349
264
|
"""
|
|
350
|
-
s
|
|
351
|
-
return s[len("/dbfs") :] if s.startswith("/dbfs") else s
|
|
265
|
+
return "/dbfs/%s" % "/".join(self.parts) if self.parts else "/dbfs"
|
|
352
266
|
|
|
353
267
|
def as_files_api_path(self) -> str:
|
|
354
268
|
"""
|
|
355
269
|
Files API takes absolute paths, e.g. /Volumes/<...>/file
|
|
356
270
|
"""
|
|
357
|
-
return
|
|
271
|
+
return "/Volumes/%s" % "/".join(self.parts) if self.parts else "/Volumes"
|
|
358
272
|
|
|
359
|
-
def
|
|
360
|
-
"""Construct a new path object from any number of path-like objects.
|
|
361
|
-
Subclasses may override this method to customize how new path objects
|
|
362
|
-
are created from methods like `iterdir()`.
|
|
363
|
-
"""
|
|
364
|
-
return type(self)(*pathsegments, workspace=self._workspace)
|
|
365
|
-
|
|
366
|
-
def exists(self, *, follow_symlinks=True) -> bool:
|
|
273
|
+
def exists(self) -> bool:
|
|
367
274
|
if self.is_file():
|
|
368
275
|
return True
|
|
369
276
|
if self.is_dir():
|
|
370
277
|
return True
|
|
371
278
|
return False
|
|
372
279
|
|
|
373
|
-
def mkdir(self,
|
|
280
|
+
def mkdir(self, parents=True, exist_ok=True):
|
|
374
281
|
"""
|
|
375
282
|
Create a new directory at this given path.
|
|
376
283
|
"""
|
|
@@ -379,11 +286,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
379
286
|
|
|
380
287
|
try:
|
|
381
288
|
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
382
|
-
connected.
|
|
289
|
+
connected.safe_workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
|
|
383
290
|
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
384
|
-
return connected._create_volume_dir(
|
|
385
|
-
elif connected.
|
|
386
|
-
connected.
|
|
291
|
+
return connected._create_volume_dir(parents=parents, exist_ok=exist_ok)
|
|
292
|
+
elif connected.kind == DatabricksPathKind.DBFS:
|
|
293
|
+
connected.safe_workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
|
|
387
294
|
|
|
388
295
|
connected._is_file, connected._is_dir = False, True
|
|
389
296
|
except (NotFound, ResourceDoesNotExist):
|
|
@@ -391,14 +298,14 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
391
298
|
raise
|
|
392
299
|
|
|
393
300
|
connected.parent.mkdir(parents=True, exist_ok=True)
|
|
394
|
-
connected.mkdir(
|
|
301
|
+
connected.mkdir(parents=False, exist_ok=exist_ok)
|
|
395
302
|
except (AlreadyExists, ResourceAlreadyExists):
|
|
396
303
|
if not exist_ok:
|
|
397
304
|
raise
|
|
398
305
|
|
|
399
306
|
def _ensure_volume(self, exist_ok: bool = True):
|
|
400
307
|
catalog_name, schema_name, volume_name, rel = self.volume_parts()
|
|
401
|
-
sdk = self.
|
|
308
|
+
sdk = self.safe_workspace.sdk()
|
|
402
309
|
|
|
403
310
|
if catalog_name:
|
|
404
311
|
try:
|
|
@@ -426,9 +333,9 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
426
333
|
if not exist_ok:
|
|
427
334
|
raise
|
|
428
335
|
|
|
429
|
-
def _create_volume_dir(self,
|
|
336
|
+
def _create_volume_dir(self, parents=True, exist_ok=True):
|
|
430
337
|
path = self.as_files_api_path()
|
|
431
|
-
sdk = self.
|
|
338
|
+
sdk = self.safe_workspace.sdk()
|
|
432
339
|
|
|
433
340
|
try:
|
|
434
341
|
sdk.files.create_directory(path)
|
|
@@ -438,7 +345,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
438
345
|
|
|
439
346
|
message = str(e)
|
|
440
347
|
|
|
441
|
-
if "not exist" in message:
|
|
348
|
+
if "olume" in message and "not exist" in message:
|
|
442
349
|
self._ensure_volume()
|
|
443
350
|
|
|
444
351
|
sdk.files.create_directory(path)
|
|
@@ -467,7 +374,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
467
374
|
self.clear_cache()
|
|
468
375
|
|
|
469
376
|
def _remove_volume_file(self):
|
|
470
|
-
sdk = self.
|
|
377
|
+
sdk = self.safe_workspace.sdk()
|
|
471
378
|
|
|
472
379
|
try:
|
|
473
380
|
sdk.files.delete(self.as_files_api_path())
|
|
@@ -475,7 +382,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
475
382
|
pass
|
|
476
383
|
|
|
477
384
|
def _remove_workspace_file(self):
|
|
478
|
-
sdk = self.
|
|
385
|
+
sdk = self.safe_workspace.sdk()
|
|
479
386
|
|
|
480
387
|
try:
|
|
481
388
|
sdk.workspace.delete(self.as_workspace_api_path(), recursive=True)
|
|
@@ -483,7 +390,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
483
390
|
pass
|
|
484
391
|
|
|
485
392
|
def _remove_dbfs_file(self):
|
|
486
|
-
sdk = self.
|
|
393
|
+
sdk = self.safe_workspace.sdk()
|
|
487
394
|
|
|
488
395
|
try:
|
|
489
396
|
sdk.dbfs.delete(self.as_dbfs_api_path(), recursive=True)
|
|
@@ -494,14 +401,14 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
494
401
|
with self as connected:
|
|
495
402
|
try:
|
|
496
403
|
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
497
|
-
connected.
|
|
404
|
+
connected.safe_workspace.sdk().workspace.delete(
|
|
498
405
|
self.as_workspace_api_path(),
|
|
499
406
|
recursive=recursive,
|
|
500
407
|
)
|
|
501
408
|
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
502
409
|
return self._remove_volume_dir(recursive=recursive)
|
|
503
410
|
else:
|
|
504
|
-
connected.
|
|
411
|
+
connected.safe_workspace.sdk().dbfs.delete(
|
|
505
412
|
self.as_dbfs_api_path(),
|
|
506
413
|
recursive=recursive,
|
|
507
414
|
)
|
|
@@ -514,19 +421,9 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
514
421
|
root_path = self.as_files_api_path()
|
|
515
422
|
catalog_name, schema_name, volume_name, rel = self.volume_parts()
|
|
516
423
|
|
|
517
|
-
sdk = self.
|
|
424
|
+
sdk = self.safe_workspace.sdk()
|
|
518
425
|
|
|
519
|
-
if rel
|
|
520
|
-
try:
|
|
521
|
-
sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
|
|
522
|
-
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
523
|
-
pass
|
|
524
|
-
elif volume_name is None:
|
|
525
|
-
try:
|
|
526
|
-
sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
|
|
527
|
-
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
528
|
-
pass
|
|
529
|
-
else:
|
|
426
|
+
if rel:
|
|
530
427
|
try:
|
|
531
428
|
sdk.files.delete_directory(root_path)
|
|
532
429
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied) as e:
|
|
@@ -538,6 +435,16 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
538
435
|
sdk.files.delete_directory(root_path)
|
|
539
436
|
else:
|
|
540
437
|
pass
|
|
438
|
+
elif volume_name:
|
|
439
|
+
try:
|
|
440
|
+
sdk.volumes.delete(f"{catalog_name}.{schema_name}.{volume_name}")
|
|
441
|
+
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
442
|
+
pass
|
|
443
|
+
elif schema_name:
|
|
444
|
+
try:
|
|
445
|
+
sdk.schemas.delete(f"{catalog_name}.{schema_name}", force=True)
|
|
446
|
+
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
447
|
+
pass
|
|
541
448
|
|
|
542
449
|
self.clear_cache()
|
|
543
450
|
|
|
@@ -546,15 +453,15 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
546
453
|
for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
547
454
|
yield _
|
|
548
455
|
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
549
|
-
for _ in self._ls_workspace(recursive=recursive,
|
|
456
|
+
for _ in self._ls_workspace(recursive=recursive, raise_error=raise_error):
|
|
550
457
|
yield _
|
|
551
458
|
elif self.kind == DatabricksPathKind.DBFS:
|
|
552
|
-
for _ in self._ls_dbfs(recursive=recursive,
|
|
459
|
+
for _ in self._ls_dbfs(recursive=recursive, raise_error=raise_error):
|
|
553
460
|
yield _
|
|
554
461
|
|
|
555
462
|
def _ls_volume(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
|
|
556
463
|
catalog_name, schema_name, volume_name, rel = self.volume_parts()
|
|
557
|
-
sdk = self.
|
|
464
|
+
sdk = self.safe_workspace.sdk()
|
|
558
465
|
|
|
559
466
|
if rel is None:
|
|
560
467
|
if volume_name is None:
|
|
@@ -564,10 +471,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
564
471
|
schema_name=schema_name,
|
|
565
472
|
):
|
|
566
473
|
base = DatabricksPath(
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
474
|
+
kind=DatabricksPathKind.VOLUME,
|
|
475
|
+
parts = [info.catalog_name, info.schema_name, info.name],
|
|
476
|
+
workspace=self.safe_workspace,
|
|
477
|
+
_is_file=False,
|
|
478
|
+
_is_dir=True,
|
|
571
479
|
)
|
|
572
480
|
|
|
573
481
|
if recursive:
|
|
@@ -582,10 +490,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
582
490
|
try:
|
|
583
491
|
for info in sdk.schemas.list(catalog_name=catalog_name):
|
|
584
492
|
base = DatabricksPath(
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
493
|
+
kind=DatabricksPathKind.VOLUME,
|
|
494
|
+
parts=[info.catalog_name, info.name],
|
|
495
|
+
workspace=self.safe_workspace,
|
|
496
|
+
_is_file=False,
|
|
497
|
+
_is_dir=True,
|
|
589
498
|
)
|
|
590
499
|
|
|
591
500
|
if recursive:
|
|
@@ -600,10 +509,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
600
509
|
try:
|
|
601
510
|
for info in sdk.catalogs.list():
|
|
602
511
|
base = DatabricksPath(
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
512
|
+
kind=DatabricksPathKind.VOLUME,
|
|
513
|
+
parts=[info.name],
|
|
514
|
+
workspace=self.safe_workspace,
|
|
515
|
+
_is_file=False,
|
|
516
|
+
_is_dir=True,
|
|
607
517
|
)
|
|
608
518
|
|
|
609
519
|
if recursive:
|
|
@@ -618,10 +528,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
618
528
|
try:
|
|
619
529
|
for info in sdk.files.list_directory_contents(self.as_files_api_path(), page_size=fetch_size):
|
|
620
530
|
base = DatabricksPath(
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
531
|
+
kind=DatabricksPathKind.VOLUME,
|
|
532
|
+
parts=info.path.split("/")[2:],
|
|
533
|
+
workspace=self.safe_workspace,
|
|
534
|
+
_is_file=not info.is_directory,
|
|
535
|
+
_is_dir=info.is_directory,
|
|
625
536
|
)
|
|
626
537
|
|
|
627
538
|
if recursive and info.is_directory:
|
|
@@ -633,36 +544,40 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
633
544
|
if raise_error:
|
|
634
545
|
raise
|
|
635
546
|
|
|
636
|
-
def _ls_workspace(self, recursive: bool = True,
|
|
637
|
-
sdk = self.
|
|
547
|
+
def _ls_workspace(self, recursive: bool = True, raise_error: bool = True):
|
|
548
|
+
sdk = self.safe_workspace.sdk()
|
|
638
549
|
|
|
639
550
|
try:
|
|
640
551
|
for info in sdk.workspace.list(self.as_workspace_api_path(), recursive=recursive):
|
|
641
552
|
is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
|
|
642
553
|
base = DatabricksPath(
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
554
|
+
kind=DatabricksPathKind.WORKSPACE,
|
|
555
|
+
parts=info.path.split("/")[2:],
|
|
556
|
+
workspace=self.safe_workspace,
|
|
557
|
+
_is_file=not is_dir,
|
|
558
|
+
_is_dir=is_dir,
|
|
647
559
|
)
|
|
648
560
|
yield base
|
|
649
561
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
650
562
|
if raise_error:
|
|
651
563
|
raise
|
|
652
564
|
|
|
653
|
-
def _ls_dbfs(self, recursive: bool = True,
|
|
654
|
-
sdk = self.
|
|
565
|
+
def _ls_dbfs(self, recursive: bool = True, raise_error: bool = True):
|
|
566
|
+
sdk = self.safe_workspace.sdk()
|
|
655
567
|
|
|
656
568
|
try:
|
|
657
569
|
# FIX: DBFS listing should use DBFS-normalized path, not workspace path
|
|
658
|
-
p =
|
|
570
|
+
p = self.as_dbfs_api_path()
|
|
571
|
+
|
|
659
572
|
for info in sdk.dbfs.list(p, recursive=recursive):
|
|
660
573
|
base = DatabricksPath(
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
574
|
+
kind=DatabricksPathKind.DBFS,
|
|
575
|
+
parts=info.path.split("/")[2:],
|
|
576
|
+
workspace=self.safe_workspace,
|
|
577
|
+
_is_file=not info.is_dir,
|
|
578
|
+
_is_dir=info.is_dir,
|
|
665
579
|
)
|
|
580
|
+
|
|
666
581
|
yield base
|
|
667
582
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
668
583
|
if raise_error:
|
|
@@ -672,13 +587,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
672
587
|
def open(
|
|
673
588
|
self,
|
|
674
589
|
mode="r",
|
|
675
|
-
buffering=-1,
|
|
676
590
|
encoding=None,
|
|
677
|
-
errors=None,
|
|
678
|
-
newline=None,
|
|
679
|
-
*,
|
|
680
|
-
workspace: Optional["Workspace"] = None,
|
|
681
|
-
overwrite: bool = True,
|
|
682
591
|
) -> Iterator[Union[BinaryIO, io.TextIOBase]]:
|
|
683
592
|
"""
|
|
684
593
|
Open this Databricks path using databricks-sdk's WorkspaceClient.
|
|
@@ -716,7 +625,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
716
625
|
|
|
717
626
|
@contextmanager
|
|
718
627
|
def _open_read_volume(self, encoding: str | None = None):
|
|
719
|
-
workspace_client = self.
|
|
628
|
+
workspace_client = self.safe_workspace.sdk()
|
|
720
629
|
path = self.as_files_api_path()
|
|
721
630
|
|
|
722
631
|
resp = workspace_client.files.download(path)
|
|
@@ -731,7 +640,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
731
640
|
|
|
732
641
|
@contextmanager
|
|
733
642
|
def _open_read_workspace(self, encoding: str | None = None):
|
|
734
|
-
workspace_client = self.
|
|
643
|
+
workspace_client = self.safe_workspace.sdk()
|
|
735
644
|
path = self.as_workspace_api_path()
|
|
736
645
|
|
|
737
646
|
raw = workspace_client.workspace.download(path) # returns BinaryIO
|
|
@@ -746,7 +655,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
746
655
|
|
|
747
656
|
@contextmanager
|
|
748
657
|
def _open_read_dbfs(self, encoding: str | None = None):
|
|
749
|
-
workspace_client = self.
|
|
658
|
+
workspace_client = self.safe_workspace.sdk()
|
|
750
659
|
path = self.as_dbfs_api_path()
|
|
751
660
|
|
|
752
661
|
raw = workspace_client.dbfs.open(path, read=True)
|
|
@@ -773,7 +682,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
773
682
|
|
|
774
683
|
@contextmanager
|
|
775
684
|
def _open_write_volume(self, encoding: str | None = None, overwrite: bool = True):
|
|
776
|
-
workspace_client = self.
|
|
685
|
+
workspace_client = self.safe_workspace.sdk()
|
|
777
686
|
path = self.as_files_api_path()
|
|
778
687
|
|
|
779
688
|
buf = io.BytesIO()
|
|
@@ -807,7 +716,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
807
716
|
|
|
808
717
|
@contextmanager
|
|
809
718
|
def _open_write_workspace(self, encoding: str | None = None, overwrite: bool = True):
|
|
810
|
-
workspace_client = self.
|
|
719
|
+
workspace_client = self.safe_workspace.sdk()
|
|
811
720
|
path = self.as_workspace_api_path()
|
|
812
721
|
|
|
813
722
|
buf = io.BytesIO()
|
|
@@ -859,7 +768,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
859
768
|
|
|
860
769
|
@contextmanager
|
|
861
770
|
def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
|
|
862
|
-
workspace_client = self.
|
|
771
|
+
workspace_client = self.safe_workspace.sdk()
|
|
863
772
|
path = self.as_dbfs_api_path()
|
|
864
773
|
|
|
865
774
|
raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
|