ygg 0.1.24__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.24 → ygg-0.1.26}/PKG-INFO +1 -1
- {ygg-0.1.24 → ygg-0.1.26}/pyproject.toml +1 -1
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/PKG-INFO +1 -1
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/sql/engine.py +1 -2
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/workspaces/databricks_path.py +173 -97
- {ygg-0.1.24 → ygg-0.1.26}/LICENSE +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/README.md +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/setup.cfg +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/SOURCES.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/entry_points.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/requires.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/compute/cluster.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/compute/execution_context.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/compute/remote.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/jobs/config.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/sql/types.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/databricks/workspaces/workspace.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/pandaslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/polarslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/callable_serde.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/modules.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/python_env.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/polars_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/libs.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.24 → ygg-0.1.26}/src/yggdrasil/types/python_defaults.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ygg"
|
|
7
|
-
version = "0.1.24"
|
|
7
|
+
version = "0.1.26"
|
|
8
8
|
description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -439,8 +439,7 @@ FROM parquet.`{databricks_tmp_folder}`"""
|
|
|
439
439
|
try:
|
|
440
440
|
databricks_tmp_folder.rmdir(recursive=True)
|
|
441
441
|
except Exception as e:
|
|
442
|
-
|
|
443
|
-
logger.error(e)
|
|
442
|
+
logger.warning(e)
|
|
444
443
|
|
|
445
444
|
# Optionally run OPTIMIZE / ZORDER / VACUUM if requested (Databricks SQL)
|
|
446
445
|
if zorder_by:
|
|
@@ -7,7 +7,7 @@ import urllib.parse as urlparse
|
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from pathlib import PurePosixPath, Path as SysPath
|
|
10
|
-
from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
|
|
10
|
+
from typing import Any, BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
|
|
11
11
|
|
|
12
12
|
from databricks.sdk.service.catalog import VolumeType
|
|
13
13
|
|
|
@@ -15,7 +15,14 @@ from ...libs.databrickslib import databricks
|
|
|
15
15
|
|
|
16
16
|
if databricks is not None:
|
|
17
17
|
from databricks.sdk.service.workspace import ImportFormat, ObjectType
|
|
18
|
-
from databricks.sdk.errors.platform import
|
|
18
|
+
from databricks.sdk.errors.platform import (
|
|
19
|
+
NotFound,
|
|
20
|
+
ResourceDoesNotExist,
|
|
21
|
+
BadRequest,
|
|
22
|
+
PermissionDenied,
|
|
23
|
+
AlreadyExists,
|
|
24
|
+
ResourceAlreadyExists,
|
|
25
|
+
)
|
|
19
26
|
|
|
20
27
|
NOT_FOUND_ERRORS = NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied
|
|
21
28
|
ALREADY_EXISTS_ERRORS = AlreadyExists, ResourceAlreadyExists, BadRequest
|
|
@@ -26,7 +33,7 @@ if TYPE_CHECKING:
|
|
|
26
33
|
|
|
27
34
|
__all__ = [
|
|
28
35
|
"DatabricksPathKind",
|
|
29
|
-
"DatabricksPath"
|
|
36
|
+
"DatabricksPath",
|
|
30
37
|
]
|
|
31
38
|
|
|
32
39
|
|
|
@@ -43,7 +50,11 @@ class DatabricksPathKind(str, Enum):
|
|
|
43
50
|
DBFS = "dbfs"
|
|
44
51
|
|
|
45
52
|
@classmethod
|
|
46
|
-
def parse(
|
|
53
|
+
def parse(
|
|
54
|
+
cls,
|
|
55
|
+
path: str,
|
|
56
|
+
workspace: Optional["Workspace"] = None,
|
|
57
|
+
) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
|
|
47
58
|
from .workspace import Workspace
|
|
48
59
|
|
|
49
60
|
if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
|
|
@@ -52,13 +63,19 @@ class DatabricksPathKind(str, Enum):
|
|
|
52
63
|
path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
|
|
53
64
|
|
|
54
65
|
return cls.WORKSPACE, workspace, path
|
|
66
|
+
|
|
55
67
|
if path.startswith("/Volumes"):
|
|
56
68
|
return cls.VOLUME, workspace, path
|
|
57
69
|
|
|
58
70
|
if path.startswith("dbfs://"):
|
|
59
71
|
parsed = urlparse.urlparse(path)
|
|
60
|
-
|
|
61
|
-
|
|
72
|
+
|
|
73
|
+
# inner path is the URL path (e.g. /tmp/x or /Volumes/...)
|
|
74
|
+
kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
|
|
75
|
+
|
|
76
|
+
# hostname can be None for malformed/dbfs:// variants; fall back to default Workspace()
|
|
77
|
+
if workspace is None:
|
|
78
|
+
workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
|
|
62
79
|
|
|
63
80
|
return kind, workspace, inner_path
|
|
64
81
|
|
|
@@ -66,7 +83,7 @@ class DatabricksPathKind(str, Enum):
|
|
|
66
83
|
|
|
67
84
|
|
|
68
85
|
class DatabricksPath(SysPath, PurePosixPath):
|
|
69
|
-
_kind: DatabricksPathKind
|
|
86
|
+
_kind: "DatabricksPathKind"
|
|
70
87
|
_workspace: Optional["Workspace"]
|
|
71
88
|
|
|
72
89
|
_is_file: Optional[bool]
|
|
@@ -75,48 +92,121 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
75
92
|
_raw_status: Optional[dict]
|
|
76
93
|
_raw_status_refresh_time: float
|
|
77
94
|
|
|
95
|
+
@staticmethod
|
|
96
|
+
def _join_segments(pathsegments: tuple[Any, ...]) -> str:
|
|
97
|
+
if not pathsegments:
|
|
98
|
+
return ""
|
|
99
|
+
|
|
100
|
+
first = _seg_to_str(pathsegments[0])
|
|
101
|
+
|
|
102
|
+
# Keep dbfs:// URL-ish paths URL-ish (don't let PurePosixPath normalize it)
|
|
103
|
+
if first.startswith("dbfs://"):
|
|
104
|
+
rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
|
|
105
|
+
first = first.rstrip("/")
|
|
106
|
+
tail = "/".join(rest)
|
|
107
|
+
return f"{first}/{tail}" if tail else first
|
|
108
|
+
|
|
109
|
+
return str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
|
|
110
|
+
|
|
111
|
+
def _init(self, template: Optional["DatabricksPath"] = None) -> None:
|
|
112
|
+
"""
|
|
113
|
+
pathlib creates derived paths (parent, /, joinpath, with_name, etc.) via _from_parts
|
|
114
|
+
which bypasses __new__. _init(template=...) is the hook to carry our metadata forward.
|
|
115
|
+
"""
|
|
116
|
+
if isinstance(template, DatabricksPath):
|
|
117
|
+
# Recompute kind for the NEW path string (don’t blindly copy _kind)
|
|
118
|
+
temp_ws = getattr(template, "_workspace", None)
|
|
119
|
+
kind, ws, _ = DatabricksPathKind.parse(str(self), workspace=temp_ws)
|
|
120
|
+
|
|
121
|
+
self._kind = kind
|
|
122
|
+
self._workspace = ws if ws is not None else temp_ws
|
|
123
|
+
|
|
124
|
+
# Never inherit caches from template
|
|
125
|
+
self._is_file = None
|
|
126
|
+
self._is_dir = None
|
|
127
|
+
self._raw_status = None
|
|
128
|
+
self._raw_status_refresh_time = 0.0
|
|
129
|
+
else:
|
|
130
|
+
kind, ws, _ = DatabricksPathKind.parse(str(self))
|
|
131
|
+
self._kind = kind
|
|
132
|
+
self._workspace = ws
|
|
133
|
+
|
|
134
|
+
self._is_file = None
|
|
135
|
+
self._is_dir = None
|
|
136
|
+
self._raw_status = None
|
|
137
|
+
self._raw_status_refresh_time = 0.0
|
|
138
|
+
|
|
78
139
|
def __new__(
|
|
79
140
|
cls,
|
|
80
|
-
*pathsegments,
|
|
141
|
+
*pathsegments: Any,
|
|
81
142
|
workspace: Optional["Workspace"] = None,
|
|
82
143
|
is_file: Optional[bool] = None,
|
|
83
144
|
is_dir: Optional[bool] = None,
|
|
84
145
|
raw_status: Optional[dict] = None,
|
|
85
|
-
raw_status_refresh_time: float = 0
|
|
86
|
-
):
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
146
|
+
raw_status_refresh_time: float = 0.0,
|
|
147
|
+
) -> "DatabricksPath":
|
|
148
|
+
joined = cls._join_segments(pathsegments)
|
|
149
|
+
kind, parsed_ws, pure_path = DatabricksPathKind.parse(joined, workspace=workspace)
|
|
150
|
+
|
|
151
|
+
self = cls._from_parts([pure_path]) # pathlib-style construction (calls _init)
|
|
152
|
+
|
|
153
|
+
# Override with constructor-provided metadata
|
|
154
|
+
self._kind = kind
|
|
155
|
+
self._workspace = parsed_ws if workspace is None else workspace
|
|
156
|
+
self._is_file = is_file
|
|
157
|
+
self._is_dir = is_dir
|
|
158
|
+
self._raw_status = raw_status
|
|
159
|
+
self._raw_status_refresh_time = float(raw_status_refresh_time)
|
|
160
|
+
|
|
161
|
+
return self
|
|
100
162
|
|
|
101
|
-
|
|
163
|
+
def __init__(
|
|
164
|
+
self,
|
|
165
|
+
*pathsegments: Any,
|
|
166
|
+
workspace: Optional["Workspace"] = None,
|
|
167
|
+
is_file: Optional[bool] = None,
|
|
168
|
+
is_dir: Optional[bool] = None,
|
|
169
|
+
raw_status: Optional[dict] = None,
|
|
170
|
+
raw_status_refresh_time: float = 0.0,
|
|
171
|
+
) -> None:
|
|
172
|
+
# pathlib paths are effectively immutable; all init happens in __new__ / _init
|
|
173
|
+
pass
|
|
102
174
|
|
|
103
|
-
|
|
175
|
+
def __truediv__(self, other):
|
|
176
|
+
if not other:
|
|
177
|
+
return self
|
|
104
178
|
|
|
105
|
-
|
|
106
|
-
obj._workspace = w if workspace is None else workspace
|
|
107
|
-
obj._is_file = is_file
|
|
108
|
-
obj._is_dir = is_dir
|
|
109
|
-
obj._raw_status = raw_status
|
|
110
|
-
obj._raw_status_refresh_time = raw_status_refresh_time
|
|
179
|
+
built = super().__truediv__(other)
|
|
111
180
|
|
|
112
|
-
|
|
181
|
+
built._kind = self._kind
|
|
182
|
+
built._workspace = self._workspace
|
|
183
|
+
|
|
184
|
+
built._is_file = None
|
|
185
|
+
built._is_dir = None
|
|
186
|
+
built._raw_status = None
|
|
187
|
+
built._raw_status_refresh_time = 0.0
|
|
188
|
+
|
|
189
|
+
return built
|
|
113
190
|
|
|
114
191
|
def __enter__(self):
|
|
115
192
|
self.workspace.__enter__()
|
|
116
193
|
return self
|
|
117
194
|
|
|
118
195
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
119
|
-
self.workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
196
|
+
return self.workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
197
|
+
|
|
198
|
+
def _from_parsed_parts(self, drv, root, parts):
|
|
199
|
+
built = super()._from_parsed_parts(drv, root, parts)
|
|
200
|
+
|
|
201
|
+
built._kind = self._kind
|
|
202
|
+
built._workspace = self._workspace
|
|
203
|
+
|
|
204
|
+
built._is_file = None
|
|
205
|
+
built._is_dir = None
|
|
206
|
+
built._raw_status = None
|
|
207
|
+
built._raw_status_refresh_time = 0.0
|
|
208
|
+
|
|
209
|
+
return built
|
|
120
210
|
|
|
121
211
|
@property
|
|
122
212
|
def workspace(self):
|
|
@@ -126,22 +216,30 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
126
216
|
self._workspace = Workspace()
|
|
127
217
|
return self._workspace
|
|
128
218
|
|
|
219
|
+
@workspace.setter
|
|
220
|
+
def workspace(self, value):
|
|
221
|
+
self._workspace = value
|
|
222
|
+
|
|
129
223
|
@property
|
|
130
224
|
def kind(self):
|
|
131
225
|
return self._kind
|
|
132
226
|
|
|
133
|
-
|
|
227
|
+
@kind.setter
|
|
228
|
+
def kind(self, value: DatabricksPathKind):
|
|
229
|
+
self._kind = value
|
|
230
|
+
|
|
231
|
+
def is_file(self, *, follow_symlinks=True):
|
|
134
232
|
if self._is_file is None:
|
|
135
233
|
self.refresh_status()
|
|
136
234
|
return self._is_file
|
|
137
235
|
|
|
138
|
-
def is_dir(self, *, follow_symlinks
|
|
236
|
+
def is_dir(self, *, follow_symlinks=True):
|
|
139
237
|
if self._is_dir is None:
|
|
140
238
|
self.refresh_status()
|
|
141
239
|
return self._is_dir
|
|
142
240
|
|
|
143
241
|
def volume_parts(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[PurePosixPath]]:
|
|
144
|
-
if self._kind != DatabricksPathKind.VOLUME:
|
|
242
|
+
if self.kind != DatabricksPathKind.VOLUME:
|
|
145
243
|
return None, None, None, None
|
|
146
244
|
|
|
147
245
|
s = str(self)
|
|
@@ -169,12 +267,12 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
169
267
|
sdk = connected.workspace.sdk()
|
|
170
268
|
|
|
171
269
|
try:
|
|
172
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
270
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
173
271
|
info = sdk.files.get_metadata(connected.as_files_api_path())
|
|
174
272
|
|
|
175
273
|
connected._raw_status = info
|
|
176
274
|
connected._is_file, connected._is_dir = True, False
|
|
177
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
275
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
178
276
|
info = sdk.workspace.get_status(connected.as_workspace_api_path())
|
|
179
277
|
|
|
180
278
|
is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
|
|
@@ -184,7 +282,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
184
282
|
info = sdk.dbfs.get_status(connected.as_dbfs_api_path())
|
|
185
283
|
|
|
186
284
|
connected._raw_status = info
|
|
187
|
-
connected._is_file, connected._is_dir = not info.is_dir, info.is_dir
|
|
285
|
+
connected._is_file, connected._is_dir = (not info.is_dir), info.is_dir
|
|
188
286
|
|
|
189
287
|
connected._raw_status_refresh_time = time.time()
|
|
190
288
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
@@ -204,7 +302,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
204
302
|
self._is_file = None
|
|
205
303
|
self._is_dir = None
|
|
206
304
|
|
|
207
|
-
|
|
208
305
|
# ---- API path normalization helpers ----
|
|
209
306
|
|
|
210
307
|
def as_workspace_api_path(self) -> str:
|
|
@@ -243,7 +340,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
243
340
|
return True
|
|
244
341
|
return False
|
|
245
342
|
|
|
246
|
-
def mkdir(self, mode
|
|
343
|
+
def mkdir(self, mode=0o777, parents=True, exist_ok=True):
|
|
247
344
|
"""
|
|
248
345
|
Create a new directory at this given path.
|
|
249
346
|
"""
|
|
@@ -251,9 +348,9 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
251
348
|
connected.clear_cache()
|
|
252
349
|
|
|
253
350
|
try:
|
|
254
|
-
if connected._kind == DatabricksPathKind.WORKSPACE:
|
|
351
|
+
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
255
352
|
connected.workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
|
|
256
|
-
elif connected._kind == DatabricksPathKind.VOLUME:
|
|
353
|
+
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
257
354
|
return connected._create_volume_dir(mode=mode, parents=parents, exist_ok=exist_ok)
|
|
258
355
|
elif connected._kind == DatabricksPathKind.DBFS:
|
|
259
356
|
connected.workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
|
|
@@ -266,8 +363,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
266
363
|
connected.parent.mkdir(parents=True, exist_ok=True)
|
|
267
364
|
connected.mkdir(mode, parents=False, exist_ok=exist_ok)
|
|
268
365
|
except (AlreadyExists, ResourceAlreadyExists):
|
|
269
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
270
|
-
# could give priority to other errors like EACCES or EROFS
|
|
271
366
|
if not exist_ok:
|
|
272
367
|
raise
|
|
273
368
|
|
|
@@ -279,8 +374,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
279
374
|
try:
|
|
280
375
|
sdk.catalogs.create(name=catalog_name)
|
|
281
376
|
except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
|
|
282
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
283
|
-
# could give priority to other errors like EACCES or EROFS
|
|
284
377
|
if not exist_ok:
|
|
285
378
|
raise
|
|
286
379
|
|
|
@@ -288,8 +381,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
288
381
|
try:
|
|
289
382
|
sdk.schemas.create(catalog_name=catalog_name, name=schema_name)
|
|
290
383
|
except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
|
|
291
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
292
|
-
# could give priority to other errors like EACCES or EROFS
|
|
293
384
|
if not exist_ok:
|
|
294
385
|
raise
|
|
295
386
|
|
|
@@ -299,15 +390,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
299
390
|
catalog_name=catalog_name,
|
|
300
391
|
schema_name=schema_name,
|
|
301
392
|
name=volume_name,
|
|
302
|
-
volume_type=VolumeType.MANAGED
|
|
393
|
+
volume_type=VolumeType.MANAGED,
|
|
303
394
|
)
|
|
304
395
|
except (AlreadyExists, ResourceAlreadyExists, BadRequest):
|
|
305
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
306
|
-
# could give priority to other errors like EACCES or EROFS
|
|
307
396
|
if not exist_ok:
|
|
308
397
|
raise
|
|
309
398
|
|
|
310
|
-
def _create_volume_dir(self, mode
|
|
399
|
+
def _create_volume_dir(self, mode=0o777, parents=True, exist_ok=True):
|
|
311
400
|
path = self.as_files_api_path()
|
|
312
401
|
sdk = self.workspace.sdk()
|
|
313
402
|
|
|
@@ -324,8 +413,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
324
413
|
|
|
325
414
|
sdk.files.create_directory(path)
|
|
326
415
|
except (AlreadyExists, ResourceAlreadyExists, BadRequest):
|
|
327
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
328
|
-
# could give priority to other errors like EACCES or EROFS
|
|
329
416
|
if not exist_ok:
|
|
330
417
|
raise
|
|
331
418
|
|
|
@@ -340,11 +427,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
340
427
|
|
|
341
428
|
def rmfile(self):
|
|
342
429
|
try:
|
|
343
|
-
if self._kind == DatabricksPathKind.VOLUME:
|
|
430
|
+
if self.kind == DatabricksPathKind.VOLUME:
|
|
344
431
|
return self._remove_volume_file()
|
|
345
|
-
elif self._kind == DatabricksPathKind.WORKSPACE:
|
|
432
|
+
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
346
433
|
return self._remove_workspace_file()
|
|
347
|
-
elif self._kind == DatabricksPathKind.DBFS:
|
|
434
|
+
elif self.kind == DatabricksPathKind.DBFS:
|
|
348
435
|
return self._remove_dbfs_file()
|
|
349
436
|
finally:
|
|
350
437
|
self.clear_cache()
|
|
@@ -376,17 +463,17 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
376
463
|
def rmdir(self, recursive: bool = True):
|
|
377
464
|
with self as connected:
|
|
378
465
|
try:
|
|
379
|
-
if connected._kind == DatabricksPathKind.WORKSPACE:
|
|
466
|
+
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
380
467
|
connected.workspace.sdk().workspace.delete(
|
|
381
468
|
self.as_workspace_api_path(),
|
|
382
|
-
recursive=recursive
|
|
469
|
+
recursive=recursive,
|
|
383
470
|
)
|
|
384
|
-
elif connected._kind == DatabricksPathKind.VOLUME:
|
|
471
|
+
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
385
472
|
return self._remove_volume_dir(recursive=recursive)
|
|
386
473
|
else:
|
|
387
474
|
connected.workspace.sdk().dbfs.delete(
|
|
388
475
|
self.as_dbfs_api_path(),
|
|
389
|
-
recursive=recursive
|
|
476
|
+
recursive=recursive,
|
|
390
477
|
)
|
|
391
478
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
392
479
|
pass
|
|
@@ -425,13 +512,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
425
512
|
self.clear_cache()
|
|
426
513
|
|
|
427
514
|
def ls(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
|
|
428
|
-
if self._kind == DatabricksPathKind.VOLUME:
|
|
515
|
+
if self.kind == DatabricksPathKind.VOLUME:
|
|
429
516
|
for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
430
517
|
yield _
|
|
431
|
-
elif self._kind == DatabricksPathKind.WORKSPACE:
|
|
518
|
+
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
432
519
|
for _ in self._ls_workspace(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
433
520
|
yield _
|
|
434
|
-
elif self._kind == DatabricksPathKind.DBFS:
|
|
521
|
+
elif self.kind == DatabricksPathKind.DBFS:
|
|
435
522
|
for _ in self._ls_dbfs(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
436
523
|
yield _
|
|
437
524
|
|
|
@@ -444,13 +531,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
444
531
|
try:
|
|
445
532
|
for info in sdk.volumes.list(
|
|
446
533
|
catalog_name=catalog_name,
|
|
447
|
-
schema_name=schema_name
|
|
534
|
+
schema_name=schema_name,
|
|
448
535
|
):
|
|
449
536
|
base = DatabricksPath(
|
|
450
537
|
f"/Volumes/{info.catalog_name}/{info.schema_name}/{info.name}",
|
|
451
538
|
workspace=self.workspace,
|
|
452
539
|
is_file=False,
|
|
453
|
-
is_dir=True
|
|
540
|
+
is_dir=True,
|
|
454
541
|
)
|
|
455
542
|
|
|
456
543
|
if recursive:
|
|
@@ -463,12 +550,12 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
463
550
|
raise
|
|
464
551
|
elif schema_name is None:
|
|
465
552
|
try:
|
|
466
|
-
for info in sdk.schemas.list(catalog_name=catalog_name
|
|
553
|
+
for info in sdk.schemas.list(catalog_name=catalog_name):
|
|
467
554
|
base = DatabricksPath(
|
|
468
555
|
f"/Volumes/{info.catalog_name}/{info.name}",
|
|
469
556
|
workspace=self.workspace,
|
|
470
557
|
is_file=False,
|
|
471
|
-
is_dir=True
|
|
558
|
+
is_dir=True,
|
|
472
559
|
)
|
|
473
560
|
|
|
474
561
|
if recursive:
|
|
@@ -486,7 +573,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
486
573
|
f"/Volumes/{info.name}",
|
|
487
574
|
workspace=self.workspace,
|
|
488
575
|
is_file=False,
|
|
489
|
-
is_dir=True
|
|
576
|
+
is_dir=True,
|
|
490
577
|
)
|
|
491
578
|
|
|
492
579
|
if recursive:
|
|
@@ -504,7 +591,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
504
591
|
info.path,
|
|
505
592
|
workspace=self.workspace,
|
|
506
593
|
is_file=not info.is_directory,
|
|
507
|
-
is_dir=info.is_directory
|
|
594
|
+
is_dir=info.is_directory,
|
|
508
595
|
)
|
|
509
596
|
|
|
510
597
|
if recursive and info.is_directory:
|
|
@@ -526,9 +613,8 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
526
613
|
info.path,
|
|
527
614
|
workspace=self.workspace,
|
|
528
615
|
is_file=not is_dir,
|
|
529
|
-
is_dir=is_dir
|
|
616
|
+
is_dir=is_dir,
|
|
530
617
|
)
|
|
531
|
-
|
|
532
618
|
yield base
|
|
533
619
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
534
620
|
if raise_error:
|
|
@@ -538,14 +624,15 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
538
624
|
sdk = self.workspace.sdk()
|
|
539
625
|
|
|
540
626
|
try:
|
|
541
|
-
|
|
627
|
+
# FIX: DBFS listing should use DBFS-normalized path, not workspace path
|
|
628
|
+
p = "/dbfs/" + self.as_dbfs_api_path() + "/"
|
|
629
|
+
for info in sdk.dbfs.list(p, recursive=recursive):
|
|
542
630
|
base = DatabricksPath(
|
|
543
631
|
info.path,
|
|
544
632
|
workspace=self.workspace,
|
|
545
633
|
is_file=not info.is_dir,
|
|
546
|
-
is_dir=info.is_dir
|
|
634
|
+
is_dir=info.is_dir,
|
|
547
635
|
)
|
|
548
|
-
|
|
549
636
|
yield base
|
|
550
637
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
551
638
|
if raise_error:
|
|
@@ -554,7 +641,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
554
641
|
@contextmanager
|
|
555
642
|
def open(
|
|
556
643
|
self,
|
|
557
|
-
mode=
|
|
644
|
+
mode="r",
|
|
558
645
|
buffering=-1,
|
|
559
646
|
encoding=None,
|
|
560
647
|
errors=None,
|
|
@@ -569,11 +656,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
569
656
|
Supported:
|
|
570
657
|
- read: "rb", "r"
|
|
571
658
|
- write: "wb", "w" (buffered; uploads on close for WORKSPACE/VOLUME)
|
|
572
|
-
|
|
573
|
-
Notes:
|
|
574
|
-
- VOLUME: uses w.files.download/upload (Files API). :contentReference[oaicite:5]{index=5}
|
|
575
|
-
- DBFS: uses w.dbfs.open when possible. :contentReference[oaicite:6]{index=6}
|
|
576
|
-
- WORKSPACE: uses w.workspace.download/upload. :contentReference[oaicite:7]{index=7}
|
|
577
659
|
"""
|
|
578
660
|
if mode not in {"rb", "r", "wb", "w"}:
|
|
579
661
|
raise ValueError(f"Unsupported mode {mode!r}. Use r/rb/w/wb.")
|
|
@@ -592,10 +674,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
592
674
|
@contextmanager
|
|
593
675
|
def open_read(self, encoding: str | None = None):
|
|
594
676
|
with self as connected:
|
|
595
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
677
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
596
678
|
with connected._open_read_volume(encoding=encoding) as f:
|
|
597
679
|
yield f
|
|
598
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
680
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
599
681
|
with connected._open_read_workspace(encoding=encoding) as f:
|
|
600
682
|
yield f
|
|
601
683
|
else:
|
|
@@ -607,7 +689,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
607
689
|
workspace_client = self.workspace.sdk()
|
|
608
690
|
path = self.as_files_api_path()
|
|
609
691
|
|
|
610
|
-
# Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
|
|
611
692
|
resp = workspace_client.files.download(path)
|
|
612
693
|
raw = io.BytesIO(resp.contents.read())
|
|
613
694
|
|
|
@@ -623,12 +704,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
623
704
|
workspace_client = self.workspace.sdk()
|
|
624
705
|
path = self.as_workspace_api_path()
|
|
625
706
|
|
|
626
|
-
|
|
627
|
-
raw = workspace_client.workspace.download(path) # returns BinaryIO :contentReference[oaicite:10]{index=10}
|
|
707
|
+
raw = workspace_client.workspace.download(path) # returns BinaryIO
|
|
628
708
|
|
|
629
709
|
if encoding is not None:
|
|
630
710
|
raw = io.BytesIO(raw.read())
|
|
631
|
-
|
|
632
711
|
with io.TextIOWrapper(raw, encoding=encoding) as f:
|
|
633
712
|
yield f
|
|
634
713
|
else:
|
|
@@ -640,7 +719,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
640
719
|
workspace_client = self.workspace.sdk()
|
|
641
720
|
path = self.as_dbfs_api_path()
|
|
642
721
|
|
|
643
|
-
# dbfs.open gives BinaryIO for streaming reads :contentReference[oaicite:12]{index=12}
|
|
644
722
|
raw = workspace_client.dbfs.open(path, read=True)
|
|
645
723
|
|
|
646
724
|
if encoding is not None:
|
|
@@ -653,10 +731,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
653
731
|
@contextmanager
|
|
654
732
|
def open_write(self, encoding: str | None = None):
|
|
655
733
|
with self as connected:
|
|
656
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
734
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
657
735
|
with connected._open_write_volume(encoding=encoding) as f:
|
|
658
736
|
yield f
|
|
659
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
737
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
660
738
|
with connected._open_write_workspace(encoding=encoding) as f:
|
|
661
739
|
yield f
|
|
662
740
|
else:
|
|
@@ -668,7 +746,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
668
746
|
workspace_client = self.workspace.sdk()
|
|
669
747
|
path = self.as_files_api_path()
|
|
670
748
|
|
|
671
|
-
# Buffer locally then upload stream on exit. :contentReference[oaicite:9]{index=9}
|
|
672
749
|
buf = io.BytesIO()
|
|
673
750
|
|
|
674
751
|
if encoding is not None:
|
|
@@ -703,7 +780,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
703
780
|
workspace_client = self.workspace.sdk()
|
|
704
781
|
path = self.as_workspace_api_path()
|
|
705
782
|
|
|
706
|
-
# Buffer then upload (AUTO works for workspace files) :contentReference[oaicite:11]{index=11}
|
|
707
783
|
buf = io.BytesIO()
|
|
708
784
|
|
|
709
785
|
if encoding is not None:
|
|
@@ -727,7 +803,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
727
803
|
path, buf, format=ImportFormat.AUTO, overwrite=overwrite
|
|
728
804
|
)
|
|
729
805
|
else:
|
|
730
|
-
raise
|
|
806
|
+
raise
|
|
731
807
|
|
|
732
808
|
tw.detach()
|
|
733
809
|
else:
|
|
@@ -749,14 +825,14 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
749
825
|
path, buf, format=ImportFormat.AUTO, overwrite=overwrite
|
|
750
826
|
)
|
|
751
827
|
else:
|
|
752
|
-
raise
|
|
828
|
+
raise
|
|
753
829
|
|
|
754
830
|
@contextmanager
|
|
755
831
|
def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
|
|
756
832
|
workspace_client = self.workspace.sdk()
|
|
757
833
|
path = self.as_dbfs_api_path()
|
|
758
834
|
|
|
759
|
-
raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
|
|
835
|
+
raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
|
|
760
836
|
|
|
761
837
|
if encoding is not None:
|
|
762
838
|
with io.TextIOWrapper(raw, encoding=encoding) as f:
|
|
@@ -766,4 +842,4 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
766
842
|
yield f
|
|
767
843
|
|
|
768
844
|
self.clear_cache()
|
|
769
|
-
self._is_file, self._is_dir = True, False
|
|
845
|
+
self._is_file, self._is_dir = True, False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|