ygg 0.1.24__tar.gz → 0.1.25__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {ygg-0.1.24 → ygg-0.1.25}/PKG-INFO +1 -1
- {ygg-0.1.24 → ygg-0.1.25}/pyproject.toml +1 -1
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/PKG-INFO +1 -1
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/databricks_path.py +156 -100
- {ygg-0.1.24 → ygg-0.1.25}/LICENSE +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/README.md +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/setup.cfg +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/SOURCES.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/dependency_links.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/entry_points.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/requires.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/top_level.txt +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/cluster.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/execution_context.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/remote.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/jobs/config.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/engine.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/types.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/workspace.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/dataclasses/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/dataclasses/dataclass.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/databrickslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/pandaslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/polarslib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/sparklib.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/callable_serde.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/exceptions.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/modules.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/parallel.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/python_env.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/retry.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/msal.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/session.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/__init__.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/cast_options.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/polars_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/registry.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/libs.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/python_arrow.py +0 -0
- {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/python_defaults.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ygg"
|
|
7
|
-
version = "0.1.24"
|
|
7
|
+
version = "0.1.25"
|
|
8
8
|
description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -7,7 +7,7 @@ import urllib.parse as urlparse
|
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from pathlib import PurePosixPath, Path as SysPath
|
|
10
|
-
from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
|
|
10
|
+
from typing import Any, BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
|
|
11
11
|
|
|
12
12
|
from databricks.sdk.service.catalog import VolumeType
|
|
13
13
|
|
|
@@ -15,7 +15,14 @@ from ...libs.databrickslib import databricks
|
|
|
15
15
|
|
|
16
16
|
if databricks is not None:
|
|
17
17
|
from databricks.sdk.service.workspace import ImportFormat, ObjectType
|
|
18
|
-
from databricks.sdk.errors.platform import
|
|
18
|
+
from databricks.sdk.errors.platform import (
|
|
19
|
+
NotFound,
|
|
20
|
+
ResourceDoesNotExist,
|
|
21
|
+
BadRequest,
|
|
22
|
+
PermissionDenied,
|
|
23
|
+
AlreadyExists,
|
|
24
|
+
ResourceAlreadyExists,
|
|
25
|
+
)
|
|
19
26
|
|
|
20
27
|
NOT_FOUND_ERRORS = NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied
|
|
21
28
|
ALREADY_EXISTS_ERRORS = AlreadyExists, ResourceAlreadyExists, BadRequest
|
|
@@ -26,7 +33,7 @@ if TYPE_CHECKING:
|
|
|
26
33
|
|
|
27
34
|
__all__ = [
|
|
28
35
|
"DatabricksPathKind",
|
|
29
|
-
"DatabricksPath"
|
|
36
|
+
"DatabricksPath",
|
|
30
37
|
]
|
|
31
38
|
|
|
32
39
|
|
|
@@ -43,7 +50,11 @@ class DatabricksPathKind(str, Enum):
|
|
|
43
50
|
DBFS = "dbfs"
|
|
44
51
|
|
|
45
52
|
@classmethod
|
|
46
|
-
def parse(
|
|
53
|
+
def parse(
|
|
54
|
+
cls,
|
|
55
|
+
path: str,
|
|
56
|
+
workspace: Optional["Workspace"] = None,
|
|
57
|
+
) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
|
|
47
58
|
from .workspace import Workspace
|
|
48
59
|
|
|
49
60
|
if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
|
|
@@ -52,13 +63,19 @@ class DatabricksPathKind(str, Enum):
|
|
|
52
63
|
path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
|
|
53
64
|
|
|
54
65
|
return cls.WORKSPACE, workspace, path
|
|
66
|
+
|
|
55
67
|
if path.startswith("/Volumes"):
|
|
56
68
|
return cls.VOLUME, workspace, path
|
|
57
69
|
|
|
58
70
|
if path.startswith("dbfs://"):
|
|
59
71
|
parsed = urlparse.urlparse(path)
|
|
60
|
-
|
|
61
|
-
|
|
72
|
+
|
|
73
|
+
# inner path is the URL path (e.g. /tmp/x or /Volumes/...)
|
|
74
|
+
kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
|
|
75
|
+
|
|
76
|
+
# hostname can be None for malformed/dbfs:// variants; fall back to default Workspace()
|
|
77
|
+
if workspace is None:
|
|
78
|
+
workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
|
|
62
79
|
|
|
63
80
|
return kind, workspace, inner_path
|
|
64
81
|
|
|
@@ -66,7 +83,7 @@ class DatabricksPathKind(str, Enum):
|
|
|
66
83
|
|
|
67
84
|
|
|
68
85
|
class DatabricksPath(SysPath, PurePosixPath):
|
|
69
|
-
_kind: DatabricksPathKind
|
|
86
|
+
_kind: "DatabricksPathKind"
|
|
70
87
|
_workspace: Optional["Workspace"]
|
|
71
88
|
|
|
72
89
|
_is_file: Optional[bool]
|
|
@@ -75,73 +92,135 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
75
92
|
_raw_status: Optional[dict]
|
|
76
93
|
_raw_status_refresh_time: float
|
|
77
94
|
|
|
95
|
+
@staticmethod
|
|
96
|
+
def _join_segments(pathsegments: tuple[Any, ...]) -> str:
|
|
97
|
+
if not pathsegments:
|
|
98
|
+
return ""
|
|
99
|
+
|
|
100
|
+
first = _seg_to_str(pathsegments[0])
|
|
101
|
+
|
|
102
|
+
# Keep dbfs:// URL-ish paths URL-ish (don't let PurePosixPath normalize it)
|
|
103
|
+
if first.startswith("dbfs://"):
|
|
104
|
+
rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
|
|
105
|
+
first = first.rstrip("/")
|
|
106
|
+
tail = "/".join(rest)
|
|
107
|
+
return f"{first}/{tail}" if tail else first
|
|
108
|
+
|
|
109
|
+
return str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
|
|
110
|
+
|
|
111
|
+
def _init(self, template: Optional["DatabricksPath"] = None) -> None:
|
|
112
|
+
"""
|
|
113
|
+
pathlib creates derived paths (parent, /, joinpath, with_name, etc.) via _from_parts
|
|
114
|
+
which bypasses __new__. _init(template=...) is the hook to carry our metadata forward.
|
|
115
|
+
"""
|
|
116
|
+
if isinstance(template, DatabricksPath):
|
|
117
|
+
# Recompute kind for the NEW path string (don’t blindly copy _kind)
|
|
118
|
+
kind, ws, _ = DatabricksPathKind.parse(str(self), workspace=getattr(template, "_workspace", None))
|
|
119
|
+
|
|
120
|
+
self._kind = kind
|
|
121
|
+
self._workspace = ws if ws is not None else getattr(template, "_workspace", None)
|
|
122
|
+
|
|
123
|
+
# Never inherit caches from template
|
|
124
|
+
self._is_file = None
|
|
125
|
+
self._is_dir = None
|
|
126
|
+
self._raw_status = None
|
|
127
|
+
self._raw_status_refresh_time = 0.0
|
|
128
|
+
else:
|
|
129
|
+
kind, ws, _ = DatabricksPathKind.parse(str(self))
|
|
130
|
+
self._kind = kind
|
|
131
|
+
self._workspace = ws
|
|
132
|
+
|
|
133
|
+
self._is_file = None
|
|
134
|
+
self._is_dir = None
|
|
135
|
+
self._raw_status = None
|
|
136
|
+
self._raw_status_refresh_time = 0.0
|
|
137
|
+
|
|
78
138
|
def __new__(
|
|
79
139
|
cls,
|
|
80
|
-
*pathsegments,
|
|
140
|
+
*pathsegments: Any,
|
|
81
141
|
workspace: Optional["Workspace"] = None,
|
|
82
142
|
is_file: Optional[bool] = None,
|
|
83
143
|
is_dir: Optional[bool] = None,
|
|
84
144
|
raw_status: Optional[dict] = None,
|
|
85
|
-
raw_status_refresh_time: float = 0
|
|
86
|
-
):
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
else:
|
|
90
|
-
first = _seg_to_str(pathsegments[0])
|
|
91
|
-
|
|
92
|
-
# Special case: if someone passes a dbfs://... URL segment, keep it URL-like
|
|
93
|
-
if first.startswith("dbfs://"):
|
|
94
|
-
rest = [_seg_to_str(s).lstrip("/") for s in pathsegments[1:]]
|
|
95
|
-
joined = first.rstrip("/")
|
|
96
|
-
if rest:
|
|
97
|
-
joined += "/" + "/".join(rest)
|
|
98
|
-
else:
|
|
99
|
-
joined = str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
|
|
145
|
+
raw_status_refresh_time: float = 0.0,
|
|
146
|
+
) -> "DatabricksPath":
|
|
147
|
+
joined = cls._join_segments(pathsegments)
|
|
148
|
+
kind, parsed_ws, pure_path = DatabricksPathKind.parse(joined, workspace=workspace)
|
|
100
149
|
|
|
101
|
-
|
|
150
|
+
self = cls._from_parts([pure_path]) # pathlib-style construction (calls _init)
|
|
102
151
|
|
|
103
|
-
|
|
152
|
+
# Override with constructor-provided metadata
|
|
153
|
+
self._kind = kind
|
|
154
|
+
self._workspace = parsed_ws if workspace is None else workspace
|
|
155
|
+
self._is_file = is_file
|
|
156
|
+
self._is_dir = is_dir
|
|
157
|
+
self._raw_status = raw_status
|
|
158
|
+
self._raw_status_refresh_time = float(raw_status_refresh_time)
|
|
104
159
|
|
|
105
|
-
|
|
106
|
-
obj._workspace = w if workspace is None else workspace
|
|
107
|
-
obj._is_file = is_file
|
|
108
|
-
obj._is_dir = is_dir
|
|
109
|
-
obj._raw_status = raw_status
|
|
110
|
-
obj._raw_status_refresh_time = raw_status_refresh_time
|
|
160
|
+
return self
|
|
111
161
|
|
|
112
|
-
|
|
162
|
+
def __init__(
|
|
163
|
+
self,
|
|
164
|
+
*pathsegments: Any,
|
|
165
|
+
workspace: Optional["Workspace"] = None,
|
|
166
|
+
is_file: Optional[bool] = None,
|
|
167
|
+
is_dir: Optional[bool] = None,
|
|
168
|
+
raw_status: Optional[dict] = None,
|
|
169
|
+
raw_status_refresh_time: float = 0.0,
|
|
170
|
+
) -> None:
|
|
171
|
+
# pathlib paths are effectively immutable; all init happens in __new__ / _init
|
|
172
|
+
pass
|
|
113
173
|
|
|
114
174
|
def __enter__(self):
|
|
115
175
|
self.workspace.__enter__()
|
|
116
176
|
return self
|
|
117
177
|
|
|
118
178
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
119
|
-
self.workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
179
|
+
return self.workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
120
180
|
|
|
121
181
|
@property
|
|
122
182
|
def workspace(self):
|
|
123
|
-
|
|
183
|
+
try:
|
|
184
|
+
if self._workspace is None:
|
|
185
|
+
from .workspace import Workspace
|
|
186
|
+
|
|
187
|
+
self._workspace = Workspace()
|
|
188
|
+
except AttributeError:
|
|
189
|
+
self._init(template=self)
|
|
190
|
+
|
|
124
191
|
from .workspace import Workspace
|
|
125
192
|
|
|
126
|
-
self._workspace
|
|
193
|
+
if self._workspace is None:
|
|
194
|
+
from .workspace import Workspace
|
|
195
|
+
|
|
196
|
+
self._workspace = Workspace()
|
|
197
|
+
|
|
127
198
|
return self._workspace
|
|
128
199
|
|
|
129
200
|
@property
|
|
130
201
|
def kind(self):
|
|
131
|
-
|
|
202
|
+
try:
|
|
203
|
+
return self._kind
|
|
204
|
+
except AttributeError:
|
|
205
|
+
self._init(template=self)
|
|
206
|
+
return self._kind
|
|
207
|
+
|
|
208
|
+
@kind.setter
|
|
209
|
+
def kind(self, value: DatabricksPathKind):
|
|
210
|
+
self._kind = value
|
|
132
211
|
|
|
133
|
-
def is_file(self, *, follow_symlinks
|
|
212
|
+
def is_file(self, *, follow_symlinks=True):
|
|
134
213
|
if self._is_file is None:
|
|
135
214
|
self.refresh_status()
|
|
136
215
|
return self._is_file
|
|
137
216
|
|
|
138
|
-
def is_dir(self, *, follow_symlinks
|
|
217
|
+
def is_dir(self, *, follow_symlinks=True):
|
|
139
218
|
if self._is_dir is None:
|
|
140
219
|
self.refresh_status()
|
|
141
220
|
return self._is_dir
|
|
142
221
|
|
|
143
222
|
def volume_parts(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[PurePosixPath]]:
|
|
144
|
-
if self._kind != DatabricksPathKind.VOLUME:
|
|
223
|
+
if self.kind != DatabricksPathKind.VOLUME:
|
|
145
224
|
return None, None, None, None
|
|
146
225
|
|
|
147
226
|
s = str(self)
|
|
@@ -169,12 +248,12 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
169
248
|
sdk = connected.workspace.sdk()
|
|
170
249
|
|
|
171
250
|
try:
|
|
172
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
251
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
173
252
|
info = sdk.files.get_metadata(connected.as_files_api_path())
|
|
174
253
|
|
|
175
254
|
connected._raw_status = info
|
|
176
255
|
connected._is_file, connected._is_dir = True, False
|
|
177
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
256
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
178
257
|
info = sdk.workspace.get_status(connected.as_workspace_api_path())
|
|
179
258
|
|
|
180
259
|
is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
|
|
@@ -184,7 +263,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
184
263
|
info = sdk.dbfs.get_status(connected.as_dbfs_api_path())
|
|
185
264
|
|
|
186
265
|
connected._raw_status = info
|
|
187
|
-
connected._is_file, connected._is_dir = not info.is_dir, info.is_dir
|
|
266
|
+
connected._is_file, connected._is_dir = (not info.is_dir), info.is_dir
|
|
188
267
|
|
|
189
268
|
connected._raw_status_refresh_time = time.time()
|
|
190
269
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
@@ -204,7 +283,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
204
283
|
self._is_file = None
|
|
205
284
|
self._is_dir = None
|
|
206
285
|
|
|
207
|
-
|
|
208
286
|
# ---- API path normalization helpers ----
|
|
209
287
|
|
|
210
288
|
def as_workspace_api_path(self) -> str:
|
|
@@ -243,7 +321,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
243
321
|
return True
|
|
244
322
|
return False
|
|
245
323
|
|
|
246
|
-
def mkdir(self, mode
|
|
324
|
+
def mkdir(self, mode=0o777, parents=True, exist_ok=True):
|
|
247
325
|
"""
|
|
248
326
|
Create a new directory at this given path.
|
|
249
327
|
"""
|
|
@@ -251,9 +329,9 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
251
329
|
connected.clear_cache()
|
|
252
330
|
|
|
253
331
|
try:
|
|
254
|
-
if connected._kind == DatabricksPathKind.WORKSPACE:
|
|
332
|
+
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
255
333
|
connected.workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
|
|
256
|
-
elif connected._kind == DatabricksPathKind.VOLUME:
|
|
334
|
+
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
257
335
|
return connected._create_volume_dir(mode=mode, parents=parents, exist_ok=exist_ok)
|
|
258
336
|
elif connected._kind == DatabricksPathKind.DBFS:
|
|
259
337
|
connected.workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
|
|
@@ -266,8 +344,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
266
344
|
connected.parent.mkdir(parents=True, exist_ok=True)
|
|
267
345
|
connected.mkdir(mode, parents=False, exist_ok=exist_ok)
|
|
268
346
|
except (AlreadyExists, ResourceAlreadyExists):
|
|
269
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
270
|
-
# could give priority to other errors like EACCES or EROFS
|
|
271
347
|
if not exist_ok:
|
|
272
348
|
raise
|
|
273
349
|
|
|
@@ -279,8 +355,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
279
355
|
try:
|
|
280
356
|
sdk.catalogs.create(name=catalog_name)
|
|
281
357
|
except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
|
|
282
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
283
|
-
# could give priority to other errors like EACCES or EROFS
|
|
284
358
|
if not exist_ok:
|
|
285
359
|
raise
|
|
286
360
|
|
|
@@ -288,8 +362,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
288
362
|
try:
|
|
289
363
|
sdk.schemas.create(catalog_name=catalog_name, name=schema_name)
|
|
290
364
|
except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
|
|
291
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
292
|
-
# could give priority to other errors like EACCES or EROFS
|
|
293
365
|
if not exist_ok:
|
|
294
366
|
raise
|
|
295
367
|
|
|
@@ -299,15 +371,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
299
371
|
catalog_name=catalog_name,
|
|
300
372
|
schema_name=schema_name,
|
|
301
373
|
name=volume_name,
|
|
302
|
-
volume_type=VolumeType.MANAGED
|
|
374
|
+
volume_type=VolumeType.MANAGED,
|
|
303
375
|
)
|
|
304
376
|
except (AlreadyExists, ResourceAlreadyExists, BadRequest):
|
|
305
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
306
|
-
# could give priority to other errors like EACCES or EROFS
|
|
307
377
|
if not exist_ok:
|
|
308
378
|
raise
|
|
309
379
|
|
|
310
|
-
def _create_volume_dir(self, mode
|
|
380
|
+
def _create_volume_dir(self, mode=0o777, parents=True, exist_ok=True):
|
|
311
381
|
path = self.as_files_api_path()
|
|
312
382
|
sdk = self.workspace.sdk()
|
|
313
383
|
|
|
@@ -324,8 +394,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
324
394
|
|
|
325
395
|
sdk.files.create_directory(path)
|
|
326
396
|
except (AlreadyExists, ResourceAlreadyExists, BadRequest):
|
|
327
|
-
# Cannot rely on checking for EEXIST, since the operating system
|
|
328
|
-
# could give priority to other errors like EACCES or EROFS
|
|
329
397
|
if not exist_ok:
|
|
330
398
|
raise
|
|
331
399
|
|
|
@@ -340,11 +408,11 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
340
408
|
|
|
341
409
|
def rmfile(self):
|
|
342
410
|
try:
|
|
343
|
-
if self._kind == DatabricksPathKind.VOLUME:
|
|
411
|
+
if self.kind == DatabricksPathKind.VOLUME:
|
|
344
412
|
return self._remove_volume_file()
|
|
345
|
-
elif self._kind == DatabricksPathKind.WORKSPACE:
|
|
413
|
+
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
346
414
|
return self._remove_workspace_file()
|
|
347
|
-
elif self._kind == DatabricksPathKind.DBFS:
|
|
415
|
+
elif self.kind == DatabricksPathKind.DBFS:
|
|
348
416
|
return self._remove_dbfs_file()
|
|
349
417
|
finally:
|
|
350
418
|
self.clear_cache()
|
|
@@ -376,17 +444,17 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
376
444
|
def rmdir(self, recursive: bool = True):
|
|
377
445
|
with self as connected:
|
|
378
446
|
try:
|
|
379
|
-
if connected._kind == DatabricksPathKind.WORKSPACE:
|
|
447
|
+
if connected.kind == DatabricksPathKind.WORKSPACE:
|
|
380
448
|
connected.workspace.sdk().workspace.delete(
|
|
381
449
|
self.as_workspace_api_path(),
|
|
382
|
-
recursive=recursive
|
|
450
|
+
recursive=recursive,
|
|
383
451
|
)
|
|
384
|
-
elif connected._kind == DatabricksPathKind.VOLUME:
|
|
452
|
+
elif connected.kind == DatabricksPathKind.VOLUME:
|
|
385
453
|
return self._remove_volume_dir(recursive=recursive)
|
|
386
454
|
else:
|
|
387
455
|
connected.workspace.sdk().dbfs.delete(
|
|
388
456
|
self.as_dbfs_api_path(),
|
|
389
|
-
recursive=recursive
|
|
457
|
+
recursive=recursive,
|
|
390
458
|
)
|
|
391
459
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
392
460
|
pass
|
|
@@ -425,13 +493,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
425
493
|
self.clear_cache()
|
|
426
494
|
|
|
427
495
|
def ls(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
|
|
428
|
-
if self._kind == DatabricksPathKind.VOLUME:
|
|
496
|
+
if self.kind == DatabricksPathKind.VOLUME:
|
|
429
497
|
for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
430
498
|
yield _
|
|
431
|
-
elif self._kind == DatabricksPathKind.WORKSPACE:
|
|
499
|
+
elif self.kind == DatabricksPathKind.WORKSPACE:
|
|
432
500
|
for _ in self._ls_workspace(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
433
501
|
yield _
|
|
434
|
-
elif self._kind == DatabricksPathKind.DBFS:
|
|
502
|
+
elif self.kind == DatabricksPathKind.DBFS:
|
|
435
503
|
for _ in self._ls_dbfs(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
|
|
436
504
|
yield _
|
|
437
505
|
|
|
@@ -444,13 +512,13 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
444
512
|
try:
|
|
445
513
|
for info in sdk.volumes.list(
|
|
446
514
|
catalog_name=catalog_name,
|
|
447
|
-
schema_name=schema_name
|
|
515
|
+
schema_name=schema_name,
|
|
448
516
|
):
|
|
449
517
|
base = DatabricksPath(
|
|
450
518
|
f"/Volumes/{info.catalog_name}/{info.schema_name}/{info.name}",
|
|
451
519
|
workspace=self.workspace,
|
|
452
520
|
is_file=False,
|
|
453
|
-
is_dir=True
|
|
521
|
+
is_dir=True,
|
|
454
522
|
)
|
|
455
523
|
|
|
456
524
|
if recursive:
|
|
@@ -463,12 +531,12 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
463
531
|
raise
|
|
464
532
|
elif schema_name is None:
|
|
465
533
|
try:
|
|
466
|
-
for info in sdk.schemas.list(catalog_name=catalog_name
|
|
534
|
+
for info in sdk.schemas.list(catalog_name=catalog_name):
|
|
467
535
|
base = DatabricksPath(
|
|
468
536
|
f"/Volumes/{info.catalog_name}/{info.name}",
|
|
469
537
|
workspace=self.workspace,
|
|
470
538
|
is_file=False,
|
|
471
|
-
is_dir=True
|
|
539
|
+
is_dir=True,
|
|
472
540
|
)
|
|
473
541
|
|
|
474
542
|
if recursive:
|
|
@@ -486,7 +554,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
486
554
|
f"/Volumes/{info.name}",
|
|
487
555
|
workspace=self.workspace,
|
|
488
556
|
is_file=False,
|
|
489
|
-
is_dir=True
|
|
557
|
+
is_dir=True,
|
|
490
558
|
)
|
|
491
559
|
|
|
492
560
|
if recursive:
|
|
@@ -504,7 +572,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
504
572
|
info.path,
|
|
505
573
|
workspace=self.workspace,
|
|
506
574
|
is_file=not info.is_directory,
|
|
507
|
-
is_dir=info.is_directory
|
|
575
|
+
is_dir=info.is_directory,
|
|
508
576
|
)
|
|
509
577
|
|
|
510
578
|
if recursive and info.is_directory:
|
|
@@ -526,9 +594,8 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
526
594
|
info.path,
|
|
527
595
|
workspace=self.workspace,
|
|
528
596
|
is_file=not is_dir,
|
|
529
|
-
is_dir=is_dir
|
|
597
|
+
is_dir=is_dir,
|
|
530
598
|
)
|
|
531
|
-
|
|
532
599
|
yield base
|
|
533
600
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
534
601
|
if raise_error:
|
|
@@ -538,14 +605,14 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
538
605
|
sdk = self.workspace.sdk()
|
|
539
606
|
|
|
540
607
|
try:
|
|
541
|
-
|
|
608
|
+
# FIX: DBFS listing should use DBFS-normalized path, not workspace path
|
|
609
|
+
for info in sdk.dbfs.list(self.as_dbfs_api_path(), recursive=recursive):
|
|
542
610
|
base = DatabricksPath(
|
|
543
611
|
info.path,
|
|
544
612
|
workspace=self.workspace,
|
|
545
613
|
is_file=not info.is_dir,
|
|
546
|
-
is_dir=info.is_dir
|
|
614
|
+
is_dir=info.is_dir,
|
|
547
615
|
)
|
|
548
|
-
|
|
549
616
|
yield base
|
|
550
617
|
except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
|
|
551
618
|
if raise_error:
|
|
@@ -554,7 +621,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
554
621
|
@contextmanager
|
|
555
622
|
def open(
|
|
556
623
|
self,
|
|
557
|
-
mode=
|
|
624
|
+
mode="r",
|
|
558
625
|
buffering=-1,
|
|
559
626
|
encoding=None,
|
|
560
627
|
errors=None,
|
|
@@ -569,11 +636,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
569
636
|
Supported:
|
|
570
637
|
- read: "rb", "r"
|
|
571
638
|
- write: "wb", "w" (buffered; uploads on close for WORKSPACE/VOLUME)
|
|
572
|
-
|
|
573
|
-
Notes:
|
|
574
|
-
- VOLUME: uses w.files.download/upload (Files API). :contentReference[oaicite:5]{index=5}
|
|
575
|
-
- DBFS: uses w.dbfs.open when possible. :contentReference[oaicite:6]{index=6}
|
|
576
|
-
- WORKSPACE: uses w.workspace.download/upload. :contentReference[oaicite:7]{index=7}
|
|
577
639
|
"""
|
|
578
640
|
if mode not in {"rb", "r", "wb", "w"}:
|
|
579
641
|
raise ValueError(f"Unsupported mode {mode!r}. Use r/rb/w/wb.")
|
|
@@ -592,10 +654,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
592
654
|
@contextmanager
|
|
593
655
|
def open_read(self, encoding: str | None = None):
|
|
594
656
|
with self as connected:
|
|
595
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
657
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
596
658
|
with connected._open_read_volume(encoding=encoding) as f:
|
|
597
659
|
yield f
|
|
598
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
660
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
599
661
|
with connected._open_read_workspace(encoding=encoding) as f:
|
|
600
662
|
yield f
|
|
601
663
|
else:
|
|
@@ -607,7 +669,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
607
669
|
workspace_client = self.workspace.sdk()
|
|
608
670
|
path = self.as_files_api_path()
|
|
609
671
|
|
|
610
|
-
# Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
|
|
611
672
|
resp = workspace_client.files.download(path)
|
|
612
673
|
raw = io.BytesIO(resp.contents.read())
|
|
613
674
|
|
|
@@ -623,12 +684,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
623
684
|
workspace_client = self.workspace.sdk()
|
|
624
685
|
path = self.as_workspace_api_path()
|
|
625
686
|
|
|
626
|
-
|
|
627
|
-
raw = workspace_client.workspace.download(path) # returns BinaryIO :contentReference[oaicite:10]{index=10}
|
|
687
|
+
raw = workspace_client.workspace.download(path) # returns BinaryIO
|
|
628
688
|
|
|
629
689
|
if encoding is not None:
|
|
630
690
|
raw = io.BytesIO(raw.read())
|
|
631
|
-
|
|
632
691
|
with io.TextIOWrapper(raw, encoding=encoding) as f:
|
|
633
692
|
yield f
|
|
634
693
|
else:
|
|
@@ -640,7 +699,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
640
699
|
workspace_client = self.workspace.sdk()
|
|
641
700
|
path = self.as_dbfs_api_path()
|
|
642
701
|
|
|
643
|
-
# dbfs.open gives BinaryIO for streaming reads :contentReference[oaicite:12]{index=12}
|
|
644
702
|
raw = workspace_client.dbfs.open(path, read=True)
|
|
645
703
|
|
|
646
704
|
if encoding is not None:
|
|
@@ -653,10 +711,10 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
653
711
|
@contextmanager
|
|
654
712
|
def open_write(self, encoding: str | None = None):
|
|
655
713
|
with self as connected:
|
|
656
|
-
if connected._kind == DatabricksPathKind.VOLUME:
|
|
714
|
+
if connected.kind == DatabricksPathKind.VOLUME:
|
|
657
715
|
with connected._open_write_volume(encoding=encoding) as f:
|
|
658
716
|
yield f
|
|
659
|
-
elif connected._kind == DatabricksPathKind.WORKSPACE:
|
|
717
|
+
elif connected.kind == DatabricksPathKind.WORKSPACE:
|
|
660
718
|
with connected._open_write_workspace(encoding=encoding) as f:
|
|
661
719
|
yield f
|
|
662
720
|
else:
|
|
@@ -668,7 +726,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
668
726
|
workspace_client = self.workspace.sdk()
|
|
669
727
|
path = self.as_files_api_path()
|
|
670
728
|
|
|
671
|
-
# Buffer locally then upload stream on exit. :contentReference[oaicite:9]{index=9}
|
|
672
729
|
buf = io.BytesIO()
|
|
673
730
|
|
|
674
731
|
if encoding is not None:
|
|
@@ -703,7 +760,6 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
703
760
|
workspace_client = self.workspace.sdk()
|
|
704
761
|
path = self.as_workspace_api_path()
|
|
705
762
|
|
|
706
|
-
# Buffer then upload (AUTO works for workspace files) :contentReference[oaicite:11]{index=11}
|
|
707
763
|
buf = io.BytesIO()
|
|
708
764
|
|
|
709
765
|
if encoding is not None:
|
|
@@ -727,7 +783,7 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
727
783
|
path, buf, format=ImportFormat.AUTO, overwrite=overwrite
|
|
728
784
|
)
|
|
729
785
|
else:
|
|
730
|
-
raise
|
|
786
|
+
raise
|
|
731
787
|
|
|
732
788
|
tw.detach()
|
|
733
789
|
else:
|
|
@@ -749,14 +805,14 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
749
805
|
path, buf, format=ImportFormat.AUTO, overwrite=overwrite
|
|
750
806
|
)
|
|
751
807
|
else:
|
|
752
|
-
raise
|
|
808
|
+
raise
|
|
753
809
|
|
|
754
810
|
@contextmanager
|
|
755
811
|
def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
|
|
756
812
|
workspace_client = self.workspace.sdk()
|
|
757
813
|
path = self.as_dbfs_api_path()
|
|
758
814
|
|
|
759
|
-
raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
|
|
815
|
+
raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
|
|
760
816
|
|
|
761
817
|
if encoding is not None:
|
|
762
818
|
with io.TextIOWrapper(raw, encoding=encoding) as f:
|
|
@@ -766,4 +822,4 @@ class DatabricksPath(SysPath, PurePosixPath):
|
|
|
766
822
|
yield f
|
|
767
823
|
|
|
768
824
|
self.clear_cache()
|
|
769
|
-
self._is_file, self._is_dir = True, False
|
|
825
|
+
self._is_file, self._is_dir = True, False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|