ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
- ygg-0.1.32.dist-info/RECORD +60 -0
- yggdrasil/__init__.py +2 -0
- yggdrasil/databricks/__init__.py +2 -0
- yggdrasil/databricks/compute/__init__.py +2 -0
- yggdrasil/databricks/compute/cluster.py +241 -2
- yggdrasil/databricks/compute/execution_context.py +100 -11
- yggdrasil/databricks/compute/remote.py +16 -0
- yggdrasil/databricks/jobs/__init__.py +5 -0
- yggdrasil/databricks/jobs/config.py +31 -34
- yggdrasil/databricks/sql/__init__.py +2 -0
- yggdrasil/databricks/sql/engine.py +217 -36
- yggdrasil/databricks/sql/exceptions.py +1 -0
- yggdrasil/databricks/sql/statement_result.py +148 -1
- yggdrasil/databricks/sql/types.py +49 -1
- yggdrasil/databricks/workspaces/__init__.py +4 -1
- yggdrasil/databricks/workspaces/filesytem.py +344 -0
- yggdrasil/databricks/workspaces/io.py +1123 -0
- yggdrasil/databricks/workspaces/path.py +1415 -0
- yggdrasil/databricks/workspaces/path_kind.py +13 -0
- yggdrasil/databricks/workspaces/workspace.py +298 -154
- yggdrasil/dataclasses/__init__.py +2 -0
- yggdrasil/dataclasses/dataclass.py +42 -1
- yggdrasil/libs/__init__.py +2 -0
- yggdrasil/libs/databrickslib.py +9 -0
- yggdrasil/libs/extensions/__init__.py +2 -0
- yggdrasil/libs/extensions/polars_extensions.py +72 -0
- yggdrasil/libs/extensions/spark_extensions.py +116 -0
- yggdrasil/libs/pandaslib.py +7 -0
- yggdrasil/libs/polarslib.py +7 -0
- yggdrasil/libs/sparklib.py +41 -0
- yggdrasil/pyutils/__init__.py +4 -0
- yggdrasil/pyutils/callable_serde.py +106 -0
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/modules.py +44 -1
- yggdrasil/pyutils/parallel.py +29 -0
- yggdrasil/pyutils/python_env.py +301 -0
- yggdrasil/pyutils/retry.py +57 -0
- yggdrasil/requests/__init__.py +4 -0
- yggdrasil/requests/msal.py +124 -3
- yggdrasil/requests/session.py +18 -0
- yggdrasil/types/__init__.py +2 -0
- yggdrasil/types/cast/__init__.py +2 -1
- yggdrasil/types/cast/arrow_cast.py +131 -0
- yggdrasil/types/cast/cast_options.py +119 -1
- yggdrasil/types/cast/pandas_cast.py +29 -0
- yggdrasil/types/cast/polars_cast.py +47 -0
- yggdrasil/types/cast/polars_pandas_cast.py +29 -0
- yggdrasil/types/cast/registry.py +176 -0
- yggdrasil/types/cast/spark_cast.py +76 -0
- yggdrasil/types/cast/spark_pandas_cast.py +29 -0
- yggdrasil/types/cast/spark_polars_cast.py +28 -0
- yggdrasil/types/libs.py +2 -0
- yggdrasil/types/python_arrow.py +191 -0
- yggdrasil/types/python_defaults.py +73 -0
- yggdrasil/version.py +1 -0
- ygg-0.1.30.dist-info/RECORD +0 -56
- yggdrasil/databricks/workspaces/databricks_path.py +0 -784
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
Changes to yggdrasil/databricks/workspaces/workspace.py (+298 -154):

@@ -1,3 +1,5 @@
+"""Workspace configuration and Databricks SDK helpers."""
+
 import dataclasses
 import logging
 import os
@@ -16,14 +18,14 @@ from typing import (
 if TYPE_CHECKING:
     from ..compute.cluster import Cluster
 
-from .
+from .path import DatabricksPath, DatabricksPathKind
+from ...version import __version__ as YGGDRASIL_VERSION
 from ...libs.databrickslib import require_databricks_sdk, databricks_sdk
 
 if databricks_sdk is not None:
     from databricks.sdk import WorkspaceClient
-    from databricks.sdk.errors import ResourceDoesNotExist
+    from databricks.sdk.errors import ResourceDoesNotExist
     from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
-    from databricks.sdk.service import catalog as catalog_svc
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry
 
@@ -45,7 +47,7 @@ def _get_env_product():
     v = os.getenv("DATABRICKS_PRODUCT")
 
     if not v:
-        return
+        return "yggdrasil"
     return v.strip().lower()
 
 
@@ -53,7 +55,7 @@ def _get_env_product_version():
     v = os.getenv("DATABRICKS_PRODUCT_VERSION")
 
     if not v:
-        return
+        return YGGDRASIL_VERSION
     return v.strip().lower()
 
 
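Net effect of the two hunks above: with `DATABRICKS_PRODUCT` / `DATABRICKS_PRODUCT_VERSION` unset, the helpers now fall back to `"yggdrasil"` and the package `__version__` instead of returning `None`. A minimal sketch of the resolution behavior, assuming the module is imported as shown in the diff:

```python
import os

# Unset -> new defaults instead of None.
os.environ.pop("DATABRICKS_PRODUCT", None)
assert _get_env_product() == "yggdrasil"

# Set -> still stripped and lowercased, same as before this release.
os.environ["DATABRICKS_PRODUCT"] = "  My-App  "
assert _get_env_product() == "my-app"
```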
@@ -67,6 +69,7 @@ def _get_env_product_tag():
 
 @dataclass
 class Workspace:
+    """Configuration wrapper for connecting to a Databricks workspace."""
     # Databricks / generic
     host: Optional[str] = None
     account_id: Optional[str] = None
@@ -113,6 +116,11 @@ class Workspace:
     # Pickle support
     # -------------------------
     def __getstate__(self):
+        """Serialize the workspace state for pickling.
+
+        Returns:
+            A pickle-ready state dictionary.
+        """
         state = self.__dict__.copy()
         state.pop("_sdk", None)
 
@@ -122,6 +130,11 @@ class Workspace:
         return state
 
     def __setstate__(self, state):
+        """Restore workspace state after unpickling.
+
+        Args:
+            state: Serialized state dictionary.
+        """
         self.__dict__.update(state)
         self._sdk = None
 
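The pickle hooks above drop the live `_sdk` client on serialize and null it on restore, so a `Workspace` can cross process boundaries and reconnect lazily. A short sketch (the host value is hypothetical):

```python
import pickle

ws = Workspace(host="https://adb-1234567890123456.7.azuredatabricks.net")
ws2 = pickle.loads(pickle.dumps(ws))  # __getstate__ strips the _sdk client

assert ws2._sdk is None               # __setstate__ resets it
assert ws2.host == ws.host            # config fields survive the round trip
```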
@@ -132,21 +145,46 @@ class Workspace:
         self.connect(reset=True)
 
     def __enter__(self) -> "Workspace":
+        """Enter a context manager and connect to the workspace.
+
+        Returns:
+            The connected Workspace instance.
+        """
         self._was_connected = self._sdk is not None
-        self.connect()
-        return self
+        return self.connect()
 
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the context manager and close if newly connected.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         if not self._was_connected:
             self.close()
 
+    def __del__(self):
+        self.close()
+
     # -------------------------
     # Clone
     # -------------------------
-    def
+    def clone_instance(
         self,
         **kwargs
     ) -> "Workspace":
+        """Clone the workspace config with overrides.
+
+        Args:
+            **kwargs: Field overrides for the clone.
+
+        Returns:
+            A new Workspace instance with updated fields.
+        """
         state = self.__getstate__()
         state.update(kwargs)
         return Workspace().__setstate__(state)
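`__enter__` now returns `self.connect()` directly, and `__exit__` only closes connections that the `with` block itself opened; `__del__` adds a last-resort close. Usage sketch (the profile names are hypothetical):

```python
with Workspace(profile="dev") as ws:
    print(ws.current_user)     # connection opened by __enter__
# closed here, because ws was not connected before the block

ws = Workspace(profile="dev").connect()
with ws:                       # already connected: __exit__ leaves it open
    ...

staging = ws.clone_instance(profile="staging")  # same state, one field overridden
```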
@@ -154,86 +192,108 @@ class Workspace:
     # -------------------------
     # SDK connection
     # -------------------------
-
+    @property
+    def connected(self):
+        """Return True when a WorkspaceClient is cached.
+
+        Returns:
+            True if connected, otherwise False.
+        """
+        return self._sdk is not None
+
+    def connect(self, reset: bool = False, clone: bool = False) -> "Workspace":
+        """Connect to the workspace and cache the SDK client.
+
+        Args:
+            reset: Whether to reset the cached client before connecting.
+            clone: Whether to connect a cloned instance.
+
+        Returns:
+            The connected Workspace instance.
+        """
         if reset:
             self._sdk = None
 
-        if self._sdk is None:
-        [old lines 162-193: previous connect() body, not rendered in the source diff]
+        if self._sdk is not None:
+            return self
+
+        instance = self.clone_instance() if clone else self
+
+        require_databricks_sdk()
+        logger.debug("Connecting %s", self)
+
+        # Build Config from config_dict if available, else from fields.
+        kwargs = {
+            "host": instance.host,
+            "account_id": instance.account_id,
+            "token": instance.token,
+            "client_id": instance.client_id,
+            "client_secret": instance.client_secret,
+            "token_audience": instance.token_audience,
+            "azure_workspace_resource_id": instance.azure_workspace_resource_id,
+            "azure_use_msi": instance.azure_use_msi,
+            "azure_client_secret": instance.azure_client_secret,
+            "azure_client_id": instance.azure_client_id,
+            "azure_tenant_id": instance.azure_tenant_id,
+            "azure_environment": instance.azure_environment,
+            "google_credentials": instance.google_credentials,
+            "google_service_account": instance.google_service_account,
+            "profile": instance.profile,
+            "config_file": instance.config_file,
+            "auth_type": instance.auth_type,
+            "http_timeout_seconds": instance.http_timeout_seconds,
+            "retry_timeout_seconds": instance.retry_timeout_seconds,
+            "debug_truncate_bytes": instance.debug_truncate_bytes,
+            "debug_headers": instance.debug_headers,
+            "rate_limit": instance.rate_limit,
+            "product": instance.product,
+            "product_version": instance.product_version,
+        }
 
-        [old lines 195-199: not rendered in the source diff]
+        build_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+        try:
+            instance._sdk = WorkspaceClient(**build_kwargs)
+        except ValueError as e:
+            if "cannot configure default credentials" in str(e) and instance.auth_type is None:
+                last_error = e
 
-
+                auth_types = ["runtime"] if instance.is_in_databricks_environment() else ["external-browser"]
 
-
-
+                for auth_type in auth_types:
+                    build_kwargs["auth_type"] = auth_type
+
+                    try:
+                        instance._sdk = WorkspaceClient(**build_kwargs)
+                        break
+                    except Exception as se:
+                        last_error = se
+                        build_kwargs.pop("auth_type")
+
+                if instance._sdk is None:
+                    if instance.is_in_databricks_environment() and instance._cached_token:
+                        build_kwargs["token"] = instance._cached_token
 
                     try:
-
-                        break
+                        instance._sdk = WorkspaceClient(**build_kwargs)
                     except Exception as se:
                         last_error = se
-                        build_kwargs.pop("auth_type")
-
-        if self._sdk is None:
-            if self.is_in_databricks_environment() and self._cached_token:
-                build_kwargs["token"] = self._cached_token
 
-
-
-
-
+                if instance._sdk is None:
+                    raise last_error
+            else:
+                raise e
 
-
-
-
-
+        # backfill resolved config values
+        for key in list(kwargs.keys()):
+            if getattr(instance, key, None) is None:
+                v = getattr(instance._sdk.config, key, None)
+                if v is not None:
+                    setattr(instance, key, v)
 
-
-        for key in list(kwargs.keys()):
-            if getattr(self, key, None) is None:
-                v = getattr(self._sdk.config, key, None)
-                if v is not None:
-                    setattr(self, key, v)
+        logger.info("Connected %s", instance)
 
-
-
-        return self
+        return instance
 
     # ------------------------------------------------------------------ #
     # Context manager + lifecycle
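The rewritten `connect()` is the core of this release: instead of failing on the first `ValueError`, it walks an auth ladder. In order: (1) build a `WorkspaceClient` from the explicitly-set fields only; (2) on "cannot configure default credentials", retry with `auth_type="runtime"` inside a Databricks runtime, or `"external-browser"` elsewhere; (3) fall back to a locally cached token; (4) re-raise the last error. It also backfills resolved config values onto the dataclass fields. Sketch:

```python
ws = Workspace()           # no credentials given
ws = ws.connect()          # walks the ladder above; raises last_error if every rung fails

print(ws.connected)        # True: the client is now cached on the instance
print(ws.host)             # backfilled from the resolved SDK config

# With clone=True the clone is connected and returned; the original stays untouched.
other = Workspace(profile="staging").connect(clone=True)  # hypothetical profile name
```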
@@ -260,6 +320,11 @@ class Workspace:
         return str(files[0]) if files else None
 
     def reset_local_cache(self):
+        """Remove cached browser OAuth tokens.
+
+        Returns:
+            None.
+        """
         local_cache = self._local_cache_token_path()
 
         if local_cache:
@@ -267,6 +332,11 @@ class Workspace:
 
     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         try:
             return self.sdk().current_user.me()
         except:
@@ -275,6 +345,11 @@ class Workspace:
             raise
 
     def current_token(self) -> str:
+        """Return the active API token for this workspace.
+
+        Returns:
+            The bearer token string.
+        """
         if self.token:
             return self.token
 
@@ -287,24 +362,56 @@ class Workspace:
     # ------------------------------------------------------------------ #
     # Path helpers
     # ------------------------------------------------------------------ #
+    def filesytem(
+        self,
+        workspace: Optional["Workspace"] = None,
+    ):
+        """Return a PyArrow filesystem for Databricks paths.
+
+        Args:
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksFileSystem instance.
+        """
+        from .filesytem import DatabricksFileSystem, DatabricksFileSystemHandler
+
+        handler = DatabricksFileSystemHandler(
+            workspace=self if workspace is None else workspace
+        )
+
+        return DatabricksFileSystem(
+            handler=handler
+        )
+
     def dbfs_path(
         self,
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in this workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         workspace = self if workspace is None else workspace
 
         if kind is None or isinstance(parts, str):
             return DatabricksPath.parse(
-
+                obj=parts,
                 workspace=workspace
             )
 
         return DatabricksPath(
             kind=kind,
             parts=parts,
-
+            _workspace=workspace
         )
 
     def shared_cache_path(
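`filesytem()` (the spelling matches the new `filesytem.py` module in the file list above) wraps the workspace in a filesystem handler, and `dbfs_path()` now forwards the parsed object explicitly. A hedged sketch of how the two might combine, assuming `DatabricksFileSystem` is PyArrow-compatible and `DatabricksPath` stringifies to its path; the volume path is hypothetical:

```python
import pyarrow.dataset as ds

ws = Workspace().connect()

fs = ws.filesytem()
events = ws.dbfs_path("/Volumes/main/default/raw/events")

# Read a directory of files through the Databricks filesystem bridge.
table = ds.dataset(str(events), filesystem=fs).to_table()
```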
@@ -313,6 +420,12 @@ class Workspace:
     ) -> DatabricksPath:
         """
         Shared cache base under Volumes for the current user.
+
+        Args:
+            suffix: Optional path suffix to append.
+
+        Returns:
+            A DatabricksPath pointing at the shared cache location.
         """
         base = "/Workspace/Shared/.ygg/cache"
 
@@ -327,61 +440,12 @@ class Workspace:
     # ------------------------------------------------------------------ #
 
     def sdk(self) -> "WorkspaceClient":
-
+        """Return the connected WorkspaceClient.
 
-
-
-    # ------------------------------------------------------------------ #
-
-    def ensure_uc_volume_and_dir(
-        self,
-        target_path: str,
-    ) -> None:
-        """
-        Ensure catalog, schema, volume exist for a UC volume path
-        like /Volumes/<catalog>/<schema>/<volume>/...,
-        then create the directory.
+        Returns:
+            The WorkspaceClient instance.
         """
-
-        parts = target_path.split("/")
-
-        # basic sanity check
-        if len(parts) < 5 or parts[1] != "Volumes":
-            raise ValueError(
-                f"Unexpected UC volume path: {target_path!r}. "
-                "Expected /Volumes/<catalog>/<schema>/<volume>/..."
-            )
-
-        # /Volumes/<catalog>/<schema>/<volume>/...
-        _, _, catalog_name, schema_name, volume_name, *subpath = parts
-
-        # 1) ensure catalog
-        try:
-            sdk.catalogs.get(name=catalog_name)
-        except NotFound:
-            sdk.catalogs.create(name=catalog_name)
-
-        # 2) ensure schema
-        schema_full_name = f"{catalog_name}.{schema_name}"
-        try:
-            sdk.schemas.get(full_name=schema_full_name)
-        except NotFound:
-            sdk.schemas.create(name=schema_name, catalog_name=catalog_name)
-
-        # 3) ensure volume (managed volume is simplest)
-        volume_full_name = f"{catalog_name}.{schema_name}.{volume_name}"
-        try:
-            sdk.volumes.read(name=volume_full_name)
-        except NotFound:
-            sdk.volumes.create(
-                catalog_name=catalog_name,
-                schema_name=schema_name,
-                name=volume_name,
-                volume_type=catalog_svc.VolumeType.MANAGED,
-            )
-
-        # 4) finally create the directory path itself
-        sdk.files.create_directory(target_path)
+        return self.connect()._sdk
 
     # ------------------------------------------------------------------ #
     # List / open / delete / SQL
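`sdk()` collapses to a single line: `connect()` is a no-op when a client is already cached, so every call site gets lazy connection for free. The removed `ensure_uc_volume_and_dir` helper was effectively dead code: it referenced an undefined `sdk` variable and caught an unimported `NotFound`. Usage stays the same:

```python
ws = Workspace()
me = ws.sdk().current_user.me()  # first call connects; later calls reuse the cached client
```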
@@ -400,6 +464,13 @@ class Workspace:
         - other paths -> Workspace paths (sdk.workspace.list)
 
         If recursive=True, yield all nested files/directories.
+
+        Args:
+            path: Path string to list.
+            recursive: Whether to list recursively.
+
+        Returns:
+            An iterator of workspace/DBFS/volume entries.
         """
         sdk = self.sdk()
 
@@ -452,6 +523,13 @@ class Workspace:
         via workspace.download(...).
 
         Returned object is a BinaryIO context manager.
+
+        Args:
+            path: Path to open.
+            workspace_format: Optional export format for workspace paths.
+
+        Returns:
+            A BinaryIO stream for reading.
         """
         sdk = self.sdk()
 
@@ -462,37 +540,24 @@ class Workspace:
 
         # Workspace path
         fmt = workspace_format or ExportFormat.AUTO
-        return sdk.workspace.download(path=path, format=fmt)
-
-    def delete_path(
-        self,
-        target_path: str,
-        recursive: bool = True,
-        ignore_missing: bool = True,
-    ) -> None:
-        """
-        Delete a path in Databricks Workspace (file or directory).
-
-        - If recursive=True and target_path is a directory, deletes entire tree.
-        - If ignore_missing=True, missing paths won't raise.
-        """
-        sdk = self.sdk()
 
-
-        sdk.workspace.delete(
-            path=target_path,
-            recursive=recursive,
-        )
-        except ResourceDoesNotExist:
-            if ignore_missing:
-                return
-            raise
+        return sdk.workspace.download(path=path, format=fmt)
 
     @staticmethod
     def is_in_databricks_environment():
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
 
     def default_tags(self):
+        """Return default resource tags for Databricks assets.
+
+        Returns:
+            A dict of default tags.
+        """
         return {
             k: v
             for k, v in (
@@ -504,6 +569,14 @@ class Workspace:
         }
 
     def merge_tags(self, existing: dict | None = None):
+        """Merge default tags with an existing set.
+
+        Args:
+            existing: Optional existing tags.
+
+        Returns:
+            A dict of merged tags.
+        """
         if existing:
             return self.default_tags()
 
@@ -514,6 +587,17 @@ class Workspace:
         schema_name: Optional[str] = None,
         **kwargs
     ):
+        """Return a SQLEngine configured for this workspace.
+
+        Args:
+            workspace: Optional workspace override.
+            catalog_name: Optional catalog name.
+            schema_name: Optional schema name.
+            **kwargs: Additional SQLEngine parameters.
+
+        Returns:
+            A SQLEngine instance.
+        """
         from ..sql import SQLEngine
 
         return SQLEngine(
@@ -529,11 +613,20 @@ class Workspace:
         cluster_name: Optional[str] = None,
         **kwargs
     ) -> "Cluster":
+        """Return a Cluster helper bound to this workspace.
+
+        Args:
+            cluster_id: Optional cluster id.
+            cluster_name: Optional cluster name.
+            **kwargs: Additional Cluster parameters.
+
+        Returns:
+            A Cluster instance.
+        """
         from ..compute.cluster import Cluster
 
         return Cluster(workspace=self, cluster_id=cluster_id, cluster_name=cluster_name, **kwargs)
 
-
 # ---------------------------------------------------------------------------
 # Workspace-bound base class
 # ---------------------------------------------------------------------------
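Both factories bind this workspace into the richer helpers (`SQLEngine` in `sql/engine.py`, `Cluster` in `compute/cluster.py`, both heavily extended in this release). The method names are truncated out of the hunk headers, so the sketch below assumes they are exposed as `sql_engine()` and `cluster()`; the catalog, schema, and cluster names are hypothetical:

```python
ws = Workspace().connect()

engine = ws.sql_engine(catalog_name="main", schema_name="default")
cluster = ws.cluster(cluster_name="shared-autoscaling")
```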
@@ -543,23 +636,54 @@ DBXWorkspace = Workspace
 
 @dataclass
 class WorkspaceService(ABC):
+    """Base class for helpers that depend on a Workspace."""
     workspace: Workspace = dataclasses.field(default_factory=Workspace)
 
     def __post_init__(self):
+        """Ensure a Workspace instance is available.
+
+        Returns:
+            None.
+        """
         if self.workspace is None:
             self.workspace = Workspace()
 
     def __enter__(self):
+        """Enter a context manager and connect the workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace.__enter__()
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and close the workspace.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         self.workspace.__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)
 
     def is_in_databricks_environment(self):
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return self.workspace.is_in_databricks_environment()
 
     def connect(self):
+        """Connect the underlying workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace = self.workspace.connect()
         return self
 
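`WorkspaceService` gives subclasses the workspace lifecycle by delegation: context management, `connect()`, and the `dbfs_path`/`sdk`/`current_user` passthroughs below. A minimal hypothetical subclass:

```python
import dataclasses

@dataclasses.dataclass
class WorkspaceJanitor(WorkspaceService):  # hypothetical service
    prefix: str = "/Shared/tmp"

    def run(self):
        # sdk() delegates to the bound workspace's WorkspaceClient.
        for obj in self.sdk().workspace.list(self.prefix):
            print(obj.path)

with WorkspaceJanitor() as svc:  # connects via workspace.__enter__
    svc.run()
```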
@@ -569,6 +693,16 @@ class WorkspaceService(ABC):
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in the underlying workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         return self.workspace.dbfs_path(
             kind=kind,
             parts=parts,
@@ -576,8 +710,18 @@ class WorkspaceService(ABC):
         )
 
     def sdk(self):
+        """Return the WorkspaceClient for the underlying workspace.
+
+        Returns:
+            The WorkspaceClient instance.
+        """
         return self.workspace.sdk()
 
     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         return self.workspace.current_user