ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (62)
  1. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +31 -34
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +148 -1
  15. yggdrasil/databricks/sql/types.py +49 -1
  16. yggdrasil/databricks/workspaces/__init__.py +4 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +344 -0
  18. yggdrasil/databricks/workspaces/io.py +1123 -0
  19. yggdrasil/databricks/workspaces/path.py +1415 -0
  20. yggdrasil/databricks/workspaces/path_kind.py +13 -0
  21. yggdrasil/databricks/workspaces/workspace.py +298 -154
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +131 -0
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.30.dist-info/RECORD +0 -56
  58. yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  59. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  60. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  61. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  62. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/workspace.py
@@ -1,3 +1,5 @@
+"""Workspace configuration and Databricks SDK helpers."""
+
 import dataclasses
 import logging
 import os
@@ -16,14 +18,14 @@ from typing import (
 if TYPE_CHECKING:
     from ..compute.cluster import Cluster
 
-    from .databricks_path import DatabricksPath, DatabricksPathKind
+    from .path import DatabricksPath, DatabricksPathKind
+from ...version import __version__ as YGGDRASIL_VERSION
 from ...libs.databrickslib import require_databricks_sdk, databricks_sdk
 
 if databricks_sdk is not None:
     from databricks.sdk import WorkspaceClient
-    from databricks.sdk.errors import ResourceDoesNotExist, NotFound
+    from databricks.sdk.errors import ResourceDoesNotExist
     from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
-    from databricks.sdk.service import catalog as catalog_svc
     from databricks.sdk.dbutils import FileInfo
     from databricks.sdk.service.files import DirectoryEntry
 
@@ -45,7 +47,7 @@ def _get_env_product():
     v = os.getenv("DATABRICKS_PRODUCT")
 
     if not v:
-        return None
+        return "yggdrasil"
 
     return v.strip().lower()
 
 
@@ -53,7 +55,7 @@ def _get_env_product_version():
    v = os.getenv("DATABRICKS_PRODUCT_VERSION")
 
    if not v:
-        return None
+        return YGGDRASIL_VERSION
 
    return v.strip().lower()
 
 
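These two fallbacks mean an unset DATABRICKS_PRODUCT / DATABRICKS_PRODUCT_VERSION pair now resolves to the library's own identity (which feeds the SDK's product metadata) instead of None. A standalone sketch of the new resolution logic, reimplemented here for illustration rather than imported from the package:

    import os

    YGGDRASIL_VERSION = "0.1.32"  # stand-in for yggdrasil.version.__version__

    def resolve_product() -> str:
        # Mirrors _get_env_product(): env var wins, otherwise "yggdrasil".
        v = os.getenv("DATABRICKS_PRODUCT")
        return v.strip().lower() if v else "yggdrasil"

    def resolve_product_version() -> str:
        # Mirrors _get_env_product_version(): env var wins, otherwise the package version.
        v = os.getenv("DATABRICKS_PRODUCT_VERSION")
        return v.strip().lower() if v else YGGDRASIL_VERSION

    print(resolve_product(), resolve_product_version())  # yggdrasil 0.1.32 when neither var is set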
@@ -67,6 +69,7 @@ def _get_env_product_tag():
 
 @dataclass
 class Workspace:
+    """Configuration wrapper for connecting to a Databricks workspace."""
     # Databricks / generic
     host: Optional[str] = None
     account_id: Optional[str] = None
@@ -113,6 +116,11 @@ class Workspace:
     # Pickle support
     # -------------------------
     def __getstate__(self):
+        """Serialize the workspace state for pickling.
+
+        Returns:
+            A pickle-ready state dictionary.
+        """
         state = self.__dict__.copy()
         state.pop("_sdk", None)
 
@@ -122,6 +130,11 @@ class Workspace:
         return state
 
     def __setstate__(self, state):
+        """Restore workspace state after unpickling.
+
+        Args:
+            state: Serialized state dictionary.
+        """
         self.__dict__.update(state)
         self._sdk = None
 
@@ -132,21 +145,46 @@ class Workspace:
         self.connect(reset=True)
 
     def __enter__(self) -> "Workspace":
+        """Enter a context manager and connect to the workspace.
+
+        Returns:
+            The connected Workspace instance.
+        """
         self._was_connected = self._sdk is not None
-        self.connect()
-        return self
+        return self.connect()
 
     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the context manager and close if newly connected.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         if not self._was_connected:
             self.close()
 
+    def __del__(self):
+        self.close()
+
     # -------------------------
     # Clone
     # -------------------------
-    def clone(
+    def clone_instance(
         self,
         **kwargs
     ) -> "Workspace":
+        """Clone the workspace config with overrides.
+
+        Args:
+            **kwargs: Field overrides for the clone.
+
+        Returns:
+            A new Workspace instance with updated fields.
+        """
         state = self.__getstate__()
         state.update(kwargs)
         return Workspace().__setstate__(state)
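Three behavioural changes land here: __enter__ now returns the result of connect() rather than always self, a new __del__ hook calls close() on garbage collection as a safety net, and clone() is renamed to clone_instance() (a breaking rename for existing callers). A hedged usage sketch, with the import path inferred from the file list above:

    from yggdrasil.databricks.workspaces import Workspace

    # Connects on entry; __exit__ only closes the client if this block opened it.
    with Workspace(profile="DEFAULT") as ws:
        ...

    # Former clone() callers must migrate; per its docstring, clone_instance()
    # returns a new Workspace carrying the overridden fields.
    staging = ws.clone_instance(profile="staging")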
@@ -154,86 +192,108 @@ class Workspace:
     # -------------------------
     # SDK connection
     # -------------------------
-    def connect(self, reset: bool = False) -> "Workspace":
+    @property
+    def connected(self):
+        """Return True when a WorkspaceClient is cached.
+
+        Returns:
+            True if connected, otherwise False.
+        """
+        return self._sdk is not None
+
+    def connect(self, reset: bool = False, clone: bool = False) -> "Workspace":
+        """Connect to the workspace and cache the SDK client.
+
+        Args:
+            reset: Whether to reset the cached client before connecting.
+            clone: Whether to connect a cloned instance.
+
+        Returns:
+            The connected Workspace instance.
+        """
         if reset:
             self._sdk = None
 
-        if self._sdk is None:
-            require_databricks_sdk()
-            logger.debug("Connecting %s", self)
-
-            # Build Config from config_dict if available, else from fields.
-            kwargs = {
-                "host": self.host,
-                "account_id": self.account_id,
-                "token": self.token,
-                "client_id": self.client_id,
-                "client_secret": self.client_secret,
-                "token_audience": self.token_audience,
-                "azure_workspace_resource_id": self.azure_workspace_resource_id,
-                "azure_use_msi": self.azure_use_msi,
-                "azure_client_secret": self.azure_client_secret,
-                "azure_client_id": self.azure_client_id,
-                "azure_tenant_id": self.azure_tenant_id,
-                "azure_environment": self.azure_environment,
-                "google_credentials": self.google_credentials,
-                "google_service_account": self.google_service_account,
-                "profile": self.profile,
-                "config_file": self.config_file,
-                "auth_type": self.auth_type,
-                "http_timeout_seconds": self.http_timeout_seconds,
-                "retry_timeout_seconds": self.retry_timeout_seconds,
-                "debug_truncate_bytes": self.debug_truncate_bytes,
-                "debug_headers": self.debug_headers,
-                "rate_limit": self.rate_limit,
-                "product": self.product,
-                "product_version": self.product_version,
-            }
-
-            build_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        if self._sdk is not None:
+            return self
+
+        instance = self.clone_instance() if clone else self
+
+        require_databricks_sdk()
+        logger.debug("Connecting %s", self)
+
+        # Build Config from config_dict if available, else from fields.
+        kwargs = {
+            "host": instance.host,
+            "account_id": instance.account_id,
+            "token": instance.token,
+            "client_id": instance.client_id,
+            "client_secret": instance.client_secret,
+            "token_audience": instance.token_audience,
+            "azure_workspace_resource_id": instance.azure_workspace_resource_id,
+            "azure_use_msi": instance.azure_use_msi,
+            "azure_client_secret": instance.azure_client_secret,
+            "azure_client_id": instance.azure_client_id,
+            "azure_tenant_id": instance.azure_tenant_id,
+            "azure_environment": instance.azure_environment,
+            "google_credentials": instance.google_credentials,
+            "google_service_account": instance.google_service_account,
+            "profile": instance.profile,
+            "config_file": instance.config_file,
+            "auth_type": instance.auth_type,
+            "http_timeout_seconds": instance.http_timeout_seconds,
+            "retry_timeout_seconds": instance.retry_timeout_seconds,
+            "debug_truncate_bytes": instance.debug_truncate_bytes,
+            "debug_headers": instance.debug_headers,
+            "rate_limit": instance.rate_limit,
+            "product": instance.product,
+            "product_version": instance.product_version,
+        }
 
-            try:
-                self._sdk = WorkspaceClient(**build_kwargs)
-            except ValueError as e:
-                if "cannot configure default credentials" in str(e) and self.auth_type is None:
-                    last_error = e
+        build_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+        try:
+            instance._sdk = WorkspaceClient(**build_kwargs)
+        except ValueError as e:
+            if "cannot configure default credentials" in str(e) and instance.auth_type is None:
+                last_error = e
 
-                    auth_types = ["runtime"] if self.is_in_databricks_environment() else ["external-browser"]
+                auth_types = ["runtime"] if instance.is_in_databricks_environment() else ["external-browser"]
 
-                    for auth_type in auth_types:
-                        build_kwargs["auth_type"] = auth_type
+                for auth_type in auth_types:
+                    build_kwargs["auth_type"] = auth_type
+
+                    try:
+                        instance._sdk = WorkspaceClient(**build_kwargs)
+                        break
+                    except Exception as se:
+                        last_error = se
+                        build_kwargs.pop("auth_type")
+
+                if instance._sdk is None:
+                    if instance.is_in_databricks_environment() and instance._cached_token:
+                        build_kwargs["token"] = instance._cached_token
 
                         try:
-                            self._sdk = WorkspaceClient(**build_kwargs)
-                            break
+                            instance._sdk = WorkspaceClient(**build_kwargs)
                         except Exception as se:
                             last_error = se
-                            build_kwargs.pop("auth_type")
-
-                    if self._sdk is None:
-                        if self.is_in_databricks_environment() and self._cached_token:
-                            build_kwargs["token"] = self._cached_token
 
-                            try:
-                                self._sdk = WorkspaceClient(**build_kwargs)
-                            except Exception as se:
-                                last_error = se
+                    if instance._sdk is None:
+                        raise last_error
+            else:
+                raise e
 
-                        if self._sdk is None:
-                            raise last_error
-                else:
-                    raise e
+        # backfill resolved config values
+        for key in list(kwargs.keys()):
+            if getattr(instance, key, None) is None:
+                v = getattr(instance._sdk.config, key, None)
+                if v is not None:
+                    setattr(instance, key, v)
 
-            # backfill resolved config values
-            for key in list(kwargs.keys()):
-                if getattr(self, key, None) is None:
-                    v = getattr(self._sdk.config, key, None)
-                    if v is not None:
-                        setattr(self, key, v)
+        logger.info("Connected %s", instance)
 
-            logger.info("Connected %s", self)
-
-            return self
+        return instance
 
     # ------------------------------------------------------------------ #
     # Context manager + lifecycle
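The rewritten connect() short-circuits when a client is already cached, optionally connects a clone_instance() when clone=True, retries with auth_type "runtime" (on a Databricks runtime) or "external-browser" when default credentials cannot be configured, and backfills unset fields from the resolved SDK config. A usage sketch under those assumptions:

    ws = Workspace(profile="DEFAULT")

    ws.connect()             # builds and caches the WorkspaceClient
    assert ws.connected      # new property: True once _sdk is cached
    ws.connect()             # returns immediately; the cached client is reused

    # clone=True connects and returns a detached copy, leaving the original
    # unconnected; the flag is ignored once a client is already cached.
    fresh = Workspace(profile="DEFAULT")
    detached = fresh.connect(clone=True)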
@@ -260,6 +320,11 @@ class Workspace:
         return str(files[0]) if files else None
 
     def reset_local_cache(self):
+        """Remove cached browser OAuth tokens.
+
+        Returns:
+            None.
+        """
         local_cache = self._local_cache_token_path()
 
         if local_cache:
@@ -267,6 +332,11 @@
 
     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         try:
             return self.sdk().current_user.me()
         except:
@@ -275,6 +345,11 @@
             raise
 
     def current_token(self) -> str:
+        """Return the active API token for this workspace.
+
+        Returns:
+            The bearer token string.
+        """
         if self.token:
             return self.token
 
@@ -287,24 +362,56 @@
     # ------------------------------------------------------------------ #
     # Path helpers
     # ------------------------------------------------------------------ #
+    def filesytem(
+        self,
+        workspace: Optional["Workspace"] = None,
+    ):
+        """Return a PyArrow filesystem for Databricks paths.
+
+        Args:
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksFileSystem instance.
+        """
+        from .filesytem import DatabricksFileSystem, DatabricksFileSystemHandler
+
+        handler = DatabricksFileSystemHandler(
+            workspace=self if workspace is None else workspace
+        )
+
+        return DatabricksFileSystem(
+            handler=handler
+        )
+
     def dbfs_path(
         self,
         parts: Union[List[str], str],
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in this workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         workspace = self if workspace is None else workspace
 
         if kind is None or isinstance(parts, str):
             return DatabricksPath.parse(
-                parts=parts,
+                obj=parts,
                 workspace=workspace
             )
 
         return DatabricksPath(
             kind=kind,
             parts=parts,
-            workspace=workspace
+            _workspace=workspace
         )
 
     def shared_cache_path(
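The new filesytem() helper (the spelling matches the new filesytem.py module) exposes the workspace as a filesystem object, and dbfs_path() now forwards strings to DatabricksPath.parse(obj=...) while passing the workspace through the private _workspace field. A hedged sketch, assuming DatabricksFileSystem implements the pyarrow.fs.FileSystem interface and that the illustrative volume path exists:

    import pyarrow.dataset as ds

    ws = Workspace(profile="DEFAULT").connect()

    # PyArrow-compatible view over workspace / DBFS / volume paths.
    fs = ws.filesytem()
    table = ds.dataset(
        "Volumes/main/default/my_volume/events",  # illustrative path
        format="parquet",
        filesystem=fs,
    ).to_table()

    # Strings are parsed into a DatabricksPath; kind + parts construct directly.
    p = ws.dbfs_path("/Volumes/main/default/my_volume/events")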
@@ -313,6 +420,12 @@
     ) -> DatabricksPath:
         """
         Shared cache base under Volumes for the current user.
+
+        Args:
+            suffix: Optional path suffix to append.
+
+        Returns:
+            A DatabricksPath pointing at the shared cache location.
         """
         base = "/Workspace/Shared/.ygg/cache"
 
@@ -327,61 +440,12 @@
     # ------------------------------------------------------------------ #
 
     def sdk(self) -> "WorkspaceClient":
-        return self.connect()._sdk
+        """Return the connected WorkspaceClient.
 
-    # ------------------------------------------------------------------ #
-    # UC volume + directory management
-    # ------------------------------------------------------------------ #
-
-    def ensure_uc_volume_and_dir(
-        self,
-        target_path: str,
-    ) -> None:
-        """
-        Ensure catalog, schema, volume exist for a UC volume path
-        like /Volumes/<catalog>/<schema>/<volume>/...,
-        then create the directory.
+        Returns:
+            The WorkspaceClient instance.
         """
-        sdk = self.sdk()
-        parts = target_path.split("/")
-
-        # basic sanity check
-        if len(parts) < 5 or parts[1] != "Volumes":
-            raise ValueError(
-                f"Unexpected UC volume path: {target_path!r}. "
-                "Expected /Volumes/<catalog>/<schema>/<volume>/..."
-            )
-
-        # /Volumes/<catalog>/<schema>/<volume>/...
-        _, _, catalog_name, schema_name, volume_name, *subpath = parts
-
-        # 1) ensure catalog
-        try:
-            sdk.catalogs.get(name=catalog_name)
-        except NotFound:
-            sdk.catalogs.create(name=catalog_name)
-
-        # 2) ensure schema
-        schema_full_name = f"{catalog_name}.{schema_name}"
-        try:
-            sdk.schemas.get(full_name=schema_full_name)
-        except NotFound:
-            sdk.schemas.create(name=schema_name, catalog_name=catalog_name)
-
-        # 3) ensure volume (managed volume is simplest)
-        volume_full_name = f"{catalog_name}.{schema_name}.{volume_name}"
-        try:
-            sdk.volumes.read(name=volume_full_name)
-        except NotFound:
-            sdk.volumes.create(
-                catalog_name=catalog_name,
-                schema_name=schema_name,
-                name=volume_name,
-                volume_type=catalog_svc.VolumeType.MANAGED,
-            )
-
-        # 4) finally create the directory path itself
-        sdk.files.create_directory(target_path)
+        return self.connect()._sdk
 
     # ------------------------------------------------------------------ #
     # List / open / delete / SQL
@@ -400,6 +464,13 @@
         - other paths -> Workspace paths (sdk.workspace.list)
 
         If recursive=True, yield all nested files/directories.
+
+        Args:
+            path: Path string to list.
+            recursive: Whether to list recursively.
+
+        Returns:
+            An iterator of workspace/DBFS/volume entries.
         """
         sdk = self.sdk()
 
@@ -452,6 +523,13 @@
         via workspace.download(...).
 
         Returned object is a BinaryIO context manager.
+
+        Args:
+            path: Path to open.
+            workspace_format: Optional export format for workspace paths.
+
+        Returns:
+            A BinaryIO stream for reading.
         """
         sdk = self.sdk()
 
@@ -462,37 +540,24 @@
 
         # Workspace path
         fmt = workspace_format or ExportFormat.AUTO
-        return sdk.workspace.download(path=path, format=fmt)
-
-    def delete_path(
-        self,
-        target_path: str,
-        recursive: bool = True,
-        ignore_missing: bool = True,
-    ) -> None:
-        """
-        Delete a path in Databricks Workspace (file or directory).
-
-        - If recursive=True and target_path is a directory, deletes entire tree.
-        - If ignore_missing=True, missing paths won't raise.
-        """
-        sdk = self.sdk()
 
-        try:
-            sdk.workspace.delete(
-                path=target_path,
-                recursive=recursive,
-            )
-        except ResourceDoesNotExist:
-            if ignore_missing:
-                return
-            raise
+        return sdk.workspace.download(path=path, format=fmt)
 
     @staticmethod
     def is_in_databricks_environment():
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None
 
     def default_tags(self):
+        """Return default resource tags for Databricks assets.
+
+        Returns:
+            A dict of default tags.
+        """
         return {
             k: v
             for k, v in (
@@ -504,6 +569,14 @@
         }
 
     def merge_tags(self, existing: dict | None = None):
+        """Merge default tags with an existing set.
+
+        Args:
+            existing: Optional existing tags.
+
+        Returns:
+            A dict of merged tags.
+        """
         if existing:
             return self.default_tags()
 
@@ -514,6 +587,17 @@
         schema_name: Optional[str] = None,
         **kwargs
     ):
+        """Return a SQLEngine configured for this workspace.
+
+        Args:
+            workspace: Optional workspace override.
+            catalog_name: Optional catalog name.
+            schema_name: Optional schema name.
+            **kwargs: Additional SQLEngine parameters.
+
+        Returns:
+            A SQLEngine instance.
+        """
         from ..sql import SQLEngine
 
         return SQLEngine(
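sql() is a thin factory: catalog/schema plus any extra keyword arguments are forwarded to SQLEngine. A minimal hedged sketch (SQLEngine's other options are outside this hunk):

    engine = ws.sql(catalog_name="main", schema_name="default")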
@@ -529,11 +613,20 @@
         cluster_name: Optional[str] = None,
         **kwargs
     ) -> "Cluster":
+        """Return a Cluster helper bound to this workspace.
+
+        Args:
+            cluster_id: Optional cluster id.
+            cluster_name: Optional cluster name.
+            **kwargs: Additional Cluster parameters.
+
+        Returns:
+            A Cluster instance.
+        """
         from ..compute.cluster import Cluster
 
         return Cluster(workspace=self, cluster_id=cluster_id, cluster_name=cluster_name, **kwargs)
 
-
 # ---------------------------------------------------------------------------
 # Workspace-bound base class
 # ---------------------------------------------------------------------------
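cluster() is the matching compute factory. A hedged one-liner; the identifiers are placeholders:

    cluster = ws.cluster(cluster_name="shared-autoscaling")  # or cluster_id="0123-456789-abcdefgh"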
@@ -543,23 +636,54 @@ DBXWorkspace = Workspace
 
 @dataclass
 class WorkspaceService(ABC):
+    """Base class for helpers that depend on a Workspace."""
     workspace: Workspace = dataclasses.field(default_factory=Workspace)
 
     def __post_init__(self):
+        """Ensure a Workspace instance is available.
+
+        Returns:
+            None.
+        """
         if self.workspace is None:
             self.workspace = Workspace()
 
     def __enter__(self):
+        """Enter a context manager and connect the workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace.__enter__()
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and close the workspace.
+
+        Args:
+            exc_type: Exception type, if raised.
+            exc_val: Exception value, if raised.
+            exc_tb: Exception traceback, if raised.
+
+        Returns:
+            None.
+        """
         self.workspace.__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)
 
     def is_in_databricks_environment(self):
+        """Return True when running on a Databricks runtime.
+
+        Returns:
+            True if running on Databricks, otherwise False.
+        """
         return self.workspace.is_in_databricks_environment()
 
     def connect(self):
+        """Connect the underlying workspace.
+
+        Returns:
+            The current WorkspaceService instance.
+        """
         self.workspace = self.workspace.connect()
         return self
@@ -569,6 +693,16 @@ class WorkspaceService(ABC):
         kind: Optional[DatabricksPathKind] = None,
         workspace: Optional["Workspace"] = None
     ):
+        """Create a DatabricksPath in the underlying workspace.
+
+        Args:
+            parts: Path parts or string to parse.
+            kind: Optional path kind override.
+            workspace: Optional workspace override.
+
+        Returns:
+            A DatabricksPath instance.
+        """
         return self.workspace.dbfs_path(
             kind=kind,
             parts=parts,
@@ -576,8 +710,18 @@ class WorkspaceService(ABC):
         )
 
     def sdk(self):
+        """Return the WorkspaceClient for the underlying workspace.
+
+        Returns:
+            The WorkspaceClient instance.
+        """
         return self.workspace.sdk()
 
     @property
     def current_user(self):
+        """Return the current Databricks user.
+
+        Returns:
+            The current user object from the SDK.
+        """
         return self.workspace.current_user
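WorkspaceService gives subclasses the workspace field, the context-manager wiring, and the sdk()/dbfs_path()/current_user proxies shown above. A hedged sketch of a custom service (JobsService is hypothetical; jobs.list() is a real WorkspaceClient call):

    from dataclasses import dataclass

    @dataclass
    class JobsService(WorkspaceService):
        def job_names(self):
            # sdk() proxies to self.workspace.sdk()
            return [j.settings.name for j in self.sdk().jobs.list()]

    with JobsService(workspace=Workspace(profile="DEFAULT")) as svc:
        print(svc.job_names())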
yggdrasil/dataclasses/__init__.py
@@ -1,3 +1,5 @@
+"""Enhanced dataclass helpers with Arrow awareness."""
+
 from .dataclass import yggdataclass
 
 __all__ = ["yggdataclass"]