ygg 0.1.24__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (61)
  1. {ygg-0.1.24 → ygg-0.1.25}/PKG-INFO +1 -1
  2. {ygg-0.1.24 → ygg-0.1.25}/pyproject.toml +1 -1
  3. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/PKG-INFO +1 -1
  4. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/databricks_path.py +156 -100
  5. {ygg-0.1.24 → ygg-0.1.25}/LICENSE +0 -0
  6. {ygg-0.1.24 → ygg-0.1.25}/README.md +0 -0
  7. {ygg-0.1.24 → ygg-0.1.25}/setup.cfg +0 -0
  8. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/SOURCES.txt +0 -0
  9. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/dependency_links.txt +0 -0
  10. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/entry_points.txt +0 -0
  11. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/requires.txt +0 -0
  12. {ygg-0.1.24 → ygg-0.1.25}/src/ygg.egg-info/top_level.txt +0 -0
  13. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/__init__.py +0 -0
  14. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/__init__.py +0 -0
  15. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  16. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/cluster.py +0 -0
  17. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/execution_context.py +0 -0
  18. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/compute/remote.py +0 -0
  19. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  20. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/jobs/config.py +0 -0
  21. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  22. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/engine.py +0 -0
  23. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  24. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
  25. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/sql/types.py +0 -0
  26. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  27. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/databricks/workspaces/workspace.py +0 -0
  28. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/dataclasses/__init__.py +0 -0
  29. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  30. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/__init__.py +0 -0
  31. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/databrickslib.py +0 -0
  32. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  33. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  34. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  35. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/pandaslib.py +0 -0
  36. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/polarslib.py +0 -0
  37. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/libs/sparklib.py +0 -0
  38. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/__init__.py +0 -0
  39. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/callable_serde.py +0 -0
  40. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/exceptions.py +0 -0
  41. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/modules.py +0 -0
  42. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/parallel.py +0 -0
  43. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/python_env.py +0 -0
  44. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/pyutils/retry.py +0 -0
  45. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/__init__.py +0 -0
  46. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/msal.py +0 -0
  47. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/requests/session.py +0 -0
  48. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/__init__.py +0 -0
  49. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/__init__.py +0 -0
  50. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  51. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/cast_options.py +0 -0
  52. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  53. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  54. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  55. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/registry.py +0 -0
  56. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  57. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  58. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  59. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/libs.py +0 -0
  60. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/python_arrow.py +0 -0
  61. {ygg-0.1.24 → ygg-0.1.25}/src/yggdrasil/types/python_defaults.py +0 -0
--- ygg-0.1.24/PKG-INFO
+++ ygg-0.1.25/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.24
+Version: 0.1.25
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License

--- ygg-0.1.24/pyproject.toml
+++ ygg-0.1.25/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ygg"
-version = "0.1.24"
+version = "0.1.25"
 description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { file = "LICENSE" }

--- ygg-0.1.24/src/ygg.egg-info/PKG-INFO
+++ ygg-0.1.25/src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.24
+Version: 0.1.25
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
--- ygg-0.1.24/src/yggdrasil/databricks/workspaces/databricks_path.py
+++ ygg-0.1.25/src/yggdrasil/databricks/workspaces/databricks_path.py
@@ -7,7 +7,7 @@ import urllib.parse as urlparse
 from contextlib import contextmanager
 from enum import Enum
 from pathlib import PurePosixPath, Path as SysPath
-from typing import BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
+from typing import Any, BinaryIO, Iterator, Optional, Tuple, Union, TYPE_CHECKING
 
 from databricks.sdk.service.catalog import VolumeType
 
@@ -15,7 +15,14 @@ from ...libs.databrickslib import databricks
 
 if databricks is not None:
     from databricks.sdk.service.workspace import ImportFormat, ObjectType
-    from databricks.sdk.errors.platform import NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied, AlreadyExists, ResourceAlreadyExists
+    from databricks.sdk.errors.platform import (
+        NotFound,
+        ResourceDoesNotExist,
+        BadRequest,
+        PermissionDenied,
+        AlreadyExists,
+        ResourceAlreadyExists,
+    )
 
     NOT_FOUND_ERRORS = NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied
     ALREADY_EXISTS_ERRORS = AlreadyExists, ResourceAlreadyExists, BadRequest
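
Note that NOT_FOUND_ERRORS and ALREADY_EXISTS_ERRORS are plain tuples of exception classes, so they can be passed directly to an except clause. A minimal sketch of the intended call-site pattern (the sdk handle and path here are hypothetical):

    # Hypothetical call site; any of the grouped SDK errors is treated as "missing"
    try:
        info = sdk.files.get_metadata("/Volumes/main/default/raw/data.csv")
    except NOT_FOUND_ERRORS:
        info = None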
@@ -26,7 +33,7 @@ if TYPE_CHECKING:
 
 __all__ = [
     "DatabricksPathKind",
-    "DatabricksPath"
+    "DatabricksPath",
 ]
 
 
@@ -43,7 +50,11 @@ class DatabricksPathKind(str, Enum):
     DBFS = "dbfs"
 
     @classmethod
-    def parse(cls, path: str, workspace: Optional["Workspace"] = None) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
+    def parse(
+        cls,
+        path: str,
+        workspace: Optional["Workspace"] = None,
+    ) -> Tuple["DatabricksPathKind", Optional["Workspace"], str]:
         from .workspace import Workspace
 
         if path.startswith("/Workspace") or path.startswith("/Users") or path.startswith("/Shared"):
@@ -52,13 +63,19 @@ class DatabricksPathKind(str, Enum):
                 path = path.replace("/Users/me", "/Users/%s" % workspace.current_user.user_name)
 
             return cls.WORKSPACE, workspace, path
+
         if path.startswith("/Volumes"):
             return cls.VOLUME, workspace, path
 
         if path.startswith("dbfs://"):
             parsed = urlparse.urlparse(path)
-            kind, _, inner_path = cls.parse(parsed.path)
-            workspace = Workspace(host=parsed.hostname) if workspace is None else workspace
+
+            # inner path is the URL path (e.g. /tmp/x or /Volumes/...)
+            kind, _, inner_path = cls.parse(parsed.path, workspace=workspace)
+
+            # hostname can be None for malformed dbfs:// variants; fall back to a default Workspace()
+            if workspace is None:
+                workspace = Workspace(host=parsed.hostname) if parsed.hostname else Workspace()
 
             return kind, workspace, inner_path
 
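The reworked dbfs:// branch threads the caller's workspace through the recursive parse call and only builds a Workspace from the URL when a hostname is actually present. A hedged sketch of the resulting behavior (the host and paths are made up; assumes Workspace() can resolve default credentials):

    kind, ws, inner = DatabricksPathKind.parse(
        "dbfs://adb-123.azuredatabricks.net/Volumes/main/default/raw/x.csv"
    )
    # kind  -> DatabricksPathKind.VOLUME (the URL path is re-parsed as /Volumes/...)
    # ws    -> Workspace(host="adb-123.azuredatabricks.net")
    # inner -> "/Volumes/main/default/raw/x.csv"

    kind, ws, inner = DatabricksPathKind.parse("dbfs:///tmp/x.csv")
    # no hostname here, so ws falls back to Workspace() rather than Workspace(host=None)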
@@ -66,7 +83,7 @@ class DatabricksPathKind(str, Enum):
 
 
 class DatabricksPath(SysPath, PurePosixPath):
-    _kind: DatabricksPathKind
+    _kind: "DatabricksPathKind"
     _workspace: Optional["Workspace"]
 
     _is_file: Optional[bool]
@@ -75,73 +92,135 @@ class DatabricksPath(SysPath, PurePosixPath):
     _raw_status: Optional[dict]
     _raw_status_refresh_time: float
 
+    @staticmethod
+    def _join_segments(pathsegments: tuple[Any, ...]) -> str:
+        if not pathsegments:
+            return ""
+
+        first = _seg_to_str(pathsegments[0])
+
+        # Keep dbfs:// URL-ish paths URL-ish (don't let PurePosixPath normalize it)
+        if first.startswith("dbfs://"):
+            rest = (_seg_to_str(s).lstrip("/") for s in pathsegments[1:])
+            first = first.rstrip("/")
+            tail = "/".join(rest)
+            return f"{first}/{tail}" if tail else first
+
+        return str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
+
+    def _init(self, template: Optional["DatabricksPath"] = None) -> None:
+        """
+        pathlib creates derived paths (parent, /, joinpath, with_name, etc.) via _from_parts,
+        which bypasses __new__. _init(template=...) is the hook to carry our metadata forward.
+        """
+        if isinstance(template, DatabricksPath):
+            # Recompute kind for the NEW path string (don't blindly copy _kind)
+            kind, ws, _ = DatabricksPathKind.parse(str(self), workspace=getattr(template, "_workspace", None))
+
+            self._kind = kind
+            self._workspace = ws if ws is not None else getattr(template, "_workspace", None)
+
+            # Never inherit caches from template
+            self._is_file = None
+            self._is_dir = None
+            self._raw_status = None
+            self._raw_status_refresh_time = 0.0
+        else:
+            kind, ws, _ = DatabricksPathKind.parse(str(self))
+            self._kind = kind
+            self._workspace = ws
+
+            self._is_file = None
+            self._is_dir = None
+            self._raw_status = None
+            self._raw_status_refresh_time = 0.0
+
     def __new__(
         cls,
-        *pathsegments,
+        *pathsegments: Any,
         workspace: Optional["Workspace"] = None,
         is_file: Optional[bool] = None,
         is_dir: Optional[bool] = None,
         raw_status: Optional[dict] = None,
-        raw_status_refresh_time: float = 0
-    ):
-        if not pathsegments:
-            joined = ""
-        else:
-            first = _seg_to_str(pathsegments[0])
-
-            # Special case: if someone passes a dbfs://... URL segment, keep it URL-like
-            if first.startswith("dbfs://"):
-                rest = [_seg_to_str(s).lstrip("/") for s in pathsegments[1:]]
-                joined = first.rstrip("/")
-                if rest:
-                    joined += "/" + "/".join(rest)
-            else:
-                joined = str(PurePosixPath(*(_seg_to_str(s) for s in pathsegments)))
+        raw_status_refresh_time: float = 0.0,
+    ) -> "DatabricksPath":
+        joined = cls._join_segments(pathsegments)
+        kind, parsed_ws, pure_path = DatabricksPathKind.parse(joined, workspace=workspace)
 
-        kind, w, p = DatabricksPathKind.parse(joined)
+        self = cls._from_parts([pure_path])  # pathlib-style construction (calls _init)
 
-        obj = super().__new__(cls, p)
+        # Override with constructor-provided metadata
+        self._kind = kind
+        self._workspace = parsed_ws if workspace is None else workspace
+        self._is_file = is_file
+        self._is_dir = is_dir
+        self._raw_status = raw_status
+        self._raw_status_refresh_time = float(raw_status_refresh_time)
 
-        obj._kind = kind
-        obj._workspace = w if workspace is None else workspace
-        obj._is_file = is_file
-        obj._is_dir = is_dir
-        obj._raw_status = raw_status
-        obj._raw_status_refresh_time = raw_status_refresh_time
+        return self
 
-        return obj
+    def __init__(
+        self,
+        *pathsegments: Any,
+        workspace: Optional["Workspace"] = None,
+        is_file: Optional[bool] = None,
+        is_dir: Optional[bool] = None,
+        raw_status: Optional[dict] = None,
+        raw_status_refresh_time: float = 0.0,
+    ) -> None:
+        # pathlib paths are effectively immutable; all init happens in __new__ / _init
+        pass
 
     def __enter__(self):
         self.workspace.__enter__()
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
-        self.workspace.__exit__(exc_type, exc_val, exc_tb)
+        return self.workspace.__exit__(exc_type, exc_val, exc_tb)
 
     @property
     def workspace(self):
-        if self._workspace is None:
+        try:
+            if self._workspace is None:
+                from .workspace import Workspace
+
+                self._workspace = Workspace()
+        except AttributeError:
+            self._init(template=self)
+
             from .workspace import Workspace
 
-            self._workspace = Workspace()
+            if self._workspace is None:
+                from .workspace import Workspace
+
+                self._workspace = Workspace()
+
         return self._workspace
 
     @property
     def kind(self):
-        return self._kind
+        try:
+            return self._kind
+        except AttributeError:
+            self._init(template=self)
+            return self._kind
+
+    @kind.setter
+    def kind(self, value: DatabricksPathKind):
+        self._kind = value
 
-    def is_file(self, *, follow_symlinks = True):
+    def is_file(self, *, follow_symlinks=True):
         if self._is_file is None:
             self.refresh_status()
         return self._is_file
 
-    def is_dir(self, *, follow_symlinks = True):
+    def is_dir(self, *, follow_symlinks=True):
         if self._is_dir is None:
             self.refresh_status()
         return self._is_dir
 
     def volume_parts(self) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[PurePosixPath]]:
-        if self._kind != DatabricksPathKind.VOLUME:
+        if self.kind != DatabricksPathKind.VOLUME:
            return None, None, None, None
 
        s = str(self)
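
Most of this hunk exists because, on the pathlib implementations this code targets (roughly Python 3.8-3.11), derived paths such as p.parent or p / "child" are built through the private classmethod _from_parts, which skips __new__; _init(template=...) is the hook those code paths use, and the workspace/kind properties re-run it when the attributes turn out to be missing. A minimal, package-independent sketch of that lazy-recompute pattern (KindPath and its classification rule are invented for illustration):

    from pathlib import PurePosixPath

    class KindPath(PurePosixPath):
        # Derived paths (p.parent, p / "x") may bypass our construction hooks
        # depending on the Python version, so compute metadata lazily.
        @property
        def kind(self) -> str:
            try:
                return self._kind
            except AttributeError:
                self._kind = "volume" if str(self).startswith("/Volumes") else "other"
                return self._kind

    p = KindPath("/Volumes/main/default/raw") / "file.txt"
    print(p.kind)  # "volume" -- computed on first access, no __new__ cooperation needed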
@@ -169,12 +248,12 @@ class DatabricksPath(SysPath, PurePosixPath):
             sdk = connected.workspace.sdk()
 
             try:
-                if connected._kind == DatabricksPathKind.VOLUME:
+                if connected.kind == DatabricksPathKind.VOLUME:
                     info = sdk.files.get_metadata(connected.as_files_api_path())
 
                     connected._raw_status = info
                     connected._is_file, connected._is_dir = True, False
-                elif connected._kind == DatabricksPathKind.WORKSPACE:
+                elif connected.kind == DatabricksPathKind.WORKSPACE:
                     info = sdk.workspace.get_status(connected.as_workspace_api_path())
 
                     is_dir = info.object_type in (ObjectType.DIRECTORY, ObjectType.REPO)
@@ -184,7 +263,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                     info = sdk.dbfs.get_status(connected.as_dbfs_api_path())
 
                     connected._raw_status = info
-                    connected._is_file, connected._is_dir = not info.is_dir, info.is_dir
+                    connected._is_file, connected._is_dir = (not info.is_dir), info.is_dir
 
                 connected._raw_status_refresh_time = time.time()
             except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
@@ -204,7 +283,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         self._is_file = None
         self._is_dir = None
 
-
     # ---- API path normalization helpers ----
 
     def as_workspace_api_path(self) -> str:
@@ -243,7 +321,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                 return True
         return False
 
-    def mkdir(self, mode = 0o777, parents = True, exist_ok = True):
+    def mkdir(self, mode=0o777, parents=True, exist_ok=True):
         """
         Create a new directory at this given path.
         """
@@ -251,9 +329,9 @@ class DatabricksPath(SysPath, PurePosixPath):
             connected.clear_cache()
 
             try:
-                if connected._kind == DatabricksPathKind.WORKSPACE:
+                if connected.kind == DatabricksPathKind.WORKSPACE:
                     connected.workspace.sdk().workspace.mkdirs(self.as_workspace_api_path())
-                elif connected._kind == DatabricksPathKind.VOLUME:
+                elif connected.kind == DatabricksPathKind.VOLUME:
                     return connected._create_volume_dir(mode=mode, parents=parents, exist_ok=exist_ok)
                 elif connected._kind == DatabricksPathKind.DBFS:
                     connected.workspace.sdk().dbfs.mkdirs(self.as_dbfs_api_path())
@@ -266,8 +344,6 @@ class DatabricksPath(SysPath, PurePosixPath):
                 connected.parent.mkdir(parents=True, exist_ok=True)
                 connected.mkdir(mode, parents=False, exist_ok=exist_ok)
             except (AlreadyExists, ResourceAlreadyExists):
-                # Cannot rely on checking for EEXIST, since the operating system
-                # could give priority to other errors like EACCES or EROFS
                 if not exist_ok:
                     raise
 
@@ -279,8 +355,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         try:
             sdk.catalogs.create(name=catalog_name)
         except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
-            # Cannot rely on checking for EEXIST, since the operating system
-            # could give priority to other errors like EACCES or EROFS
             if not exist_ok:
                 raise
 
@@ -288,8 +362,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         try:
             sdk.schemas.create(catalog_name=catalog_name, name=schema_name)
         except (AlreadyExists, ResourceAlreadyExists, PermissionDenied, BadRequest):
-            # Cannot rely on checking for EEXIST, since the operating system
-            # could give priority to other errors like EACCES or EROFS
             if not exist_ok:
                 raise
 
@@ -299,15 +371,13 @@ class DatabricksPath(SysPath, PurePosixPath):
                 catalog_name=catalog_name,
                 schema_name=schema_name,
                 name=volume_name,
-                volume_type=VolumeType.MANAGED
+                volume_type=VolumeType.MANAGED,
             )
         except (AlreadyExists, ResourceAlreadyExists, BadRequest):
-            # Cannot rely on checking for EEXIST, since the operating system
-            # could give priority to other errors like EACCES or EROFS
             if not exist_ok:
                 raise
 
-    def _create_volume_dir(self, mode = 0o777, parents = True, exist_ok = True):
+    def _create_volume_dir(self, mode=0o777, parents=True, exist_ok=True):
         path = self.as_files_api_path()
         sdk = self.workspace.sdk()
 
@@ -324,8 +394,6 @@ class DatabricksPath(SysPath, PurePosixPath):
 
             sdk.files.create_directory(path)
         except (AlreadyExists, ResourceAlreadyExists, BadRequest):
-            # Cannot rely on checking for EEXIST, since the operating system
-            # could give priority to other errors like EACCES or EROFS
             if not exist_ok:
                 raise
 
@@ -340,11 +408,11 @@ class DatabricksPath(SysPath, PurePosixPath):
 
     def rmfile(self):
         try:
-            if self._kind == DatabricksPathKind.VOLUME:
+            if self.kind == DatabricksPathKind.VOLUME:
                 return self._remove_volume_file()
-            elif self._kind == DatabricksPathKind.WORKSPACE:
+            elif self.kind == DatabricksPathKind.WORKSPACE:
                 return self._remove_workspace_file()
-            elif self._kind == DatabricksPathKind.DBFS:
+            elif self.kind == DatabricksPathKind.DBFS:
                 return self._remove_dbfs_file()
         finally:
             self.clear_cache()
@@ -376,17 +444,17 @@ class DatabricksPath(SysPath, PurePosixPath):
     def rmdir(self, recursive: bool = True):
         with self as connected:
             try:
-                if connected._kind == DatabricksPathKind.WORKSPACE:
+                if connected.kind == DatabricksPathKind.WORKSPACE:
                     connected.workspace.sdk().workspace.delete(
                         self.as_workspace_api_path(),
-                        recursive=recursive
+                        recursive=recursive,
                     )
-                elif connected._kind == DatabricksPathKind.VOLUME:
+                elif connected.kind == DatabricksPathKind.VOLUME:
                     return self._remove_volume_dir(recursive=recursive)
                 else:
                     connected.workspace.sdk().dbfs.delete(
                         self.as_dbfs_api_path(),
-                        recursive=recursive
+                        recursive=recursive,
                     )
             except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
                 pass
@@ -425,13 +493,13 @@ class DatabricksPath(SysPath, PurePosixPath):
         self.clear_cache()
 
     def ls(self, recursive: bool = False, fetch_size: int = None, raise_error: bool = True):
-        if self._kind == DatabricksPathKind.VOLUME:
+        if self.kind == DatabricksPathKind.VOLUME:
             for _ in self._ls_volume(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                 yield _
-        elif self._kind == DatabricksPathKind.WORKSPACE:
+        elif self.kind == DatabricksPathKind.WORKSPACE:
             for _ in self._ls_workspace(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                 yield _
-        elif self._kind == DatabricksPathKind.DBFS:
+        elif self.kind == DatabricksPathKind.DBFS:
             for _ in self._ls_dbfs(recursive=recursive, fetch_size=fetch_size, raise_error=raise_error):
                 yield _
 
@@ -444,13 +512,13 @@ class DatabricksPath(SysPath, PurePosixPath):
         try:
             for info in sdk.volumes.list(
                 catalog_name=catalog_name,
-                schema_name=schema_name
+                schema_name=schema_name,
             ):
                 base = DatabricksPath(
                     f"/Volumes/{info.catalog_name}/{info.schema_name}/{info.name}",
                     workspace=self.workspace,
                     is_file=False,
-                    is_dir=True
+                    is_dir=True,
                 )
 
                 if recursive:
@@ -463,12 +531,12 @@ class DatabricksPath(SysPath, PurePosixPath):
                 raise
         elif schema_name is None:
             try:
-                for info in sdk.schemas.list(catalog_name=catalog_name,):
+                for info in sdk.schemas.list(catalog_name=catalog_name):
                     base = DatabricksPath(
                         f"/Volumes/{info.catalog_name}/{info.name}",
                         workspace=self.workspace,
                         is_file=False,
-                        is_dir=True
+                        is_dir=True,
                     )
 
                     if recursive:
@@ -486,7 +554,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                     f"/Volumes/{info.name}",
                     workspace=self.workspace,
                     is_file=False,
-                    is_dir=True
+                    is_dir=True,
                 )
 
                 if recursive:
@@ -504,7 +572,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                 info.path,
                 workspace=self.workspace,
                 is_file=not info.is_directory,
-                is_dir=info.is_directory
+                is_dir=info.is_directory,
             )
 
             if recursive and info.is_directory:
@@ -526,9 +594,8 @@ class DatabricksPath(SysPath, PurePosixPath):
                     info.path,
                     workspace=self.workspace,
                     is_file=not is_dir,
-                    is_dir=is_dir
+                    is_dir=is_dir,
                 )
-
                 yield base
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             if raise_error:
@@ -538,14 +605,14 @@ class DatabricksPath(SysPath, PurePosixPath):
         sdk = self.workspace.sdk()
 
         try:
-            for info in sdk.dbfs.list(self.as_workspace_api_path(), recursive=recursive):
+            # FIX: DBFS listing should use the DBFS-normalized path, not the workspace path
+            for info in sdk.dbfs.list(self.as_dbfs_api_path(), recursive=recursive):
                 base = DatabricksPath(
                     info.path,
                     workspace=self.workspace,
                     is_file=not info.is_dir,
-                    is_dir=info.is_dir
+                    is_dir=info.is_dir,
                 )
-
                 yield base
         except (NotFound, ResourceDoesNotExist, BadRequest, PermissionDenied):
             if raise_error:
@@ -554,7 +621,7 @@ class DatabricksPath(SysPath, PurePosixPath):
     @contextmanager
     def open(
         self,
-        mode='r',
+        mode="r",
         buffering=-1,
         encoding=None,
         errors=None,
@@ -569,11 +636,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         Supported:
         - read: "rb", "r"
         - write: "wb", "w" (buffered; uploads on close for WORKSPACE/VOLUME)
-
-        Notes:
-        - VOLUME: uses w.files.download/upload (Files API). :contentReference[oaicite:5]{index=5}
-        - DBFS: uses w.dbfs.open when possible. :contentReference[oaicite:6]{index=6}
-        - WORKSPACE: uses w.workspace.download/upload. :contentReference[oaicite:7]{index=7}
         """
         if mode not in {"rb", "r", "wb", "w"}:
             raise ValueError(f"Unsupported mode {mode!r}. Use r/rb/w/wb.")
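
open is itself a @contextmanager and accepts only the four modes above; writes are buffered locally and uploaded when the context closes. A hedged round-trip sketch (the path is hypothetical, and whether encoding is forwarded exactly like this is an assumption drawn from the signature):

    p = DatabricksPath("/Volumes/main/default/raw/notes.txt")

    with p.open("w", encoding="utf-8") as f:  # buffered, uploaded on close
        f.write("hello from ygg 0.1.25")

    with p.open("r", encoding="utf-8") as f:
        print(f.read())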
@@ -592,10 +654,10 @@ class DatabricksPath(SysPath, PurePosixPath):
     @contextmanager
     def open_read(self, encoding: str | None = None):
         with self as connected:
-            if connected._kind == DatabricksPathKind.VOLUME:
+            if connected.kind == DatabricksPathKind.VOLUME:
                 with connected._open_read_volume(encoding=encoding) as f:
                     yield f
-            elif connected._kind == DatabricksPathKind.WORKSPACE:
+            elif connected.kind == DatabricksPathKind.WORKSPACE:
                 with connected._open_read_workspace(encoding=encoding) as f:
                     yield f
             else:
@@ -607,7 +669,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         workspace_client = self.workspace.sdk()
         path = self.as_files_api_path()
 
-        # Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
         resp = workspace_client.files.download(path)
         raw = io.BytesIO(resp.contents.read())
 
@@ -623,12 +684,10 @@ class DatabricksPath(SysPath, PurePosixPath):
         workspace_client = self.workspace.sdk()
         path = self.as_workspace_api_path()
 
-        # Files.download returns a stream-like response body. :contentReference[oaicite:8]{index=8}
-        raw = workspace_client.workspace.download(path)  # returns BinaryIO :contentReference[oaicite:10]{index=10}
+        raw = workspace_client.workspace.download(path)  # returns BinaryIO
 
         if encoding is not None:
             raw = io.BytesIO(raw.read())
-
             with io.TextIOWrapper(raw, encoding=encoding) as f:
                 yield f
         else:
@@ -640,7 +699,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         workspace_client = self.workspace.sdk()
         path = self.as_dbfs_api_path()
 
-        # dbfs.open gives BinaryIO for streaming reads :contentReference[oaicite:12]{index=12}
         raw = workspace_client.dbfs.open(path, read=True)
 
         if encoding is not None:
@@ -653,10 +711,10 @@ class DatabricksPath(SysPath, PurePosixPath):
     @contextmanager
     def open_write(self, encoding: str | None = None):
         with self as connected:
-            if connected._kind == DatabricksPathKind.VOLUME:
+            if connected.kind == DatabricksPathKind.VOLUME:
                 with connected._open_write_volume(encoding=encoding) as f:
                     yield f
-            elif connected._kind == DatabricksPathKind.WORKSPACE:
+            elif connected.kind == DatabricksPathKind.WORKSPACE:
                 with connected._open_write_workspace(encoding=encoding) as f:
                     yield f
             else:
@@ -668,7 +726,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         workspace_client = self.workspace.sdk()
         path = self.as_files_api_path()
 
-        # Buffer locally then upload stream on exit. :contentReference[oaicite:9]{index=9}
         buf = io.BytesIO()
 
         if encoding is not None:
@@ -703,7 +760,6 @@ class DatabricksPath(SysPath, PurePosixPath):
         workspace_client = self.workspace.sdk()
         path = self.as_workspace_api_path()
 
-        # Buffer then upload (AUTO works for workspace files) :contentReference[oaicite:11]{index=11}
         buf = io.BytesIO()
 
         if encoding is not None:
@@ -727,7 +783,7 @@ class DatabricksPath(SysPath, PurePosixPath):
                     path, buf, format=ImportFormat.AUTO, overwrite=overwrite
                 )
             else:
-                raise e
+                raise
 
             tw.detach()
         else:
@@ -749,14 +805,14 @@ class DatabricksPath(SysPath, PurePosixPath):
                     path, buf, format=ImportFormat.AUTO, overwrite=overwrite
                 )
             else:
-                raise e
+                raise
 
     @contextmanager
     def _open_write_dbfs(self, encoding: str | None = None, overwrite: bool = True):
         workspace_client = self.workspace.sdk()
         path = self.as_dbfs_api_path()
 
-        raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)  # :contentReference[oaicite:13]{index=13}
+        raw = workspace_client.dbfs.open(path, write=True, overwrite=overwrite)
 
         if encoding is not None:
             with io.TextIOWrapper(raw, encoding=encoding) as f:
@@ -766,4 +822,4 @@ class DatabricksPath(SysPath, PurePosixPath):
                 yield f
 
         self.clear_cache()
-        self._is_file, self._is_dir = True, False
+        self._is_file, self._is_dir = True, False