ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +31 -34
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +148 -1
  15. yggdrasil/databricks/sql/types.py +49 -1
  16. yggdrasil/databricks/workspaces/__init__.py +4 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +344 -0
  18. yggdrasil/databricks/workspaces/io.py +1123 -0
  19. yggdrasil/databricks/workspaces/path.py +1415 -0
  20. yggdrasil/databricks/workspaces/path_kind.py +13 -0
  21. yggdrasil/databricks/workspaces/workspace.py +298 -154
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +131 -0
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.30.dist-info/RECORD +0 -56
  58. yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  59. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  60. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  61. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  62. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/filesytem.py (new file)
@@ -0,0 +1,344 @@
+ """PyArrow filesystem wrappers for Databricks paths."""
+
+ __all__ = [
+     "DatabricksFileSystem",
+     "DatabricksFileSystemHandler"
+ ]
+
+ from typing import TYPE_CHECKING, Any, Union, List, Optional
+
+ from pyarrow import PythonFile
+ from pyarrow.fs import FileSystem, FileInfo, FileSelector, PyFileSystem, FileSystemHandler
+
+ if TYPE_CHECKING:
+     from ..workspaces.workspace import Workspace
+     from .path import DatabricksPath
+
+
+ class DatabricksFileSystemHandler(FileSystemHandler):
+     """PyArrow FileSystemHandler backed by Databricks paths."""
+
+     def __init__(
+         self,
+         workspace: "Workspace",
+     ):
+         """Create a handler bound to a Workspace.
+
+         Args:
+             workspace: Workspace instance to use.
+         """
+         super().__init__()
+         self.workspace = workspace
+
+     def __enter__(self):
+         """Enter a context manager and connect to the workspace.
+
+         Returns:
+             A connected DatabricksFileSystemHandler instance.
+         """
+         return self.connect(clone=True)
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Exit the context manager and close the workspace.
+
+         Args:
+             exc_type: Exception type, if raised.
+             exc_val: Exception value, if raised.
+             exc_tb: Exception traceback, if raised.
+         """
+         self.workspace.__exit__(exc_type, exc_val, exc_tb)
+
+     def _parse_path(self, obj: Any) -> "DatabricksPath":
+         """Parse a path-like object into a DatabricksPath.
+
+         Args:
+             obj: Path-like object to parse.
+
+         Returns:
+             A DatabricksPath instance.
+         """
+         from .path import DatabricksPath
+
+         return DatabricksPath.parse(obj, workspace=self.workspace)
+
+     def connect(self, clone: bool = True):
+         """Connect the workspace and optionally return a cloned handler.
+
+         Args:
+             clone: Whether to return a cloned handler.
+
+         Returns:
+             A connected handler.
+         """
+         # Connect the underlying workspace, not the handler itself,
+         # which would recurse without terminating.
+         workspace = self.workspace.connect(clone=clone)
+
+         if clone:
+             return DatabricksFileSystemHandler(
+                 workspace=workspace
+             )
+
+         self.workspace = workspace
+         return self
+
+     def close(self):
+         """Close the underlying workspace client.
+
+         Returns:
+             None.
+         """
+         self.workspace.close()
+
+     def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
+         """Copy a file between Databricks paths.
+
+         Args:
+             src: Source path.
+             dest: Destination path.
+             chunk_size: Chunk size in bytes.
+         """
+         src = self._parse_path(src)
+         dest = self._parse_path(dest)
+
+         with src.open("rb") as r, dest.open("wb") as w:
+             while True:
+                 chunk = r.read(chunk_size)
+                 if not chunk:
+                     break
+                 w.write(chunk)
+
+     def create_dir(self, path, recursive: bool = True, *args, **kwargs):
+         """Create a directory at the given path.
+
+         Args:
+             path: Directory path to create.
+             recursive: Whether to create parents. Accepted positionally,
+                 since PyArrow passes it as the second positional argument.
+
+         Returns:
+             The created DatabricksPath instance.
+         """
+         return self._parse_path(path).mkdir(parents=recursive)
+
+     def delete_dir(self, path):
+         """Delete a directory recursively.
+
+         Args:
+             path: Directory path to delete.
+         """
+         return self._parse_path(path).rmdir(recursive=True)
+
+     def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
+         """Delete the contents of a directory, leaving the directory itself in place.
+
+         Args:
+             path: Directory path whose contents should be removed.
+             accept_root_dir: Whether to allow deleting root contents.
+         """
+         path = self._parse_path(path)
+
+         if not accept_root_dir and path.full_path() in ("", "/"):
+             raise ValueError("refusing to delete root contents; pass accept_root_dir=True")
+
+         # Remove each child rather than the directory itself, per the
+         # pyarrow delete_dir_contents contract.
+         for child in path.ls(recursive=False):
+             child.remove(recursive=True)
+
+     def delete_root_dir_contents(self):
+         """Delete the contents of the root directory."""
+         return self.delete_dir_contents("/", accept_root_dir=True)
+
+     def delete_file(self, path):
+         """Delete a single file.
+
+         Args:
+             path: File path to delete.
+         """
+         return self._parse_path(path).rmfile()
+
+     def equals(self, other: FileSystem):
+         """Return True if the filesystem handler matches another.
+
+         Args:
+             other: Another FileSystem instance.
+
+         Returns:
+             True if equal, otherwise False.
+         """
+         return self == other
+
+     def from_uri(self, uri):
+         """Return a handler for the workspace in the provided URI.
+
+         Args:
+             uri: URI or path to parse.
+
+         Returns:
+             A DatabricksFileSystemHandler for the URI.
+         """
+         uri = self._parse_path(uri)
+
+         return self.__class__(
+             workspace=uri.workspace
+         )
+
+     def get_file_info(
+         self,
+         paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
+     ) -> Union[FileInfo, List[FileInfo]]:
+         """Return FileInfo objects for paths or selectors.
+
+         Args:
+             paths_or_selector: Path(s) or a FileSelector.
+
+         Returns:
+             A FileInfo or list of FileInfo objects.
+         """
+         from .path import DatabricksPath
+
+         if isinstance(paths_or_selector, (str, DatabricksPath)):
+             return self._parse_path(paths_or_selector).file_info
+
+         if isinstance(paths_or_selector, FileSelector):
+             return self.get_file_info_selector(paths_or_selector)
+
+         return [
+             self.get_file_info(obj)
+             for obj in paths_or_selector
+         ]
+
+     def get_file_info_selector(
+         self,
+         selector: FileSelector
+     ):
+         """Return FileInfo entries for a FileSelector.
+
+         Args:
+             selector: FileSelector describing the listing.
+
+         Returns:
+             A list of FileInfo entries.
+         """
+         base_dir = self._parse_path(selector.base_dir)
+
+         return [
+             p.file_info
+             for p in base_dir.ls(
+                 recursive=selector.recursive,
+                 allow_not_found=selector.allow_not_found
+             )
+         ]
+
+     def get_type_name(self):
+         """Return the filesystem type name.
+
+         Returns:
+             The filesystem type name string.
+         """
+         return "dbfs"
+
+     def move(self, src, dest):
+         """Move a file by copying then deleting the source.
+
+         Args:
+             src: Source path.
+             dest: Destination path.
+         """
+         src = self._parse_path(src)
+
+         src.copy_to(dest)
+
+         src.remove(recursive=True)
+
+     def normalize_path(self, path):
+         """Normalize a path to a full Databricks path string.
+
+         Args:
+             path: Path to normalize.
+
+         Returns:
+             The normalized full path string.
+         """
+         return self._parse_path(path).full_path()
+
+     def open(
+         self,
+         path,
+         mode: str = "r+",
+         encoding: Optional[str] = None,
+     ):
+         """Open a file path as a Databricks IO stream.
+
+         Args:
+             path: Path to open.
+             mode: File mode string.
+             encoding: Optional text encoding.
+
+         Returns:
+             A DatabricksIO instance.
+         """
+         return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
+
+     def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+         """Open an append stream.
+
+         Args:
+             path: Path to open.
+             compression: Optional compression hint.
+             buffer_size: Optional buffer size.
+             metadata: Optional metadata.
+
+         Returns:
+             A DatabricksIO instance.
+         """
+         return self._parse_path(path).open(mode="ab")
+
+     def open_input_file(self, path, mode: str = "rb", **kwargs):
+         """Open an input file as a PyArrow PythonFile.
+
+         Args:
+             path: Path to open.
+             mode: File mode string.
+             **kwargs: Additional options.
+
+         Returns:
+             A PyArrow PythonFile instance.
+         """
+         buf = self._parse_path(path).open(mode=mode).connect(clone=True)
+
+         return PythonFile(
+             buf,
+             mode=mode
+         )
+
+     def open_input_stream(self, path, compression='detect', buffer_size=None):
+         """Open an input stream for reading bytes.
+
+         Args:
+             path: Path to open.
+             compression: Optional compression hint.
+             buffer_size: Optional buffer size.
+
+         Returns:
+             A DatabricksIO instance.
+         """
+         return self._parse_path(path).open(mode="rb")
+
+     def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+         """Open an output stream for writing bytes.
+
+         Args:
+             path: Path to open.
+             compression: Optional compression hint.
+             buffer_size: Optional buffer size.
+             metadata: Optional metadata.
+
+         Returns:
+             A DatabricksIO instance.
+         """
+         return self._parse_path(path).open(mode="wb")
+
+
+ class DatabricksFileSystem(PyFileSystem):
+     """PyArrow filesystem wrapper for Databricks paths."""
+
+     def __init__(self, handler):
+         """Initialize the filesystem with a handler.
+
+         Args:
+             handler: FileSystemHandler instance.
+         """
+         super().__init__(handler)
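
For orientation, a minimal usage sketch of the new filesystem wrapper follows. It assumes `Workspace()` can be constructed from ambient Databricks credentials and that the Unity Catalog volume path shown exists; neither assumption comes from this diff.

    from pyarrow.fs import FileSelector

    from yggdrasil.databricks.workspaces.filesytem import (
        DatabricksFileSystem,
        DatabricksFileSystemHandler,
    )
    from yggdrasil.databricks.workspaces.workspace import Workspace

    # Assumption: Workspace() resolves host and token from the environment.
    workspace = Workspace()

    # Same pattern as __enter__: connect and receive a cloned, connected handler.
    handler = DatabricksFileSystemHandler(workspace=workspace).connect(clone=True)
    fs = DatabricksFileSystem(handler)

    # Directory listing through the standard PyArrow FileSelector protocol.
    selector = FileSelector("/Volumes/main/default/raw", recursive=True)
    for info in fs.get_file_info(selector):
        print(info.path, info.type, info.size)

    # Round-trip some bytes; the paths are illustrative.
    with fs.open_output_stream("/Volumes/main/default/raw/hello.txt") as out:
        out.write(b"hello from pyarrow")
    with fs.open_input_stream("/Volumes/main/default/raw/hello.txt") as inp:
        print(inp.read())

Because `__enter__` performs the same `connect(clone=True)`, the handler can equally be used in a `with` block that closes the workspace on exit.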
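
The point of implementing PyArrow's `FileSystemHandler` interface is that higher-level Arrow readers accept the wrapped filesystem directly. A sketch under the same assumptions, with a hypothetical dataset path:

    import pyarrow.parquet as pq

    # `fs` is the DatabricksFileSystem built above; the dataset path is hypothetical.
    table = pq.read_table(
        "/Volumes/main/default/raw/events.parquet",
        filesystem=fs,
    )
    print(table.num_rows)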