ygg 0.1.30__py3-none-any.whl → 0.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/METADATA +1 -1
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/RECORD +16 -13
- yggdrasil/databricks/jobs/config.py +2 -30
- yggdrasil/databricks/sql/statement_result.py +1 -1
- yggdrasil/databricks/sql/types.py +16 -0
- yggdrasil/databricks/workspaces/__init__.py +3 -1
- yggdrasil/databricks/workspaces/filesytem.py +161 -0
- yggdrasil/databricks/workspaces/io.py +745 -0
- yggdrasil/databricks/workspaces/path.py +1120 -0
- yggdrasil/databricks/workspaces/path_kind.py +10 -0
- yggdrasil/databricks/workspaces/workspace.py +97 -150
- yggdrasil/types/cast/arrow_cast.py +9 -0
- yggdrasil/databricks/workspaces/databricks_path.py +0 -784
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/WHEEL +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.30.dist-info → ygg-0.1.31.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
ygg-0.1.
|
|
1
|
+
ygg-0.1.31.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
2
2
|
yggdrasil/__init__.py,sha256=6OPibApplA5TF4TeixkQO_qewpaAidYX-fSDvvKYcTI,91
|
|
3
3
|
yggdrasil/databricks/__init__.py,sha256=aGVve5mpoQtxSK2nfzrexjRPoutCIyaOnKZijkG4_QE,92
|
|
4
4
|
yggdrasil/databricks/compute/__init__.py,sha256=TVDwPmW2SOmHmnhzZhsvrWbrxZ_lEcgqe3l9BeB-oxM,218
|
|
@@ -6,15 +6,18 @@ yggdrasil/databricks/compute/cluster.py,sha256=P4e4eBSSSykO8lzrGLLfHEZukObqcnJgC
|
|
|
6
6
|
yggdrasil/databricks/compute/execution_context.py,sha256=ecpjx1smGYu85Gv57SUM9T1Yo3n71gyRhAVqbe7X4-0,18677
|
|
7
7
|
yggdrasil/databricks/compute/remote.py,sha256=f8IJpPmEjXQh8toaS5Kt8rxkeVph7Uxrcv7utKjnT1I,1347
|
|
8
8
|
yggdrasil/databricks/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
yggdrasil/databricks/jobs/config.py,sha256=
|
|
9
|
+
yggdrasil/databricks/jobs/config.py,sha256=FI2fiiXcxps-aRRfcqqQCqnnYj_TUHQNVJMBxQBe_OU,10693
|
|
10
10
|
yggdrasil/databricks/sql/__init__.py,sha256=JZpQ9eCphDf1l4yzIZ7a7OLigxqXkqOgb0Mio7Rj09A,181
|
|
11
11
|
yggdrasil/databricks/sql/engine.py,sha256=edNXVdAwjfGENegoeE8cq1mxelaqsUJFtf57puZFEpY,30894
|
|
12
12
|
yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
yggdrasil/databricks/sql/statement_result.py,sha256=
|
|
14
|
-
yggdrasil/databricks/sql/types.py,sha256=
|
|
15
|
-
yggdrasil/databricks/workspaces/__init__.py,sha256=
|
|
16
|
-
yggdrasil/databricks/workspaces/
|
|
17
|
-
yggdrasil/databricks/workspaces/
|
|
13
|
+
yggdrasil/databricks/sql/statement_result.py,sha256=sqg3hLG-0vmMZSqQEf5QEbmb3CPvBwBKov7hPc_cPlo,12909
|
|
14
|
+
yggdrasil/databricks/sql/types.py,sha256=nkoLNCan5tGRFq_rQJUz2mbz5bjr8effHvm4Gkuaf68,5661
|
|
15
|
+
yggdrasil/databricks/workspaces/__init__.py,sha256=9Hk1csqJp2UiaAS5MZ7oz6KHh9JNo7A4Vo1xf61y2DM,68
|
|
16
|
+
yggdrasil/databricks/workspaces/filesytem.py,sha256=e7oWaLdbhnl8evl2AjTUdhnITMem3ry2WmOFO4CQ02g,4761
|
|
17
|
+
yggdrasil/databricks/workspaces/io.py,sha256=tGyjB5MWJK01lgeZC2my8mlyEEfpS9f-2MidTJNtlCI,21363
|
|
18
|
+
yggdrasil/databricks/workspaces/path.py,sha256=MEPck7V0Pc92JGzgkFz5b6p8rH1m_otmewGttsAs_nQ,39091
|
|
19
|
+
yggdrasil/databricks/workspaces/path_kind.py,sha256=S7f9jKLeceQNc6uZu9safASkZqHfKU-06Pq_3ixtfIU,174
|
|
20
|
+
yggdrasil/databricks/workspaces/workspace.py,sha256=vVm4hD3_Bjv3qRGz6Q03LgNCaIithaJJCCAVVYTYWKM,17563
|
|
18
21
|
yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
|
|
19
22
|
yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
|
|
20
23
|
yggdrasil/libs/__init__.py,sha256=88VTVKz8pnB8JSit6rlNzFm9O1ORyxqh0TQ7OBkXkQo,104
|
|
@@ -40,7 +43,7 @@ yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
|
|
|
40
43
|
yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
|
|
41
44
|
yggdrasil/types/python_defaults.py,sha256=7t0GMdgn1yJ-xhObeD3-vP091e6tdPl6qrm-wU7bbqw,8390
|
|
42
45
|
yggdrasil/types/cast/__init__.py,sha256=2qiXTrUk7HRcdN8ZuDDUxSDCkqJfAfuwG0M1XA7rTzk,240
|
|
43
|
-
yggdrasil/types/cast/arrow_cast.py,sha256=
|
|
46
|
+
yggdrasil/types/cast/arrow_cast.py,sha256=1brucYDBLJ9MUjQQ4ura2slIW2lUApzTc_7efq043fg,38664
|
|
44
47
|
yggdrasil/types/cast/cast_options.py,sha256=R758Gfv3latG6hq_xrB4hbNPHPL51D-euwIvKEY8YYw,12149
|
|
45
48
|
yggdrasil/types/cast/pandas_cast.py,sha256=-DlaZxGmisIklsZElUZI3g6nrj4R-xNDTI7q01IMB6g,7889
|
|
46
49
|
yggdrasil/types/cast/polars_cast.py,sha256=Z48vvAVyRYZVYabiad22e3tn44tZ1iaSJgOOoJ1MorU,27523
|
|
@@ -49,8 +52,8 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
|
|
|
49
52
|
yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
|
|
50
53
|
yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
|
|
51
54
|
yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
|
|
52
|
-
ygg-0.1.
|
|
53
|
-
ygg-0.1.
|
|
54
|
-
ygg-0.1.
|
|
55
|
-
ygg-0.1.
|
|
56
|
-
ygg-0.1.
|
|
55
|
+
ygg-0.1.31.dist-info/METADATA,sha256=6xmpGqSGuLKSCgOJkXoMJz3gHI4494h9fCT8Mx5daEQ,19204
|
|
56
|
+
ygg-0.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
57
|
+
ygg-0.1.31.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
|
|
58
|
+
ygg-0.1.31.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
|
|
59
|
+
ygg-0.1.31.dist-info/RECORD,,
|
|
@@ -2,11 +2,11 @@ import builtins
|
|
|
2
2
|
import dataclasses
|
|
3
3
|
import datetime as dt
|
|
4
4
|
import inspect
|
|
5
|
+
import logging
|
|
5
6
|
from dataclasses import dataclass, fields
|
|
6
7
|
from enum import Enum
|
|
7
8
|
from inspect import isclass
|
|
8
|
-
from typing import Any, Dict, List, get_type_hints,
|
|
9
|
-
import logging
|
|
9
|
+
from typing import Any, Dict, List, get_type_hints, get_origin
|
|
10
10
|
|
|
11
11
|
from ...libs.sparklib import SparkSession
|
|
12
12
|
from ...types.cast.registry import convert
|
|
@@ -308,31 +308,3 @@ class NotebookConfig:
|
|
|
308
308
|
spark_session.conf.set("spark.sql.session.timeZone", "UTC")
|
|
309
309
|
|
|
310
310
|
return cls.from_environment()
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
class ExampleEnum(Enum):
|
|
314
|
-
"""Example enum for widget demonstration"""
|
|
315
|
-
OPTION1 = "option1"
|
|
316
|
-
OPTION2 = "option2"
|
|
317
|
-
OPTION3 = "option3"
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
@dataclass
|
|
321
|
-
class CompleteNotebookConfig(NotebookConfig):
|
|
322
|
-
"""Example JobConfig with various field types to demonstrate widget handling"""
|
|
323
|
-
# Basic types
|
|
324
|
-
text_field: str
|
|
325
|
-
integer_field: int = 42
|
|
326
|
-
float_field: float = 3.14
|
|
327
|
-
boolean_field: bool = True
|
|
328
|
-
|
|
329
|
-
# Special types
|
|
330
|
-
date_field: dt.date = dt.date(2023, 1, 1)
|
|
331
|
-
datetime_field: dt.datetime = dt.datetime(2023, 1, 1, 12, 0, 0)
|
|
332
|
-
enum_field: ExampleEnum = ExampleEnum.OPTION1
|
|
333
|
-
|
|
334
|
-
# Collection types
|
|
335
|
-
list_of_strings: List[str] = None # Will be displayed as multiselect
|
|
336
|
-
|
|
337
|
-
# Optional fields
|
|
338
|
-
optional_text: Optional[str] = None
|
|
@@ -96,7 +96,7 @@ class StatementResult:
|
|
|
96
96
|
if self.is_spark_sql:
|
|
97
97
|
return self._response
|
|
98
98
|
|
|
99
|
-
if
|
|
99
|
+
if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
|
|
100
100
|
self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
|
|
101
101
|
|
|
102
102
|
return self._response
|
|
@@ -102,6 +102,18 @@ def _split_top_level_commas(s: str):
|
|
|
102
102
|
return parts
|
|
103
103
|
|
|
104
104
|
|
|
105
|
+
def _safe_bytes(obj):
|
|
106
|
+
if not isinstance(obj, bytes):
|
|
107
|
+
if not obj:
|
|
108
|
+
return b""
|
|
109
|
+
|
|
110
|
+
if not isinstance(obj, str):
|
|
111
|
+
obj = str(obj)
|
|
112
|
+
|
|
113
|
+
return obj.encode("utf-8")
|
|
114
|
+
return obj
|
|
115
|
+
|
|
116
|
+
|
|
105
117
|
def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
|
|
106
118
|
"""
|
|
107
119
|
Adapted parser that:
|
|
@@ -170,6 +182,10 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
|
|
|
170
182
|
if isinstance(col, CatalogColumnInfo):
|
|
171
183
|
parsed = json.loads(col.type_json)
|
|
172
184
|
md = parsed.get("metadata", {}) or {}
|
|
185
|
+
md = {
|
|
186
|
+
_safe_bytes(k): _safe_bytes(v)
|
|
187
|
+
for k, v in md.items()
|
|
188
|
+
}
|
|
173
189
|
nullable = col.nullable
|
|
174
190
|
elif isinstance(col, SQLColumnInfo):
|
|
175
191
|
md = {}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
__all__ = [
|
|
2
|
+
"DatabricksFileSystem",
|
|
3
|
+
"DatabricksFileSystemHandler"
|
|
4
|
+
]
|
|
5
|
+
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Union, List, Optional
|
|
7
|
+
|
|
8
|
+
from pyarrow import PythonFile
|
|
9
|
+
from pyarrow.fs import FileSystem, FileInfo, FileSelector, PyFileSystem, FileSystemHandler
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ..workspaces.workspace import Workspace
|
|
13
|
+
from .path import DatabricksPath
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DatabricksFileSystemHandler(FileSystemHandler):
|
|
17
|
+
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
workspace: "Workspace",
|
|
21
|
+
):
|
|
22
|
+
super().__init__()
|
|
23
|
+
self.workspace = workspace
|
|
24
|
+
|
|
25
|
+
def __enter__(self):
|
|
26
|
+
return self.connect(clone=True)
|
|
27
|
+
|
|
28
|
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
29
|
+
self.workspace.__exit__(exc_type, exc_val, exc_tb)
|
|
30
|
+
|
|
31
|
+
def _parse_path(self, obj: Any) -> "DatabricksPath":
|
|
32
|
+
from .path import DatabricksPath
|
|
33
|
+
|
|
34
|
+
return DatabricksPath.parse(obj, workspace=self.workspace)
|
|
35
|
+
|
|
36
|
+
def connect(self, clone: bool = True):
|
|
37
|
+
workspace = self.connect(clone=clone)
|
|
38
|
+
|
|
39
|
+
if clone:
|
|
40
|
+
return DatabricksFileSystemHandler(
|
|
41
|
+
workspace=workspace
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
self.workspace = workspace
|
|
45
|
+
return self
|
|
46
|
+
|
|
47
|
+
def close(self):
|
|
48
|
+
self.workspace.close()
|
|
49
|
+
|
|
50
|
+
def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
|
|
51
|
+
src = self._parse_path(src)
|
|
52
|
+
dest = self._parse_path(dest)
|
|
53
|
+
|
|
54
|
+
with src.open("rb") as r, dest.open("wb") as w:
|
|
55
|
+
while True:
|
|
56
|
+
chunk = r.read(chunk_size)
|
|
57
|
+
if not chunk:
|
|
58
|
+
break
|
|
59
|
+
w.write(chunk)
|
|
60
|
+
|
|
61
|
+
def create_dir(self, path, *args, recursive: bool = True, **kwargs):
|
|
62
|
+
return self._parse_path(path).mkdir(parents=recursive)
|
|
63
|
+
|
|
64
|
+
def delete_dir(self, path):
|
|
65
|
+
return self._parse_path(path).rmdir(recursive=True)
|
|
66
|
+
|
|
67
|
+
def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
|
|
68
|
+
return self._parse_path(path).rmdir(recursive=True)
|
|
69
|
+
|
|
70
|
+
def delete_root_dir_contents(self):
|
|
71
|
+
return self.delete_dir_contents("/", accept_root_dir=True)
|
|
72
|
+
|
|
73
|
+
def delete_file(self, path):
|
|
74
|
+
return self._parse_path(path).rmfile()
|
|
75
|
+
|
|
76
|
+
def equals(self, other: FileSystem):
|
|
77
|
+
return self == other
|
|
78
|
+
|
|
79
|
+
def from_uri(self, uri):
|
|
80
|
+
uri = self._parse_path(uri)
|
|
81
|
+
|
|
82
|
+
return self.__class__(
|
|
83
|
+
workspace=uri.workspace
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
def get_file_info(
|
|
87
|
+
self,
|
|
88
|
+
paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
|
|
89
|
+
) -> Union[FileInfo, List[FileInfo]]:
|
|
90
|
+
from .path import DatabricksPath
|
|
91
|
+
|
|
92
|
+
if isinstance(paths_or_selector, (str, DatabricksPath)):
|
|
93
|
+
result = self._parse_path(paths_or_selector).file_info
|
|
94
|
+
|
|
95
|
+
return result
|
|
96
|
+
|
|
97
|
+
if isinstance(paths_or_selector, FileSelector):
|
|
98
|
+
return self.get_file_info_selector(paths_or_selector)
|
|
99
|
+
|
|
100
|
+
return [
|
|
101
|
+
self.get_file_info(obj)
|
|
102
|
+
for obj in paths_or_selector
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
def get_file_info_selector(
|
|
106
|
+
self,
|
|
107
|
+
selector: FileSelector
|
|
108
|
+
):
|
|
109
|
+
base_dir = self._parse_path(selector.base_dir)
|
|
110
|
+
|
|
111
|
+
return [
|
|
112
|
+
p.file_info
|
|
113
|
+
for p in base_dir.ls(
|
|
114
|
+
recursive=selector.recursive,
|
|
115
|
+
allow_not_found=selector.allow_not_found
|
|
116
|
+
)
|
|
117
|
+
]
|
|
118
|
+
|
|
119
|
+
def get_type_name(self):
|
|
120
|
+
return "dbfs"
|
|
121
|
+
|
|
122
|
+
def move(self, src, dest):
|
|
123
|
+
src = self._parse_path(src)
|
|
124
|
+
|
|
125
|
+
src.copy_to(dest)
|
|
126
|
+
|
|
127
|
+
src.remove(recursive=True)
|
|
128
|
+
|
|
129
|
+
def normalize_path(self, path):
|
|
130
|
+
return self._parse_path(path).full_path()
|
|
131
|
+
|
|
132
|
+
def open(
|
|
133
|
+
self,
|
|
134
|
+
path,
|
|
135
|
+
mode: str = "r+",
|
|
136
|
+
encoding: Optional[str] = None,
|
|
137
|
+
):
|
|
138
|
+
return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
|
|
139
|
+
|
|
140
|
+
def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
|
|
141
|
+
return self._parse_path(path).open(mode="ab")
|
|
142
|
+
|
|
143
|
+
def open_input_file(self, path, mode: str = "rb", **kwargs):
|
|
144
|
+
buf = self._parse_path(path).open(mode=mode).connect(clone=True)
|
|
145
|
+
|
|
146
|
+
return PythonFile(
|
|
147
|
+
buf,
|
|
148
|
+
mode=mode
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
def open_input_stream(self, path, compression='detect', buffer_size=None):
|
|
152
|
+
return self._parse_path(path).open(mode="rb")
|
|
153
|
+
|
|
154
|
+
def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
|
|
155
|
+
return self._parse_path(path).open(mode="wb")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class DatabricksFileSystem(PyFileSystem):
|
|
159
|
+
|
|
160
|
+
def __init__(self, handler): # real signature unknown; restored from __doc__
|
|
161
|
+
super().__init__(handler)
|