ygg 0.1.56__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +7 -2
- yggdrasil/databricks/compute/execution_context.py +465 -277
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +92 -9
- yggdrasil/databricks/workspaces/path.py +120 -74
- yggdrasil/databricks/workspaces/workspace.py +212 -68
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -0
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/mimetypes.py +0 -0
- yggdrasil/pyutils/python_env.py +13 -12
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.56.dist-info/RECORD +0 -68
- yggdrasil/databricks/ai/__init__.py +0 -1
- yggdrasil/databricks/ai/loki.py +0 -374
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/io.py (+92 -9):

```diff
--- a/yggdrasil/databricks/workspaces/io.py
+++ b/yggdrasil/databricks/workspaces/io.py
@@ -2,11 +2,13 @@
 
 import base64
 import io
+import logging
 import time
 from abc import ABC, abstractmethod
 from threading import Thread
-from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union
+from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
 
+import dill
 import pyarrow as pa
 import pyarrow.csv as pcsv
 import pyarrow.parquet as pq
```
```diff
@@ -20,6 +22,7 @@ from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
 from ...libs.polarslib import polars, PolarsDataFrame
+from ...pyutils import retry
 from ...types.cast.registry import convert
 from ...types.file_format import ExcelFileFormat
 
```
```diff
@@ -30,6 +33,7 @@ if databricks is not None:
         ResourceDoesNotExist,
         BadRequest,
     )
+    from databricks.sdk.errors import InternalError
 
 if TYPE_CHECKING:
     from .path import DatabricksPath
```
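The `InternalError` import lands inside the existing `if databricks is not None:` guard, so the module still imports cleanly when the Databricks SDK is absent. A minimal sketch of that optional-dependency pattern, assuming `databrickslib` resolves the SDK roughly like this (the `try/except` shape is an assumption; the real resolution lives in `yggdrasil/libs/databrickslib.py`, which this diff does not show):

```python
# Sketch of an optional-dependency guard (assumed shape).
try:
    import databricks  # optional dependency: databricks-sdk
except ImportError:
    databricks = None

if databricks is not None:
    # Error types are only importable when the SDK is actually installed.
    from databricks.sdk.errors import InternalError
```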
```diff
@@ -40,6 +44,9 @@ __all__ = [
 ]
 
 
+LOGGER = logging.getLogger(__name__)
+
+
 class DatabricksIO(ABC, IO):
     """File-like interface for Databricks workspace, volume, or DBFS paths."""
 
```
```diff
@@ -94,6 +101,15 @@ class DatabricksIO(ABC, IO):
     def __hash__(self):
         return self.path.__hash__()
 
+    def __str__(self):
+        return self.path.__str__()
+
+    def __repr__(self):
+        return "%s(path=%s)" % (
+            self.__class__.__name__,
+            self.path.__repr__()
+        )
+
     @classmethod
     def create_instance(
         cls,
```
```diff
@@ -502,7 +518,7 @@
         return True
 
     @abstractmethod
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write raw bytes to the remote path.
 
         Args:
```
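Widening the abstract signature from `bytes` to `Union[bytes, IO[bytes]]` lets callers hand over an open binary stream instead of materializing the whole payload in memory. A hedged usage sketch — `remote` stands in for an instance of any concrete `DatabricksIO` subclass and is not part of the diff:

```python
import io

# remote: a concrete DatabricksIO subclass instance (hypothetical)
remote.write_all_bytes(b"small payload")         # in-memory bytes, as before

with open("model.bin", "rb") as fh:
    remote.write_all_bytes(fh)                   # new: stream an open file handle

remote.write_all_bytes(io.BytesIO(b"buffered"))  # any IO[bytes] source works
```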
```diff
@@ -919,10 +935,32 @@
 
         self.write_all_bytes(data=buffer.getvalue())
 
+    def read_object(
+        self,
+    ):
+        content = self.read_all_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_object(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_all_bytes(data=buffer.getvalue())
 
 class DatabricksWorkspaceIO(DatabricksIO):
     """IO adapter for Workspace files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a Workspace file.
 
```
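`write_object` pickles arbitrary Python objects with dill (pandas DataFrames take their native `to_pickle` path), and `read_object` reverses it with `dill.loads`. A sketch of the intended round-trip; `remote` is again a hypothetical concrete `DatabricksIO` instance:

```python
# dill extends pickle, so closures and lambdas survive the round-trip,
# which the stdlib pickle module would reject.
payload = {"threshold": 0.75, "score": lambda x: x + 1}

remote.write_object(payload)      # hypothetical remote: DatabricksIO instance
restored = remote.read_object()

assert restored["threshold"] == 0.75
assert restored["score"](1) == 2
```

A DataFrame written via `to_pickle` comes back through `dill.loads` as well, since dill accepts standard pickle streams.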
```diff
@@ -954,11 +992,12 @@ class DatabricksWorkspaceIO(DatabricksIO):
         end = start + length
         return data[start:end]
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a Workspace file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksWorkspaceIO instance.
```
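Every overridden `read_byte_range`/`write_all_bytes` now carries `@retry(exceptions=(InternalError,))`, so transient SDK `InternalError`s are retried instead of failing the transfer. The `retry` helper comes from `yggdrasil.pyutils` and its implementation is not part of this diff; a minimal sketch with the same call shape might look like this (attempt count and backoff are assumptions):

```python
import functools
import time

def retry(exceptions=(Exception,), attempts=3, delay=1.0):
    """Sketch of a decorator matching retry(exceptions=(InternalError,));
    the real yggdrasil.pyutils.retry may differ in defaults and backoff."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return fn(*args, **kwargs)
                except exceptions:
                    if attempt == attempts:
                        raise                    # retries exhausted: re-raise
                    time.sleep(delay * attempt)  # simple linear backoff
        return wrapper
    return decorator
```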
```diff
@@ -967,6 +1006,20 @@ class DatabricksWorkspaceIO(DatabricksIO):
         workspace_client = sdk.workspace
         full_path = self.path.workspace_full_path()
 
+        if isinstance(data, bytes):
+            bsize = len(data)
+        elif isinstance(data, io.BytesIO):
+            bsize = len(data.getvalue())
+        else:
+            bsize = None
+
+        LOGGER.debug(
+            "Writing %s(size=%s) in %s",
+            type(data),
+            bsize,
+            self
+        )
+
         try:
             workspace_client.upload(
                 full_path,
```
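The size probe only measures payloads whose length is free to compute; an arbitrary stream would have to be consumed (or seeked) to measure, so it is logged as `None`. The same branch, extracted as a standalone illustration:

```python
import io
from typing import IO, Optional, Union

def probe_size(data: Union[bytes, IO[bytes]]) -> Optional[int]:
    # Mirrors the branch above: cheap lengths for in-memory payloads only.
    if isinstance(data, bytes):
        return len(data)
    if isinstance(data, io.BytesIO):
        return len(data.getvalue())
    return None  # e.g. an open file or socket: no free length

assert probe_size(b"abc") == 3
assert probe_size(io.BytesIO(b"abcd")) == 4
assert probe_size(open(__file__, "rb")) is None
```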
```diff
@@ -987,16 +1040,23 @@
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
-            size=len(data),
+            size=bsize,
             mtime=time.time()
         )
 
+        LOGGER.info(
+            "Written %s bytes in %s",
+            bsize,
+            self
+        )
+
         return self
 
 
 class DatabricksVolumeIO(DatabricksIO):
     """IO adapter for Unity Catalog volume files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a volume file.
 
```
```diff
@@ -1036,11 +1096,12 @@ class DatabricksVolumeIO(DatabricksIO):
         end = start + length
         return data[start:end]
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a volume file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksVolumeIO instance.
```
```diff
@@ -1049,6 +1110,11 @@ class DatabricksVolumeIO(DatabricksIO):
         client = sdk.files
         full_path = self.path.files_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             client.upload(
                 full_path,
```
```diff
@@ -1064,6 +1130,11 @@ class DatabricksVolumeIO(DatabricksIO):
             overwrite=True
         )
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
```
```diff
@@ -1077,6 +1148,7 @@ class DatabricksVolumeIO(DatabricksIO):
 class DatabricksDBFSIO(DatabricksIO):
     """IO adapter for DBFS files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a DBFS file.
 
```
```diff
@@ -1121,11 +1193,12 @@ class DatabricksDBFSIO(DatabricksIO):
 
         return bytes(read_bytes)
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a DBFS file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksDBFSIO instance.
```
```diff
@@ -1134,6 +1207,11 @@ class DatabricksDBFSIO(DatabricksIO):
         client = sdk.dbfs
         full_path = self.path.dbfs_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             with client.open(
                 path=full_path,
```
```diff
@@ -1153,6 +1231,11 @@ class DatabricksDBFSIO(DatabricksIO):
         ) as f:
             f.write(data)
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
```