ygg 0.1.57__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +19 -0
- yggdrasil/databricks/compute/execution_context.py +491 -282
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +89 -9
- yggdrasil/databricks/workspaces/path.py +120 -72
- yggdrasil/databricks/workspaces/workspace.py +214 -61
- yggdrasil/exceptions.py +7 -0
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -2
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/python_env.py +14 -13
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.57.dist-info/RECORD +0 -66
- yggdrasil/databricks/ai/loki.py +0 -53
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/WHEEL +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
- /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
yggdrasil/databricks/workspaces/io.py

@@ -2,11 +2,13 @@
 
 import base64
 import io
+import logging
 import time
 from abc import ABC, abstractmethod
 from threading import Thread
-from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union
+from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
 
+import dill
 import pyarrow as pa
 import pyarrow.csv as pcsv
 import pyarrow.parquet as pq
@@ -42,6 +44,9 @@ __all__ = [
 ]
 
 
+LOGGER = logging.getLogger(__name__)
+
+
 class DatabricksIO(ABC, IO):
     """File-like interface for Databricks workspace, volume, or DBFS paths."""
 
@@ -96,6 +101,15 @@ class DatabricksIO(ABC, IO):
     def __hash__(self):
         return self.path.__hash__()
 
+    def __str__(self):
+        return self.path.__str__()
+
+    def __repr__(self):
+        return "%s(path=%s)" % (
+            self.__class__.__name__,
+            self.path.__repr__()
+        )
+
     @classmethod
     def create_instance(
         cls,
@@ -504,7 +518,7 @@ class DatabricksIO(ABC, IO):
         return True
 
     @abstractmethod
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
        """Write raw bytes to the remote path.
 
         Args:
@@ -921,10 +935,32 @@ class DatabricksIO(ABC, IO):
 
         self.write_all_bytes(data=buffer.getvalue())
 
+    def read_object(
+        self,
+    ):
+        content = self.read_all_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_object(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_all_bytes(data=buffer.getvalue())
 
 class DatabricksWorkspaceIO(DatabricksIO):
     """IO adapter for Workspace files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a Workspace file.
 
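The new DatabricksIO.read_object/write_object helpers round-trip Python objects through dill, with pandas DataFrames special-cased through DataFrame.to_pickle on the write side. The following is a minimal, self-contained sketch of just that serialization logic (kept outside the Databricks IO layer so it runs locally; the helper names below are illustrative, not part of the package):

import io

import dill
import pandas as pd


def write_object_like(obj) -> bytes:
    buffer = io.BytesIO()
    if isinstance(obj, pd.DataFrame):
        obj.to_pickle(buffer)          # pandas special case, as in the diff
    else:
        buffer.write(dill.dumps(obj))  # everything else goes through dill
    return buffer.getvalue()


def read_object_like(payload: bytes):
    # dill.loads handles both branches: it extends the stdlib pickle
    # machinery, so it also loads pickles produced by DataFrame.to_pickle.
    return dill.loads(payload)


roundtripped = read_object_like(write_object_like({"labels": ["a", "b"]}))
assert roundtripped == {"labels": ["a", "b"]}

df = read_object_like(write_object_like(pd.DataFrame({"x": [1, 2]})))
assert list(df["x"]) == [1, 2]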
@@ -956,11 +992,12 @@ class DatabricksWorkspaceIO(DatabricksIO):
         end = start + length
         return data[start:end]
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a Workspace file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksWorkspaceIO instance.
@@ -969,6 +1006,20 @@ class DatabricksWorkspaceIO(DatabricksIO):
         workspace_client = sdk.workspace
         full_path = self.path.workspace_full_path()
 
+        if isinstance(data, bytes):
+            bsize = len(data)
+        elif isinstance(data, io.BytesIO):
+            bsize = len(data.getvalue())
+        else:
+            bsize = None
+
+        LOGGER.debug(
+            "Writing %s(size=%s) in %s",
+            type(data),
+            bsize,
+            self
+        )
+
         try:
             workspace_client.upload(
                 full_path,
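The workspace upload path now logs the payload type and size before writing. As the hunk shows, only raw bytes and io.BytesIO objects report a size; any other stream is logged with size None. A runnable sketch of just that probe (the function name is illustrative):

import io
from typing import IO, Optional, Union


def probe_size(data: Union[bytes, IO[bytes]]) -> Optional[int]:
    if isinstance(data, bytes):
        return len(data)
    if isinstance(data, io.BytesIO):
        return len(data.getvalue())
    return None  # any other file-like: size unknown without seeking


assert probe_size(b"abc") == 3
assert probe_size(io.BytesIO(b"abcd")) == 4
# an open file handle or other stream probes as None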
@@ -989,16 +1040,23 @@
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
-            size=
+            size=bsize,
             mtime=time.time()
         )
 
+        LOGGER.info(
+            "Written %s bytes in %s",
+            bsize,
+            self
+        )
+
         return self
 
 
 class DatabricksVolumeIO(DatabricksIO):
     """IO adapter for Unity Catalog volume files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a volume file.
 
@@ -1039,11 +1097,11 @@ class DatabricksVolumeIO(DatabricksIO):
         return data[start:end]
 
     @retry(exceptions=(InternalError,))
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a volume file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksVolumeIO instance.
@@ -1052,6 +1110,11 @@ class DatabricksVolumeIO(DatabricksIO):
         client = sdk.files
         full_path = self.path.files_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             client.upload(
                 full_path,
@@ -1067,6 +1130,11 @@ class DatabricksVolumeIO(DatabricksIO):
                 overwrite=True
             )
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
@@ -1080,6 +1148,7 @@ class DatabricksVolumeIO(DatabricksIO):
 class DatabricksDBFSIO(DatabricksIO):
     """IO adapter for DBFS files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a DBFS file.
 
@@ -1124,11 +1193,12 @@
 
         return bytes(read_bytes)
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a DBFS file.
 
         Args:
-            data:
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksDBFSIO instance.
@@ -1137,6 +1207,11 @@ class DatabricksDBFSIO(DatabricksIO):
         client = sdk.dbfs
         full_path = self.path.dbfs_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             with client.open(
                 path=full_path,
@@ -1156,6 +1231,11 @@ class DatabricksDBFSIO(DatabricksIO):
             ) as f:
                 f.write(data)
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
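Across all three adapters, read_byte_range and write_all_bytes are now wrapped in @retry(exceptions=(InternalError,)), so transient InternalError responses are retried (the exception import and the retry helper itself are not shown in this diff). The following is only a plausible minimal sketch of a decorator matching that call shape, not yggdrasil's actual implementation, which may differ in attempt counts, backoff, or waiting_config integration:

import functools
import time


def retry(exceptions=(Exception,), attempts=3, base_delay=1.0):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == attempts - 1:
                        raise  # out of attempts: propagate the last error
                    time.sleep(base_delay * 2 ** attempt)  # exponential backoff
        return wrapper
    return decorator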