ygg 0.1.57__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. yggdrasil/ai/__init__.py +2 -0
  4. yggdrasil/ai/session.py +89 -0
  5. yggdrasil/ai/sql_session.py +310 -0
  6. yggdrasil/databricks/__init__.py +0 -3
  7. yggdrasil/databricks/compute/cluster.py +68 -113
  8. yggdrasil/databricks/compute/command_execution.py +674 -0
  9. yggdrasil/databricks/compute/exceptions.py +19 -0
  10. yggdrasil/databricks/compute/execution_context.py +491 -282
  11. yggdrasil/databricks/compute/remote.py +4 -14
  12. yggdrasil/databricks/exceptions.py +10 -0
  13. yggdrasil/databricks/sql/__init__.py +0 -4
  14. yggdrasil/databricks/sql/engine.py +161 -173
  15. yggdrasil/databricks/sql/exceptions.py +9 -1
  16. yggdrasil/databricks/sql/statement_result.py +108 -120
  17. yggdrasil/databricks/sql/warehouse.py +331 -92
  18. yggdrasil/databricks/workspaces/io.py +89 -9
  19. yggdrasil/databricks/workspaces/path.py +120 -72
  20. yggdrasil/databricks/workspaces/workspace.py +214 -61
  21. yggdrasil/exceptions.py +7 -0
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -2
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/python_env.py +14 -13
  30. yggdrasil/pyutils/waiting_config.py +171 -0
  31. yggdrasil/types/cast/arrow_cast.py +3 -0
  32. yggdrasil/types/cast/pandas_cast.py +157 -169
  33. yggdrasil/types/cast/polars_cast.py +11 -43
  34. yggdrasil/types/dummy_class.py +81 -0
  35. yggdrasil/version.py +1 -1
  36. ygg-0.1.57.dist-info/RECORD +0 -66
  37. yggdrasil/databricks/ai/loki.py +0 -53
  38. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/WHEEL +0 -0
  39. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  40. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  41. {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
  42. /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
@@ -2,11 +2,13 @@
 
 import base64
 import io
+import logging
 import time
 from abc import ABC, abstractmethod
 from threading import Thread
-from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union
+from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
 
+import dill
 import pyarrow as pa
 import pyarrow.csv as pcsv
 import pyarrow.parquet as pq
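
Note on the new dill import: it backs the read_object/write_object helpers added further down. Unlike the stdlib pickle, dill can serialize lambdas, closures, and interactively defined functions, which is presumably why it was chosen. A quick self-contained illustration:

    import dill

    # dill round-trips objects the stdlib pickle rejects, e.g. a lambda:
    double = lambda x: x * 2
    restored = dill.loads(dill.dumps(double))
    assert restored(21) == 42
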
@@ -42,6 +44,9 @@ __all__ = [
 ]
 
 
+LOGGER = logging.getLogger(__name__)
+
+
 class DatabricksIO(ABC, IO):
     """File-like interface for Databricks workspace, volume, or DBFS paths."""
 
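Because of logging.getLogger(__name__), the write logs added below are emitted under the logger name yggdrasil.databricks.workspaces.io, and consumers have to opt in to see them. A minimal sketch:

    import logging

    # Route log records to stderr and enable the DEBUG-level
    # "Writing ..." messages added in this release.
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("yggdrasil.databricks.workspaces.io").setLevel(logging.DEBUG)
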
@@ -96,6 +101,15 @@ class DatabricksIO(ABC, IO):
     def __hash__(self):
         return self.path.__hash__()
 
+    def __str__(self):
+        return self.path.__str__()
+
+    def __repr__(self):
+        return "%s(path=%s)" % (
+            self.__class__.__name__,
+            self.path.__repr__()
+        )
+
     @classmethod
     def create_instance(
         cls,
@@ -504,7 +518,7 @@ class DatabricksIO(ABC, IO):
         return True
 
     @abstractmethod
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write raw bytes to the remote path.
 
         Args:
@@ -921,10 +935,32 @@ class DatabricksIO(ABC, IO):
 
         self.write_all_bytes(data=buffer.getvalue())
 
+    def read_object(
+        self,
+    ):
+        content = self.read_all_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_object(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_all_bytes(data=buffer.getvalue())
 
 class DatabricksWorkspaceIO(DatabricksIO):
     """IO adapter for Workspace files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a Workspace file.
 
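The read_object/write_object pair gives every DatabricksIO backend a pickle-style round trip: pandas DataFrames go through DataFrame.to_pickle, everything else through dill.dumps. Note that write_object accepts a file_format argument it never reads in this hunk, and that read_object always uses dill.loads, which also understands the standard pickle stream to_pickle produces. A usage sketch, where io_handle stands in for any concrete DatabricksIO instance (construction is outside this hunk):

    # Hypothetical handle; see DatabricksIO.create_instance above.
    state = {"weights": [0.1, 0.2], "epoch": 7}

    io_handle.write_object(state)       # dill-pickled, then uploaded
    restored = io_handle.read_object()  # downloaded, then dill.loads()
    assert restored == state
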
@@ -956,11 +992,12 @@ class DatabricksWorkspaceIO(DatabricksIO):
         end = start + length
         return data[start:end]
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a Workspace file.
 
         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksWorkspaceIO instance.
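The retry decorator now applied to every read/write in this file is defined elsewhere in the package (its import is not shown in these hunks), so its exact semantics are not visible here. A conventional decorator of this shape, offered only as a sketch of the idea with illustrative parameters:

    import functools
    import time

    def retry(exceptions, tries=3, delay=1.0, backoff=2.0):
        # Re-invokes the wrapped call when one of the given exception
        # types is raised, sleeping with exponential backoff between
        # attempts. Hypothetical signature, not the package's.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                wait = delay
                for attempt in range(tries):
                    try:
                        return func(*args, **kwargs)
                    except exceptions:
                        if attempt == tries - 1:
                            raise
                        time.sleep(wait)
                        wait *= backoff
            return wrapper
        return decorator
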
@@ -969,6 +1006,20 @@ class DatabricksWorkspaceIO(DatabricksIO):
         workspace_client = sdk.workspace
         full_path = self.path.workspace_full_path()
 
+        if isinstance(data, bytes):
+            bsize = len(data)
+        elif isinstance(data, io.BytesIO):
+            bsize = len(data.getvalue())
+        else:
+            bsize = None
+
+        LOGGER.debug(
+            "Writing %s(size=%s) in %s",
+            type(data),
+            bsize,
+            self
+        )
+
         try:
             workspace_client.upload(
                 full_path,
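The size probe above only recognizes bytes and io.BytesIO; any other stream falls through to bsize = None, so both the log line and the cached size metadata (see the next hunk) are unknown for streamed uploads. A more general probe using seek/tell, not in the package and shown only as a sketch, could recover the size of any seekable stream:

    import io
    from typing import IO, Optional, Union

    def probe_size(data: Union[bytes, IO[bytes]]) -> Optional[int]:
        # Hypothetical helper mirroring the diff's logic, extended with
        # a seek/tell fallback for arbitrary seekable streams.
        if isinstance(data, bytes):
            return len(data)
        if isinstance(data, io.BytesIO):
            return len(data.getvalue())
        if data.seekable():
            pos = data.tell()
            end = data.seek(0, io.SEEK_END)
            data.seek(pos)                 # restore the read position
            return end - pos
        return None
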
@@ -989,16 +1040,23 @@ class DatabricksWorkspaceIO(DatabricksIO):
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
-            size=len(data),
+            size=bsize,
             mtime=time.time()
         )
 
+        LOGGER.info(
+            "Written %s bytes in %s",
+            bsize,
+            self
+        )
+
         return self
 
 
 class DatabricksVolumeIO(DatabricksIO):
     """IO adapter for Unity Catalog volume files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a volume file.
 
@@ -1039,11 +1097,11 @@ class DatabricksVolumeIO(DatabricksIO):
         return data[start:end]
 
     @retry(exceptions=(InternalError,))
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a volume file.
 
         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksVolumeIO instance.
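With the widened signature, all three backends accept a binary stream as well as bytes, so large uploads no longer have to be materialized in memory (at the cost of the unknown-size logging noted above). Usage sketch, io_handle again standing in for a concrete instance and the file name invented:

    # Stream a local file to a Databricks path without reading it
    # fully into memory first.
    with open("local_model.bin", "rb") as src:
        io_handle.write_all_bytes(data=src)

    io_handle.write_all_bytes(data=b"small payload")  # old form still works
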
@@ -1052,6 +1110,11 @@ class DatabricksVolumeIO(DatabricksIO):
         client = sdk.files
         full_path = self.path.files_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             client.upload(
                 full_path,
@@ -1067,6 +1130,11 @@ class DatabricksVolumeIO(DatabricksIO):
                 overwrite=True
             )
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
@@ -1080,6 +1148,7 @@ class DatabricksVolumeIO(DatabricksIO):
 class DatabricksDBFSIO(DatabricksIO):
     """IO adapter for DBFS files."""
 
+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a DBFS file.
 
@@ -1124,11 +1193,12 @@
 
         return bytes(read_bytes)
 
-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a DBFS file.
 
         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.
 
         Returns:
             The DatabricksDBFSIO instance.
@@ -1137,6 +1207,11 @@
         client = sdk.dbfs
         full_path = self.path.dbfs_full_path()
 
+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             with client.open(
                 path=full_path,
@@ -1156,6 +1231,11 @@
             ) as f:
                 f.write(data)
 
+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,