ygg-0.1.56-py3-none-any.whl → ygg-0.1.60-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
  2. ygg-0.1.60.dist-info/RECORD +74 -0
  3. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/WHEEL +1 -1
  4. yggdrasil/ai/__init__.py +2 -0
  5. yggdrasil/ai/session.py +89 -0
  6. yggdrasil/ai/sql_session.py +310 -0
  7. yggdrasil/databricks/__init__.py +0 -3
  8. yggdrasil/databricks/compute/cluster.py +68 -113
  9. yggdrasil/databricks/compute/command_execution.py +674 -0
  10. yggdrasil/databricks/compute/exceptions.py +7 -2
  11. yggdrasil/databricks/compute/execution_context.py +465 -277
  12. yggdrasil/databricks/compute/remote.py +4 -14
  13. yggdrasil/databricks/exceptions.py +10 -0
  14. yggdrasil/databricks/sql/__init__.py +0 -4
  15. yggdrasil/databricks/sql/engine.py +161 -173
  16. yggdrasil/databricks/sql/exceptions.py +9 -1
  17. yggdrasil/databricks/sql/statement_result.py +108 -120
  18. yggdrasil/databricks/sql/warehouse.py +331 -92
  19. yggdrasil/databricks/workspaces/io.py +92 -9
  20. yggdrasil/databricks/workspaces/path.py +120 -74
  21. yggdrasil/databricks/workspaces/workspace.py +212 -68
  22. yggdrasil/libs/databrickslib.py +23 -18
  23. yggdrasil/libs/extensions/spark_extensions.py +1 -1
  24. yggdrasil/libs/pandaslib.py +15 -6
  25. yggdrasil/libs/polarslib.py +49 -13
  26. yggdrasil/pyutils/__init__.py +1 -0
  27. yggdrasil/pyutils/callable_serde.py +12 -19
  28. yggdrasil/pyutils/exceptions.py +16 -0
  29. yggdrasil/pyutils/mimetypes.py +0 -0
  30. yggdrasil/pyutils/python_env.py +13 -12
  31. yggdrasil/pyutils/waiting_config.py +171 -0
  32. yggdrasil/types/cast/arrow_cast.py +3 -0
  33. yggdrasil/types/cast/pandas_cast.py +157 -169
  34. yggdrasil/types/cast/polars_cast.py +11 -43
  35. yggdrasil/types/dummy_class.py +81 -0
  36. yggdrasil/version.py +1 -1
  37. ygg-0.1.56.dist-info/RECORD +0 -68
  38. yggdrasil/databricks/ai/__init__.py +0 -1
  39. yggdrasil/databricks/ai/loki.py +0 -374
  40. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
  41. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
  42. {ygg-0.1.56.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/io.py

@@ -2,11 +2,13 @@

 import base64
 import io
+import logging
 import time
 from abc import ABC, abstractmethod
 from threading import Thread
-from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union
+from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any

+import dill
 import pyarrow as pa
 import pyarrow.csv as pcsv
 import pyarrow.parquet as pq
@@ -20,6 +22,7 @@ from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
 from ...libs.polarslib import polars, PolarsDataFrame
+from ...pyutils import retry
 from ...types.cast.registry import convert
 from ...types.file_format import ExcelFileFormat

@@ -30,6 +33,7 @@ if databricks is not None:
         ResourceDoesNotExist,
         BadRequest,
     )
+    from databricks.sdk.errors import InternalError

 if TYPE_CHECKING:
     from .path import DatabricksPath
@@ -40,6 +44,9 @@ __all__ = [
 ]


+LOGGER = logging.getLogger(__name__)
+
+
 class DatabricksIO(ABC, IO):
     """File-like interface for Databricks workspace, volume, or DBFS paths."""

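The imports above wire in two things used throughout the rest of this diff: `retry` from `yggdrasil.pyutils`, and `InternalError` from `databricks.sdk.errors` as the exception class to retry on. The decorator's implementation is not part of this diff; a minimal sketch matching the `@retry(exceptions=(InternalError,))` call shape seen below might look like this (attempt counts, delays, and backoff are assumptions, not the package's actual policy):

    import functools
    import time

    def retry(exceptions=(Exception,), attempts=3, delay=1.0, backoff=2.0):
        """Retry the wrapped callable when one of `exceptions` is raised."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                wait = delay
                for attempt in range(1, attempts + 1):
                    try:
                        return func(*args, **kwargs)
                    except exceptions:
                        if attempt == attempts:
                            raise  # out of attempts: surface the last error
                        time.sleep(wait)
                        wait *= backoff
            return wrapper
        return decorator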
@@ -94,6 +101,15 @@ class DatabricksIO(ABC, IO):
     def __hash__(self):
         return self.path.__hash__()

+    def __str__(self):
+        return self.path.__str__()
+
+    def __repr__(self):
+        return "%s(path=%s)" % (
+            self.__class__.__name__,
+            self.path.__repr__()
+        )
+
     @classmethod
     def create_instance(
         cls,
@@ -502,7 +518,7 @@ class DatabricksIO(ABC, IO):
         return True

     @abstractmethod
-    def write_all_bytes(self, data: bytes):
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write raw bytes to the remote path.

         Args:
@@ -919,10 +935,32 @@

         self.write_all_bytes(data=buffer.getvalue())

+    def read_object(
+        self,
+    ):
+        content = self.read_all_bytes()
+        obj = dill.loads(content)
+
+        return obj
+
+    def write_object(
+        self,
+        obj: Any,
+        file_format: Optional[FileFormat] = None,
+    ):
+        buffer = io.BytesIO()
+
+        if isinstance(obj, PandasDataFrame):
+            obj.to_pickle(buffer)
+        else:
+            buffer.write(dill.dumps(obj))
+
+        self.write_all_bytes(data=buffer.getvalue())

 class DatabricksWorkspaceIO(DatabricksIO):
     """IO adapter for Workspace files."""

+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a Workspace file.

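The new `read_object`/`write_object` pair round-trips arbitrary Python objects through `dill`, with a fast path that serializes pandas DataFrames via `DataFrame.to_pickle`; since `dill` also loads standard pickle payloads, `read_object` covers both paths. A usage sketch (how a `DatabricksIO` handle is obtained, e.g. via `create_instance`, is outside this diff, so `handle` here is illustrative):

    import pandas as pd

    handle.write_object({"threshold": 0.5, "labels": ["a", "b"]})
    config = handle.read_object()   # dill.loads of the stored bytes

    df = pd.DataFrame({"x": [1, 2, 3]})
    handle.write_object(df)         # pandas fast path: DataFrame.to_pickle
    df2 = handle.read_object()      # dill also reads standard pickles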
@@ -954,11 +992,12 @@ class DatabricksWorkspaceIO(DatabricksIO):
         end = start + length
         return data[start:end]

-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a Workspace file.

         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.

         Returns:
             The DatabricksWorkspaceIO instance.
@@ -967,6 +1006,20 @@
         workspace_client = sdk.workspace
         full_path = self.path.workspace_full_path()

+        if isinstance(data, bytes):
+            bsize = len(data)
+        elif isinstance(data, io.BytesIO):
+            bsize = len(data.getvalue())
+        else:
+            bsize = None
+
+        LOGGER.debug(
+            "Writing %s(size=%s) in %s",
+            type(data),
+            bsize,
+            self
+        )
+
         try:
             workspace_client.upload(
                 full_path,
@@ -987,16 +1040,23 @@
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
-            size=len(data),
+            size=bsize,
             mtime=time.time()
         )

+        LOGGER.info(
+            "Written %s bytes in %s",
+            bsize,
+            self
+        )
+
         return self


 class DatabricksVolumeIO(DatabricksIO):
     """IO adapter for Unity Catalog volume files."""

+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a volume file.

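With `write_all_bytes` widened to `Union[bytes, IO[bytes]]`, callers can stream an upload instead of materializing it in memory. The Workspace variant above only derives a size for logging and metadata when it is cheap to do so (`bytes` or `io.BytesIO`); for any other stream, `size` is reset to None. Both call styles, sketched with an illustrative handle and file name:

    import io

    ws_io.write_all_bytes(b"raw payload")            # bytes: size = len(data)
    ws_io.write_all_bytes(io.BytesIO(b"buffered"))   # BytesIO: size via getvalue()

    with open("model.bin", "rb") as f:               # any other IO[bytes]
        ws_io.write_all_bytes(f)                     # size is recorded as None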
@@ -1036,11 +1096,12 @@
         end = start + length
         return data[start:end]

-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a volume file.

         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.

         Returns:
             The DatabricksVolumeIO instance.
@@ -1049,6 +1110,11 @@
         client = sdk.files
         full_path = self.path.files_full_path()

+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             client.upload(
                 full_path,
@@ -1064,6 +1130,11 @@
                 overwrite=True
             )

+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
@@ -1077,6 +1148,7 @@
 class DatabricksDBFSIO(DatabricksIO):
     """IO adapter for DBFS files."""

+    @retry(exceptions=(InternalError,))
     def read_byte_range(self, start: int, length: int, allow_not_found: bool = False) -> bytes:
         """Read bytes from a DBFS file.

@@ -1121,11 +1193,12 @@

         return bytes(read_bytes)

-    def write_all_bytes(self, data: bytes):
+    @retry(exceptions=(InternalError,))
+    def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
         """Write bytes to a DBFS file.

         Args:
-            data: Bytes to write.
+            data: Union[bytes, IO[bytes]] to write.

         Returns:
             The DatabricksDBFSIO instance.
@@ -1134,6 +1207,11 @@
         client = sdk.dbfs
         full_path = self.path.dbfs_full_path()

+        LOGGER.debug(
+            "Writing all bytes in %s",
+            self
+        )
+
         try:
             with client.open(
                 path=full_path,
@@ -1153,6 +1231,11 @@
             ) as f:
                 f.write(data)

+        LOGGER.info(
+            "Written all bytes in %s",
+            self
+        )
+
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
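All three adapters now emit a debug line before each upload and an info line after it through the module-level `LOGGER`. Since `LOGGER = logging.getLogger(__name__)`, the messages can be surfaced with the standard library alone:

    import logging

    # Show the new "Writing ..." / "Written ..." messages from the IO adapters.
    logging.basicConfig(level=logging.DEBUG)
    logging.getLogger("yggdrasil.databricks.workspaces.io").setLevel(logging.DEBUG)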