snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +131 -34
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -1
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
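
Nearly every hunk below follows one pattern: a bare `raise` is split into constructing the exception, tagging it with `attach_custom_error_code(exception, ErrorCodes.<CODE>)`, and only then raising it. A minimal sketch of that idiom follows; the real `ErrorCodes` and `attach_custom_error_code` live in the new error/error_codes.py and the expanded error/error_utils.py listed above, so this stand-in only mirrors the observable shape, and the attribute it sets is an assumption:

    from enum import Enum

    class ErrorCodes(Enum):
        # Stand-in for error/error_codes.py; member names taken from the hunks below.
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
        INVALID_INPUT = "INVALID_INPUT"
        INTERNAL_ERROR = "INTERNAL_ERROR"

    def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> None:
        # Assumption: the helper annotates the exception in place so the server
        # can report a stable code alongside the Spark-style message.
        exc.custom_error_code = code

    # 0.30.1 style:
    #     raise ValueError("Catalog name cannot be None")
    # 0.31.0 style:
    exception = ValueError("Catalog name cannot be None")
    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
    # raise exception
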
--- a/snowflake/snowpark_connect/expression/map_unresolved_star.py
+++ b/snowflake/snowpark_connect/expression/map_unresolved_star.py
@@ -11,6 +11,8 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -152,9 +154,11 @@ def map_unresolved_star(
         typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
         return spark_names, typed_column
 
-    raise AnalysisException(
+    exception = AnalysisException(
         f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
 
 
 def map_unresolved_star_struct(
--- a/snowflake/snowpark_connect/expression/map_update_fields.py
+++ b/snowflake/snowpark_connect/expression/map_update_fields.py
@@ -8,6 +8,8 @@ from pyspark.errors.exceptions.base import AnalysisException
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake.snowpark.types import DataType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +41,11 @@ def update_field_in_schema(
                     field.name, updated_subschema, field.nullable, _is_column=False
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             field_updated = True
         else:
             new_field = field  # leave unchanged
@@ -59,9 +63,11 @@ def update_field_in_schema(
         # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
         # but if the value type is not None, it means we should add or update this field which has already been covered above
         # if we reach this code, it means the field should have existed
-        raise AnalysisException(
+        exception = AnalysisException(
             message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return StructType(new_fields)
 
 
@@ -99,9 +105,11 @@ def map_update_fields(
     )
 
     if not isinstance(struct_typed_column.typ, StructType):
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
 
     final_schema = struct_typed_column.typ
     value_column_list = []
@@ -137,9 +145,11 @@ def map_update_fields(
     final_name = f"update_fields({struct_name}, {update_operations_str})"
 
     if len(final_schema.fields) == 0:
-        raise AnalysisException(
+        exception = AnalysisException(
            f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
     @snowpark_fn.udf(
         input_types=input_types_to_the_udf,
--- a/snowflake/snowpark_connect/expression/map_window_function.py
+++ b/snowflake/snowpark_connect/expression/map_window_function.py
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 
 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
-from snowflake.snowpark_connect.error.error_utils import SparkException
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
     ]
 )
 
+RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
 CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])
 
 
@@ -128,6 +134,11 @@ def map_window_function(
         case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
             frame_name.append("ROWS BETWEEN")
             frame_type_func_string = "rows_between"
+            if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                # with Spark, we have to use range_between here.
+                frame_type_func_string = "range_between"
             lower_name, lower = parse_frame_boundary(
                 exp.window.frame_spec.lower, is_upper=False
             )
@@ -138,9 +149,11 @@ def map_window_function(
                 lower != snowpark.Window.UNBOUNDED_PRECEDING
                 or upper != snowpark.Window.CURRENT_ROW
             ):
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
 
             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
             orders = orders[:1]
 
             if proto_func_name in SPARK_RANKING_FUNCTIONS:
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
 
             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
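
The new RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS set papers over a frame-type disagreement noted in the hunk's own comment: Spark pins percent_rank to a ROWS frame in its plans, while Snowflake's PERCENT_RANK accepts only RANGE BETWEEN, so the mapper swaps rows_between for range_between. From the client nothing changes; ordinary PySpark such as this (with `df` assumed to have grp/val columns) now translates cleanly:

    from pyspark.sql import Window, functions as F

    # Spark Connect sends percent_rank with a ROWS frame; 0.31.0 rewrites the
    # frame to range_between so Snowflake's PERCENT_RANK accepts it.
    w = Window.partitionBy("grp").orderBy("val")
    df.withColumn("pr", F.percent_rank().over(w))
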
--- a/snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py
+++ b/snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py
@@ -11,6 +11,8 @@ import pyspark.sql.connect.proto.types_pb2 as types_proto
 
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MissingDatabase, MissingSchema
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
@@ -39,28 +41,40 @@ class AbstractSparkCatalog(ABC):
         description: str,
         **options: typing.Any,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("createTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("createTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def listDatabases(
         self,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listDatabases is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "listDatabases is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def getDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def databaseExists(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("databaseExists is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "databaseExists is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def listTables(
@@ -68,14 +82,18 @@ class AbstractSparkCatalog(ABC):
         spark_dbName: str | None = None,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listTables is not implemented")
+        exception = SnowparkConnectNotImplementedError("listTables is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def getTable(
         self,
         spark_tableName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("getTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def tableExists(
@@ -83,7 +101,9 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("tableExists is not implemented")
+        exception = SnowparkConnectNotImplementedError("tableExists is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def listColumns(
@@ -91,36 +111,50 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listColumns is not implemented")
+        exception = SnowparkConnectNotImplementedError("listColumns is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def currentDatabase(self) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("currentDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "currentDatabase is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def setCurrentDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "setCurrentDatabase is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def dropGlobalTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "dropGlobalTempView is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     @abstractmethod
     def dropTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("dropTempView is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "dropTempView is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     def cacheTable(
         self,
@@ -135,9 +169,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -168,9 +204,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -194,9 +232,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
        )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -249,7 +289,11 @@ def _process_multi_layer_database(
             else:
                 return None, c, d
         case _:
-            raise ValueError(f"Unexpected database identifier format: {spark_mli}")
+            exception = ValueError(
+                f"Unexpected database identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
 
 
 def _process_multi_layer_identifier(
@@ -283,5 +327,9 @@ def _process_multi_layer_identifier(
             else:
                 snowflake_database, snowflake_schema, snowflake_obj = d, s, t
         case _:
-            raise ValueError(f"Unexpected table/view identifier format: {spark_mli}")
+            exception = ValueError(
+                f"Unexpected table/view identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
     return spark_catalog, snowflake_database, snowflake_schema, snowflake_obj
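
Every stub in AbstractSparkCatalog now tags its SnowparkConnectNotImplementedError with ErrorCodes.UNSUPPORTED_OPERATION before raising. These stubs back the standard PySpark catalog surface, so calls like the following (object names are hypothetical; `spark` is assumed to be a snowpark-connect session) report a stable code whenever the backing method is unimplemented:

    # Standard PySpark Catalog calls that route into the methods above; an
    # unimplemented one now surfaces UNSUPPORTED_OPERATION instead of an
    # untagged SnowparkConnectNotImplementedError.
    spark.catalog.listDatabases()
    spark.catalog.databaseExists("analytics")      # hypothetical name
    spark.catalog.tableExists("analytics.events")  # hypothetical name
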
--- a/snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py
+++ b/snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py
@@ -24,6 +24,8 @@ from snowflake.snowpark_connect.config import (
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -109,9 +111,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
         sf_schema = sf_schema.replace("*", ".*")
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog
 
         dbs: list[Schema] | None = None
@@ -131,7 +135,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if dbs is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         catalogs: list[str] = list()
@@ -163,9 +168,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         """Listing a single database that's accessible in Snowflake."""
         catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog
 
         db: Schema | None = None
@@ -184,7 +191,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if db is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch database {spark_dbName} after all retry attempts"
+                f"Failed to fetch database {spark_dbName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
 
         name = unquote_if_quoted(db.name)
@@ -307,9 +315,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_dbName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         else:
             catalog = sf_database = sf_schema = None
 
@@ -440,9 +450,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
         table: Table | None = None
         for attempt in Retrying(
@@ -463,7 +475,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
 
         if table is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch table {spark_tableName} after all retry attempts"
+                f"Failed to fetch table {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
 
         return pandas.DataFrame(
@@ -563,9 +576,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         for attempt in Retrying(
             max_retries=5,
             initial_backoff=100,  # 100ms
@@ -598,7 +613,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             )
         if columns is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+                f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -702,22 +718,28 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         if source == "":
             source = global_config.get("spark.sql.sources.default")
         if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Source '{source}' is not currently supported by Catalog.createTable. "
                 "Maybe default value through 'spark.sql.sources.default' should be set."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if path != "":
             # External table creation is not supported currently.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "External table creation is not supported currently."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
         session = get_or_create_snowpark_session()
         # Managed table
         if schema.ByteSize() == 0:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unable to infer schema for {source.upper()}. It must be specified manually.",
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)
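
The retry paths above now pass custom_error_code=ErrorCodes.INTERNAL_ERROR directly into MaxRetryExceeded rather than tagging afterwards, which implies the exception class (error/exceptions.py, +13 -4 in the file list) gained that keyword. A minimal sketch of what such a constructor could look like, purely an assumption since this page does not display the exceptions.py hunk:

    class MaxRetryExceeded(Exception):
        # Hypothetical shape; the real class lives in error/exceptions.py.
        def __init__(self, message: str, custom_error_code=None) -> None:
            super().__init__(message)
            self.custom_error_code = custom_error_code
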
--- a/snowflake/snowpark_connect/relation/catalogs/utils.py
+++ b/snowflake/snowpark_connect/relation/catalogs/utils.py
@@ -5,6 +5,8 @@
 from collections import defaultdict
 
 from snowflake.connector.errors import ProgrammingError
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
 
     # Validate input parameters to match PySpark behavior
     if catalog_name is None:
-        raise ValueError("Catalog name cannot be None")
+        exception = ValueError("Catalog name cannot be None")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if catalog_name == "":
-        raise ValueError(
+        exception = ValueError(
             "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     CURRENT_CATALOG_NAME = catalog_name
     if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
         sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
         return get_current_catalog()
     except ProgrammingError as e:
-        raise Exception(
+        exception = Exception(
             f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
-        ) from e
+        )
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception from e
 
 
 def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:
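
The comment in set_current_catalog says the checks mirror PySpark's own behavior: None is rejected outright and an empty name fails the same way an unknown plugin does. As seen from a PySpark client (catalog name hypothetical; `spark` assumed served by snowpark-connect):

    # Server-side validation added above, observed from the client:
    spark.catalog.setCurrentCatalog("some_known_catalog")  # hypothetical name
    # An empty string raises ValueError("Catalog '' plugin class not found:
    # spark.sql.catalog. is not defined"); None raises
    # ValueError("Catalog name cannot be None").
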
--- a/snowflake/snowpark_connect/relation/io_utils.py
+++ b/snowflake/snowpark_connect/relation/io_utils.py
@@ -6,6 +6,9 @@ from urllib.parse import urlparse
 
 from pyspark.errors.exceptions.base import AnalysisException
 
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 CLOUD_PREFIX_TO_CLOUD = {
     "abfss": "azure",
     "wasbs": "azure",
@@ -74,7 +77,7 @@ def get_compression_for_source_and_options(
 
     if not is_supported_compression(source, compression):
         supported_compressions = supported_compressions_for_format(source)
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Compression {compression} is not supported for {source} format. "
             + (
                 f"Supported compressions: {sorted(supported_compressions)}"
@@ -82,6 +85,8 @@ def get_compression_for_source_and_options(
                 else "None compression supported for this format."
             )
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     return compression
 
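
is_supported_compression and supported_compressions_for_format are the io_utils helpers the check above composes; their bodies are not shown on this page. A sketch of the per-format allow-list shape they imply (the codecs listed here are illustrative, not the package's actual table):

    # Assumed shape of the allow-list behind the check above.
    SUPPORTED_COMPRESSIONS: dict[str, set[str]] = {
        "csv": {"gzip", "bzip2"},       # illustrative entries only
        "parquet": {"snappy", "gzip"},  # illustrative entries only
    }

    def is_supported_compression(source: str, compression: str) -> bool:
        return compression in SUPPORTED_COMPRESSIONS.get(source, set())
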
--- a/snowflake/snowpark_connect/relation/map_catalog.py
+++ b/snowflake/snowpark_connect/relation/map_catalog.py
@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -148,4 +150,6 @@ def map_catalog(
             return get_current_catalog().uncacheTable(rel.uncache_table.table_name)
         case other:
             # TODO: list_function implementation is blocked on SNOW-1787268
-            raise SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            exception = SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception