snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect was flagged as potentially problematic by the registry.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -26,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
         else:
             return None
     else:
-        None
+        return None


 def map_unresolved_star(
@@ -53,16 +56,17 @@ def map_unresolved_star(
         return spark_names, typed_column

     # scenario where it is expanding * to mulitple columns
-    spark_names = []
-    snowpark_names = []
-    qualifiers = []
+    spark_names: list[str] = []
+    snowpark_names: list[str] = []
+    qualifiers: list[set[ColumnQualifier]] = []

+    target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
     (
         spark_names,
         snowpark_names,
         qualifiers,
     ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+        target_qualifier
     )

     if len(spark_names) == 0:
@@ -73,7 +77,7 @@ def map_unresolved_star(
                 snowpark_names,
                 qualifiers,
             ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+                target_qualifier
             )
             if len(spark_names) > 0:
                 break
@@ -139,7 +143,7 @@ def map_unresolved_star(
             final_sql_expr,
             lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
         )
-        typed_column.set_multi_col_qualifiers([
+        typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
         return spark_names, typed_column
     else:
         result_exp = snowpark_fn.sql_expr(
@@ -152,9 +156,11 @@ def map_unresolved_star(
         typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
         return spark_names, typed_column

-    raise AnalysisException(
+    exception = AnalysisException(
         f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


 def map_unresolved_star_struct(
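The ColumnQualifier changes above route a qualified star through the column map: the qualifier is built from the name parts before the star and matched against each column's recorded qualifiers, first on the current dataframe and then on any outer dataframes. A minimal client-side sketch of the Spark pattern this serves (the alias "t" and the data are illustrative, not taken from the package):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])
    # Selecting "t.*" sends an unresolved star whose qualifier is ("t",);
    # map_unresolved_star expands it to the columns registered under that alias.
    df.alias("t").select("t.*").show()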
@@ -8,6 +8,8 @@ from pyspark.errors.exceptions.base import AnalysisException
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake.snowpark.types import DataType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +41,11 @@ def update_field_in_schema(
                     field.name, updated_subschema, field.nullable, _is_column=False
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             field_updated = True
         else:
             new_field = field  # leave unchanged
@@ -59,9 +63,11 @@ def update_field_in_schema(
         # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
         # but if the value type is not None, it means we should add or update this field which has already been covered above
         # if we reach this code, it means the field should have existed
-        raise AnalysisException(
+        exception = AnalysisException(
             message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return StructType(new_fields)


@@ -99,9 +105,11 @@ def map_update_fields(
     )

     if not isinstance(struct_typed_column.typ, StructType):
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception

     final_schema = struct_typed_column.typ
     value_column_list = []
@@ -137,9 +145,11 @@ def map_update_fields(
     final_name = f"update_fields({struct_name}, {update_operations_str})"

     if len(final_schema.fields) == 0:
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

     @snowpark_fn.udf(
         input_types=input_types_to_the_udf,
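The same refactor repeats across this release: instead of raising directly, the mapper builds the exception, calls attach_custom_error_code with an ErrorCodes member, and only then raises, so the exception type seen by the Spark client is unchanged while an error code rides along. The real helper lives in snowflake.snowpark_connect.error and its body is not shown in this diff; a minimal sketch of the calling pattern, under the assumption that the helper simply tags the exception object, looks like:

    from enum import Enum


    class ErrorCodes(Enum):
        # Hypothetical stand-in for snowflake.snowpark_connect.error.error_codes.ErrorCodes
        INVALID_INPUT = "INVALID_INPUT"
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"


    def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
        # Hypothetical stand-in: tag the exception so server-side handlers and
        # telemetry can read the code without changing the raised type.
        exception.custom_error_code = code  # type: ignore[attr-defined]
        return exception


    exc = ValueError("example failure")
    attach_custom_error_code(exc, ErrorCodes.INVALID_INPUT)
    # raise exc  # the real code paths raise the tagged exception here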
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
     ]
 )

+RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
 CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
         case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
             frame_name.append("ROWS BETWEEN")
             frame_type_func_string = "rows_between"
+            if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                # with Spark, we have to use range_between here.
+                frame_type_func_string = "range_between"
             lower_name, lower = parse_frame_boundary(
                 exp.window.frame_spec.lower, is_upper=False
             )
@@ -138,9 +149,11 @@ def map_window_function(
                 lower != snowpark.Window.UNBOUNDED_PRECEDING
                 or upper != snowpark.Window.CURRENT_ROW
             ):
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
                 orders = orders[:1]

             if proto_func_name in SPARK_RANKING_FUNCTIONS:
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
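Per the comment in the hunk above, the new RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS set makes a percent_rank window that arrives with a Spark ROWS frame translate to range_between on the Snowflake side. An illustrative client query of the affected shape (standard PySpark API, sample data made up):

    from pyspark.sql import SparkSession, functions as F
    from pyspark.sql.window import Window

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["grp", "val"])
    # percent_rank is the one function currently listed in the new frozenset.
    w = Window.partitionBy("grp").orderBy("val")
    df.withColumn("pr", F.percent_rank().over(w)).show()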
@@ -11,6 +11,8 @@ import pyspark.sql.connect.proto.types_pb2 as types_proto

 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MissingDatabase, MissingSchema
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
@@ -39,28 +41,40 @@ class AbstractSparkCatalog(ABC):
         description: str,
         **options: typing.Any,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("createTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("createTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listDatabases(
         self,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listDatabases is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "listDatabases is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def databaseExists(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("databaseExists is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "databaseExists is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listTables(
@@ -68,14 +82,18 @@ class AbstractSparkCatalog(ABC):
         spark_dbName: str | None = None,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listTables is not implemented")
+        exception = SnowparkConnectNotImplementedError("listTables is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getTable(
         self,
         spark_tableName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("getTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def tableExists(
@@ -83,7 +101,9 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("tableExists is not implemented")
+        exception = SnowparkConnectNotImplementedError("tableExists is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listColumns(
@@ -91,36 +111,50 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listColumns is not implemented")
+        exception = SnowparkConnectNotImplementedError("listColumns is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def currentDatabase(self) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("currentDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "currentDatabase is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def setCurrentDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "setCurrentDatabase is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropGlobalTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "dropGlobalTempView is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("dropTempView is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "dropTempView is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     def cacheTable(
         self,
@@ -135,9 +169,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -168,9 +204,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -194,9 +232,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -249,7 +289,11 @@ def _process_multi_layer_database(
             else:
                 return None, c, d
         case _:
-
+            exception = ValueError(
+                f"Unexpected database identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception


 def _process_multi_layer_identifier(
@@ -283,5 +327,9 @@ def _process_multi_layer_identifier(
             else:
                 snowflake_database, snowflake_schema, snowflake_obj = d, s, t
         case _:
-
+            exception = ValueError(
+                f"Unexpected table/view identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
     return spark_catalog, snowflake_database, snowflake_schema, snowflake_obj
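These AbstractSparkCatalog methods back the client's spark.catalog surface, so an unimplemented method now fails with an UNSUPPORTED_OPERATION code attached rather than a bare not-implemented error. Illustrative calls that route through them (standard PySpark Catalog API; the database name is made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # Each call maps onto one of the catalog methods shown above.
    print(spark.catalog.currentDatabase())
    print(spark.catalog.databaseExists("nonexistent_db"))
    print([t.name for t in spark.catalog.listTables()])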
@@ -19,11 +19,14 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import BooleanType, StringType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -109,9 +112,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
         sf_schema = sf_schema.replace("*", ".*")
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         dbs: list[Schema] | None = None
@@ -131,7 +136,8 @@
             )
         if dbs is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         catalogs: list[str] = list()
@@ -163,9 +169,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         """Listing a single database that's accessible in Snowflake."""
         catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         db: Schema | None = None
@@ -184,7 +192,8 @@
             )
         if db is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch database {spark_dbName} after all retry attempts"
+                f"Failed to fetch database {spark_dbName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         name = unquote_if_quoted(db.name)
@@ -307,9 +316,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 spark_dbName
             )
             if catalog is not None and self != catalog:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Calling into another catalog is not currently supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         else:
             catalog = sf_database = sf_schema = None

@@ -440,9 +451,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         table: Table | None = None
         for attempt in Retrying(
@@ -463,7 +476,8 @@

         if table is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch table {spark_tableName} after all retry attempts"
+                f"Failed to fetch table {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         return pandas.DataFrame(
@@ -563,9 +577,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         for attempt in Retrying(
             max_retries=5,
             initial_backoff=100,  # 100ms
@@ -598,7 +614,8 @@
             )
         if columns is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+                f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -702,26 +719,34 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         if source == "":
             source = global_config.get("spark.sql.sources.default")
         if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Source '{source}' is not currently supported by Catalog.createTable. "
                 "Maybe default value through 'spark.sql.sources.default' should be set."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if path != "":
             # External table creation is not supported currently.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "External table creation is not supported currently."
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         session = get_or_create_snowpark_session()
         # Managed table
         if schema.ByteSize() == 0:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unable to infer schema for {source.upper()}. It must be specified manually.",
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)
-        qualifiers = [
+        qualifiers: list[set[ColumnQualifier]] = [
+            {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+        ]
         column_types = [f.datatype for f in sp_schema.fields]
         return DataFrameContainer.create_with_column_mapping(
             dataframe=session.createDataFrame([], sp_schema),
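Per the createTable hunk above, only the file sources csv, json, avro, parquet, orc and xml are accepted, external paths are rejected, and the schema must be supplied explicitly. A hedged client-side call that satisfies those constraints (table name and schema are illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import IntegerType, StringType, StructField, StructType

    spark = SparkSession.builder.getOrCreate()
    schema = StructType(
        [StructField("id", IntegerType()), StructField("name", StringType())]
    )
    # No path (external tables are rejected) and an explicit schema
    # (schema inference raises "Unable to infer schema ...").
    spark.catalog.createTable("demo_table", source="parquet", schema=schema)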
@@ -5,6 +5,8 @@
 from collections import defaultdict

 from snowflake.connector.errors import ProgrammingError
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:

     # Validate input parameters to match PySpark behavior
     if catalog_name is None:
-        raise ValueError("Catalog name cannot be None")
+        exception = ValueError("Catalog name cannot be None")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if catalog_name == "":
-        raise ValueError(
+        exception = ValueError(
             "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

     CURRENT_CATALOG_NAME = catalog_name
     if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
         sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
         return get_current_catalog()
     except ProgrammingError as e:
-        raise Exception(
+        exception = Exception(
             f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
-        )
+        )
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception from e


 def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:
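set_current_catalog appears to back the client's spark.catalog.setCurrentCatalog call; a None or empty name now raises a ValueError with an attached code instead of an unannotated one. Illustrative usage (standard PySpark 3.4+ Catalog API; the catalog name is the PySpark default):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # Routes to the validated set_current_catalog path shown above.
    spark.catalog.setCurrentCatalog("spark_catalog")
    print(spark.catalog.currentCatalog())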
@@ -6,6 +6,9 @@ from urllib.parse import urlparse

 from pyspark.errors.exceptions.base import AnalysisException

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 CLOUD_PREFIX_TO_CLOUD = {
     "abfss": "azure",
     "wasbs": "azure",
@@ -74,7 +77,7 @@ def get_compression_for_source_and_options(

     if not is_supported_compression(source, compression):
         supported_compressions = supported_compressions_for_format(source)
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Compression {compression} is not supported for {source} format. "
             + (
                 f"Supported compressions: {sorted(supported_compressions)}"
@@ -82,6 +85,8 @@
                 else "None compression supported for this format."
             )
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     return compression

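The compression check above validates the compression passed through data source options against what the chosen format supports, and now attaches UNSUPPORTED_OPERATION when it rejects a combination. An illustrative write that exercises the option (the output path is made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a")], ["id", "val"])
    # "gzip" is a supported csv compression; an unsupported codec for the
    # format would surface the AnalysisException raised above.
    df.write.option("compression", "gzip").csv("/tmp/example_csv_out")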
@@ -16,6 +16,7 @@ from snowflake.snowpark.types import DataType
 from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
@@ -200,7 +201,9 @@ def map_pivot_aggregate(
         dataframe=result.select(*column_selectors),
         spark_column_names=reordered_spark_names,
         snowpark_column_names=reordered_snowpark_names,
-        column_qualifiers=[
+        column_qualifiers=[
+            {ColumnQualifier.no_qualifier()} for _ in reordered_spark_names
+        ],
         parent_column_name_map=input_container.column_map,
         snowpark_column_types=reordered_types,
     )
@@ -349,7 +352,7 @@ class _ColumnMetadata:
     spark_name: str
     snowpark_name: str
     data_type: DataType
-    qualifiers:
+    qualifiers: set[ColumnQualifier]


 @dataclass(frozen=True)
@@ -385,7 +388,7 @@ class _Columns:
             col.spark_name for col in self.grouping_columns + self.aggregation_columns
         ]

-    def get_qualifiers(self) -> list[
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
         return [
             col.qualifiers for col in self.grouping_columns + self.aggregation_columns
         ]
@@ -429,7 +432,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 None if pivot else snowpark_column.typ,
-                snowpark_column.get_qualifiers(),
+                qualifiers=snowpark_column.get_qualifiers(),
             )
         )

@@ -469,7 +472,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 agg_col_typ,
-
+                qualifiers={ColumnQualifier.no_qualifier()},
             )
         )

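In the aggregate path, column metadata now carries an explicit qualifier set, with ColumnQualifier.no_qualifier() appearing to mark derived aggregate outputs that no longer belong to any source alias. An illustrative aggregation whose output columns are the ones tagged this way (sample data made up):

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 5)], ["grp", "val"])
    # "total" is a derived aggregate column; on the server side it is the kind
    # of column map_aggregate_helper now tags with an explicit qualifier set.
    df.groupBy("grp").agg(F.sum("val").alias("total")).show()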