snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +717 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +309 -26
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +224 -15
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +86 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
- snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +171 -48
- snowflake/snowpark_connect/server.py +528 -473
- snowflake/snowpark_connect/server_common/__init__.py +503 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +195 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +192 -40
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/error/error_utils.py

@@ -12,11 +12,13 @@ https://github.com/apache/spark/blob/master/common/utils/src/main/resources/erro
 import json
 import pathlib
 import re
+import threading
 import traceback
 
 import jpype
 from google.protobuf import any_pb2
 from google.rpc import code_pb2, error_details_pb2, status_pb2
+from pyspark.errors import TempTableAlreadyExistsException
 from pyspark.errors.error_classes import ERROR_CLASSES_MAP
 from pyspark.errors.exceptions.base import (
     AnalysisException,
@@ -35,9 +37,12 @@ from snowflake.core.exceptions import NotFoundError
 
 from snowflake.connector.errors import ProgrammingError
 from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
-from snowflake.snowpark_connect.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_mapping import ERROR_MAPPINGS_JSON
 
+# Thread-local storage for custom error codes when we can't attach them directly to exceptions
+_thread_local = threading.local()
+
 # The JSON string in error_mapping.py is a copy of https://github.com/apache/spark/blob/master/common/utils/src/main/resources/error/error-conditions.json.
 # The file doesn't have to be synced with spark latest main. Just update it when required.
 current_dir = pathlib.Path(__file__).parent.resolve()
@@ -54,8 +59,11 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
     SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
     PythonException: "org.apache.spark.api.python.PythonException",
     UnsupportedOperationException: "java.lang.UnsupportedOperationException",
+    TempTableAlreadyExistsException: "org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException",
 }
 
+TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS = "TABLE_OR_VIEW_NOT_FOUND"
+
 WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}
 ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {
     904,
@@ -79,6 +87,23 @@ invalid_bit_pattern = re.compile(
     r"Invalid bit position: \d+ exceeds the bit (?:upper|lower) limit",
     re.IGNORECASE,
 )
+CREATE_SCHEMA_PATTERN = re.compile(r"create\s+schema", re.IGNORECASE)
+CREATE_TABLE_PATTERN = re.compile(r"create\s+table", re.IGNORECASE)
+
+
+def attach_custom_error_code(exception: Exception, custom_error_code: int) -> Exception:
+    """
+    Attach a custom error code to any exception instance.
+    This allows us to add custom error codes to existing PySpark exceptions.
+    """
+    if not hasattr(exception, "custom_error_code"):
+        try:
+            exception.custom_error_code = custom_error_code
+        except (AttributeError, TypeError):
+            # Some exception types (like Java exceptions) don't allow setting custom attributes
+            # Store the error code in thread-local storage for later retrieval
+            _thread_local.pending_error_code = custom_error_code
+    return exception
 
 
 def contains_udtf_select(sql_string):
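The fallback path above matters because some wrapped exceptions (for example, Java exceptions surfaced through jpype) reject new attributes. A minimal standalone sketch of that behavior, using a placeholder numeric code and a locally defined stand-in for such an exception type (not the package's own classes):

import threading

_thread_local = threading.local()
INTERNAL_ERROR = 1000  # placeholder; the real values are ErrorCodes members


def attach_custom_error_code(exception, custom_error_code):
    # Tag the exception if possible, otherwise park the code in thread-local storage.
    if not hasattr(exception, "custom_error_code"):
        try:
            exception.custom_error_code = custom_error_code
        except (AttributeError, TypeError):
            _thread_local.pending_error_code = custom_error_code
    return exception


class FrozenError(Exception):
    # Stand-in for exception types that reject attribute assignment.
    def __setattr__(self, name, value):
        raise AttributeError(name)


tagged = attach_custom_error_code(ValueError("bad input"), INTERNAL_ERROR)
assert tagged.custom_error_code == INTERNAL_ERROR

untaggable = attach_custom_error_code(FrozenError("boom"), INTERNAL_ERROR)
assert not hasattr(untaggable, "custom_error_code")
assert _thread_local.pending_error_code == INTERNAL_ERROR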
@@ -100,20 +125,29 @@ def _get_converted_known_sql_or_custom_exception(
 
     # custom exception
     if "[snowpark_connect::invalid_array_index]" in msg:
-
+        exception = ArrayIndexOutOfBoundsException(
             message='The index <indexValue> is out of bounds. The array has <arraySize> elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
         )
+        attach_custom_error_code(exception, ErrorCodes.ARRAY_INDEX_OUT_OF_BOUNDS)
+        return exception
     if "[snowpark_connect::invalid_index_of_zero]" in msg:
-
+        exception = SparkRuntimeException(
             message="[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        return exception
     if "[snowpark_connect::invalid_index_of_zero_in_slice]" in msg:
-
+        exception = SparkRuntimeException(
             message="Unexpected value for start in function slice: SQL array indices start at 1."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        return exception
+
     invalid_bit = invalid_bit_pattern.search(msg)
     if invalid_bit:
-
+        exception = IllegalArgumentException(message=invalid_bit.group(0))
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        return exception
     match = snowpark_connect_exception_pattern.search(
         ex.message if hasattr(ex, "message") else str(ex)
     )
@@ -125,71 +159,136 @@ def _get_converted_known_sql_or_custom_exception(
         if class_name
         else SparkConnectGrpcException
     )
-
+    exception = exception_class(message=message)
+    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+    return exception
 
     if "select with no columns" in msg and contains_udtf_select(query):
         # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
-
+        exception = PythonException(
+            message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}"
+        )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        return exception
 
     # known sql exception
     if ex.sql_error_code not in (100038, 100037, 100035, 100357):
         return None
 
     if "(22018): numeric value" in msg:
-
+        exception = NumberFormatException(
             message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+        return exception
     if "(22018): boolean value" in msg:
-
+        exception = SparkRuntimeException(
             message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+        return exception
     if "(22007): timestamp" in msg:
-
+        exception = AnalysisException(
             "[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Data type mismatch"
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        return exception
 
     if getattr(ex, "sql_error_code", None) == 100357:
         if re.search(init_multi_args_exception_pattern, msg):
-
+            exception = PythonException(
                 message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the init method {ex.message}"
             )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            return exception
        if re.search(terminate_multi_args_exception_pattern, msg):
-
+            exception = PythonException(
                 message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
             )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            return exception
 
     if "failed to split string, provided pattern:" in msg:
-
+        exception = IllegalArgumentException(
             message=f"Failed to split string using provided pattern. {ex.message}"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        return exception
 
     if "100357" in msg and "wrong tuple size for returned value" in msg:
-
+        exception = PythonException(
             message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        return exception
 
     if "100357 (p0000): python interpreter error:" in msg:
         if "in eval" in msg:
-
+            exception = PythonException(
                 message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'eval' method: error. {ex.message}"
             )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            return exception
 
         if "in terminate" in msg:
-
+            exception = PythonException(
                 message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'terminate' method: terminate error. {ex.message}"
            )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            return exception
 
         if "object is not iterable" in msg and contains_udtf_select(query):
-
+            exception = PythonException(
                 message=f"[UDTF_RETURN_NOT_ITERABLE] {ex.message}"
             )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            return exception
 
-
+        exception = PythonException(message=f"{ex.message}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        return exception
 
     return None
 
 
+def _sanitize_custom_error_message(msg):
+    if "[snowpark_connect::unsupported_operation]" in msg:
+        return (
+            msg.replace("[snowpark_connect::unsupported_operation] ", ""),
+            ErrorCodes.UNSUPPORTED_OPERATION,
+        )
+    if "[snowpark_connect::internal_error]" in msg:
+        return (
+            msg.replace("[snowpark_connect::internal_error] ", ""),
+            ErrorCodes.INTERNAL_ERROR,
+        )
+    if "[snowpark_connect::invalid_operation]" in msg:
+        return (
+            msg.replace("[snowpark_connect::invalid_operation] ", ""),
+            ErrorCodes.INVALID_OPERATION,
+        )
+    if "[snowpark_connect::type_mismatch]" in msg:
+        return (
+            msg.replace("[snowpark_connect::type_mismatch] ", ""),
+            ErrorCodes.TYPE_MISMATCH,
+        )
+    if "[snowpark_connect::invalid_input]" in msg:
+        return (
+            msg.replace("[snowpark_connect::invalid_input] ", ""),
+            ErrorCodes.INVALID_INPUT,
+        )
+    if "[snowpark_connect::unsupported_type]" in msg:
+        return (
+            msg.replace("[snowpark_connect::unsupported_type] ", ""),
+            ErrorCodes.UNSUPPORTED_TYPE,
+        )
+    return msg, None
+
+
 def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
+    # Lazy import to avoid circular dependency
+    from snowflake.snowpark_connect.config import global_config
+
     include_stack_trace = (
         global_config.get("spark.sql.pyspark.jvmStacktrace.enabled")
         if hasattr(global_config, "spark.sql.pyspark.jvmStacktrace.enabled")
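The new _sanitize_custom_error_message strips a [snowpark_connect::<kind>] marker from the server-side message and hands back the matching error code. A standalone sketch of the same idea, with an illustrative tag-to-code table and an illustrative message (the real codes are ErrorCodes members):

# Minimal sketch of the message-tag sanitization idea; numeric codes are placeholders.
TAG_TO_CODE = {
    "unsupported_operation": 2001,
    "internal_error": 1000,
    "invalid_operation": 2002,
    "type_mismatch": 3001,
    "invalid_input": 3002,
    "unsupported_type": 3003,
}


def sanitize_custom_error_message(msg: str):
    """Strip a leading [snowpark_connect::<tag>] marker and return (clean_msg, code)."""
    for tag, code in TAG_TO_CODE.items():
        marker = f"[snowpark_connect::{tag}] "
        if marker in msg:
            return msg.replace(marker, ""), code
    return msg, None


clean, code = sanitize_custom_error_message(
    "[snowpark_connect::unsupported_operation] operation X is not supported"
)
# clean == "operation X is not supported", code == 2001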
@@ -203,6 +302,16 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
     match ex:
         case SnowparkSQLException():
             if ex.sql_error_code in ANALYSIS_EXCEPTION_SQL_ERROR_CODE:
+                # Creation of schema that already exists
+                if ex.sql_error_code == 2002 and "already exists" in str(ex):
+                    if CREATE_SCHEMA_PATTERN.search(ex.query):
+                        spark_java_classes.append(
+                            "org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException"
+                        )
+                    elif CREATE_TABLE_PATTERN.search(ex.query):
+                        spark_java_classes.append(
+                            "org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException"
+                        )
                 # Data type mismatch, invalid window function
                 spark_java_classes.append("org.apache.spark.sql.AnalysisException")
             elif ex.sql_error_code == 100051:
@@ -211,6 +320,7 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
                     error_class="DIVIDE_BY_ZERO",
                     message_parameters={"config": '"spark.sql.ansi.enabled"'},
                 )
+                attach_custom_error_code(ex, ErrorCodes.DIVISION_BY_ZERO)
             elif ex.sql_error_code in (100096, 100040):
                 # Spark seems to want the Java base class instead of org.apache.spark.sql.SparkDateTimeException
                 # which is what should really be thrown
@@ -221,6 +331,9 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
                 ex = spark_ex
                 spark_java_classes.append(SPARK_PYTHON_TO_JAVA_EXCEPTION[type(ex)])
             elif ex.sql_error_code == 2043:
+                spark_java_classes.append(
+                    "org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException"
+                )
                 spark_java_classes.append("org.apache.spark.sql.AnalysisException")
                 message = f"does_not_exist: {str(ex)}"
             else:
@@ -252,13 +365,23 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
         )
     elif isinstance(ex, PySparkException):
         # pyspark exceptions thrown in sas layer
+
+        error_derived_java_class = []
+        if ex.error_class == TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS:
+            error_derived_java_class.append(
+                "org.apache.spark.sql.catalyst.analysis.NoSuchTableException"
+            )
+
         classes = type(ex).__mro__
         spark_java_classes = [
             SPARK_PYTHON_TO_JAVA_EXCEPTION[clazz]
             for clazz in classes
             if clazz in SPARK_PYTHON_TO_JAVA_EXCEPTION
         ]
-
+
+        metadata = {
+            "classes": json.dumps(error_derived_java_class + spark_java_classes)
+        }
         if include_stack_trace:
             metadata["stackTrace"] = "".join(
                 traceback.TracebackException.from_exception(ex).format()
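For PySpark exceptions the response walks the exception's MRO through SPARK_PYTHON_TO_JAVA_EXCEPTION, so the client can rebuild the most specific Spark exception first. A standalone sketch with locally defined stand-in classes and an abbreviated, illustrative mapping (not the package's real table):

class PySparkException(Exception):
    pass


class AnalysisException(PySparkException):
    pass


SPARK_PYTHON_TO_JAVA_EXCEPTION = {
    AnalysisException: "org.apache.spark.sql.AnalysisException",
    PySparkException: "org.apache.spark.SparkException",
}

ex = AnalysisException("table or view not found")
spark_java_classes = [
    SPARK_PYTHON_TO_JAVA_EXCEPTION[clazz]
    for clazz in type(ex).__mro__
    if clazz in SPARK_PYTHON_TO_JAVA_EXCEPTION
]
# Most specific class first:
# ['org.apache.spark.sql.AnalysisException', 'org.apache.spark.SparkException']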
@@ -299,14 +422,40 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
         domain="snowflake.sas",
     )
 
-    detail = any_pb2.Any()
-    detail.Pack(error_info)
-
     if message is None:
         message = str(ex)
 
+    custom_error_code = None
+
+    # attach error code using visa exception message
+    message, custom_error_code_from_msg = _sanitize_custom_error_message(message)
+
+    # Check if exception already has a custom error code, if not add INTERNAL_ERROR as default
+    if not hasattr(ex, "custom_error_code") or ex.custom_error_code is None:
+        attach_custom_error_code(
+            ex,
+            ErrorCodes.INTERNAL_ERROR
+            if custom_error_code_from_msg is None
+            else custom_error_code_from_msg,
+        )
+
+    # Get the custom error code from the exception or thread-local storage
+    custom_error_code = getattr(ex, "custom_error_code", None) or getattr(
+        _thread_local, "pending_error_code", None
+    )
+
+    # Clear thread-local storage after retrieving the error code
+    if hasattr(_thread_local, "pending_error_code"):
+        delattr(_thread_local, "pending_error_code")
+
+    separator = "==========================================="
+    error_code_added_message = f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"
+
+    detail = any_pb2.Any()
+    detail.Pack(error_info)
+
     rich_status = status_pb2.Status(
-        code=code_pb2.INTERNAL, message=
+        code=code_pb2.INTERNAL, message=error_code_added_message, details=[detail]
     )
     return rich_status
 
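With these changes the client-visible gRPC message carries an error-code banner in front of the original text. A small sketch of what that rendered message looks like (the code value and message are placeholders):

custom_error_code = 1000
message = "Division by zero"

separator = "==========================================="
error_code_added_message = (
    f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"
)
print(error_code_added_message)
# ===========================================
# SNOWPARK CONNECT ERROR CODE: 1000
# ===========================================
# Division by zero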
snowflake/snowpark_connect/error/exceptions.py

@@ -2,27 +2,36 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+
 
 class SnowparkConnectException(Exception):
     """Parent class to all SnowparkConnect related exceptions."""
 
-    def __init__(self, *args, **kwargs) -> None:
+    def __init__(self, *args, custom_error_code=None, **kwargs) -> None:
         super().__init__(*args, **kwargs)
+        self.custom_error_code = custom_error_code
 
 
 class MissingDatabase(SnowparkConnectException):
-    def __init__(self) -> None:
+    def __init__(self, custom_error_code=None) -> None:
         super().__init__(
             "No default database found in session",
+            custom_error_code=custom_error_code or ErrorCodes.MISSING_DATABASE,
         )
 
 
 class MissingSchema(SnowparkConnectException):
-    def __init__(self) -> None:
+    def __init__(self, custom_error_code=None) -> None:
         super().__init__(
             "No default schema found in session",
+            custom_error_code=custom_error_code or ErrorCodes.MISSING_SCHEMA,
         )
 
 
 class MaxRetryExceeded(SnowparkConnectException):
-
+    def __init__(
+        self,
+        message="Maximum retry attempts exceeded",
+    ) -> None:
+        super().__init__(message)
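A standalone sketch of how the reworked base class carries the code, mirroring the diff above with a placeholder value in place of the ErrorCodes member:

MISSING_DATABASE = 4001  # placeholder; the real value is an ErrorCodes member


class SnowparkConnectException(Exception):
    """Parent class to all SnowparkConnect related exceptions."""

    def __init__(self, *args, custom_error_code=None, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.custom_error_code = custom_error_code


class MissingDatabase(SnowparkConnectException):
    def __init__(self, custom_error_code=None) -> None:
        super().__init__(
            "No default database found in session",
            custom_error_code=custom_error_code or MISSING_DATABASE,
        )


try:
    raise MissingDatabase()
except SnowparkConnectException as exc:
    # The server can read the code off the exception when building the gRPC response.
    assert exc.custom_error_code == MISSING_DATABASE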
snowflake/snowpark_connect/execute_plan/map_execution_command.py

@@ -1,90 +1,29 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import re
-import uuid
-from collections import Counter
 
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
-from snowflake.snowpark import DataFrame, Session
-from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark_connect.column_name_handler import ColumnNames
-from snowflake.snowpark_connect.config import global_config, sessions_config
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
 from snowflake.snowpark_connect.expression import map_udf
 from snowflake.snowpark_connect.relation import map_udtf
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
-from snowflake.snowpark_connect.relation.
-
-from snowflake.snowpark_connect.utils.identifiers import (
-    spark_to_sf_single_id,
-    spark_to_sf_single_id_with_unquoting,
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    without_internal_columns,
 )
+from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
-
-
-
-_CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
-
-
-def _create_column_rename_map(
-    columns: list[ColumnNames], rename_duplicated: bool
-) -> dict:
-    if rename_duplicated is False:
-        # if we are not renaming duplicated columns, we can just return the original names
-        return {
-            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-            for col in columns
-        }
-
-    column_counts = Counter()
-    not_renamed_cols = []
-    renamed_cols = []
-
-    for col in columns:
-        new_column_name = col.spark_name
-        normalized_name = new_column_name.lower()
-        column_counts[normalized_name] += 1
-
-        if column_counts[normalized_name] > 1:
-            new_column_name = (
-                f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
-            )
-            renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-        else:
-            not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-
-    if len(renamed_cols) == 0:
-        return {
-            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-            for col in not_renamed_cols
-        }
-
-    # we need to make sure that we don't have duplicated names after renaming
-    # columns that were not renamed in this iteration should have priority over renamed duplicates
-    return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
-
-
-def _find_duplicated_columns(
-    columns: list[ColumnNames],
-) -> (list[str], list[ColumnNames]):
-    duplicates = []
-    remaining_columns = []
-    seen = set()
-    for col in columns:
-        if col.spark_name in seen:
-            duplicates.append(col.snowpark_name)
-        else:
-            seen.add(col.spark_name)
-            remaining_columns.append(col)
-    return duplicates, remaining_columns
+from snowflake.snowpark_connect.utils.temporary_view_helper import (
+    create_temporary_view_from_dataframe,
+)
 
 
 def map_execution_command(
@@ -94,56 +33,10 @@ def map_execution_command(
     match request.plan.command.WhichOneof("command_type"):
         case "create_dataframe_view":
             req = request.plan.command.create_dataframe_view
-            input_df_container = map_relation(req.input)
-
-
-
-            session_config = sessions_config[get_session_id()]
-            duplicate_column_names_handling_mode = session_config[
-                "snowpark.connect.views.duplicate_column_names_handling_mode"
-            ]
-
-            # rename columns to match spark names
-            if duplicate_column_names_handling_mode == "rename":
-                # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, True)
-                )
-            elif duplicate_column_names_handling_mode == "drop":
-                # Drop duplicate column names by removing all but the first occurrence.
-                duplicated_columns, remaining_columns = _find_duplicated_columns(
-                    column_map.columns
-                )
-                if len(duplicated_columns) > 0:
-                    input_df = input_df.drop(*duplicated_columns)
-                input_df = input_df.rename(
-                    _create_column_rename_map(remaining_columns, False)
-                )
-            else:
-                # rename columns without deduplication
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, False)
-                )
-
-            if req.is_global:
-                view_name = [global_config.spark_sql_globalTempDatabase, req.name]
-            else:
-                view_name = [req.name]
-            view_name = [
-                spark_to_sf_single_id_with_unquoting(part) for part in view_name
-            ]
-
-            if req.replace:
-                try:
-                    input_df.create_or_replace_temp_view(view_name)
-                except SnowparkSQLException as exc:
-                    if _is_error_caused_by_view_referencing_itself(exc):
-                        # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
-                        _create_chained_view(input_df, view_name)
-                    else:
-                        raise
-            else:
-                input_df.create_temp_view(view_name)
+            input_df_container = without_internal_columns(map_relation(req.input))
+            create_temporary_view_from_dataframe(
+                input_df_container, req.name, req.is_global, req.replace
+            )
         case "write_stream_operation_start":
             match request.plan.command.write_stream_operation_start.format:
                 case "console":
@@ -204,46 +97,8 @@ def map_execution_command(
             map_udtf.register_udtf(request.plan.command.register_table_function)
 
         case other:
-
+            exception = SnowparkConnectNotImplementedError(
                 f"Command type {other} not implemented"
             )
-
-
-def _generate_random_builtin_view_name() -> str:
-    return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
-def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
-    return "view definition refers to view being defined" in str(exc).lower()
-
-
-def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
-    """
-    In order to create a view, which references itself, Spark would here take the previous
-    definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
-    as a workaround, we create a chain of internal views instead. This function:
-    1. Renames previous definition of A to some internal name (instead of deleting).
-    2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
-    """
-
-    session = Session.get_active_session()
-
-    view_name = ".".join(view_name)
-
-    tmp_name = _generate_random_builtin_view_name()
-    old_name_replacement = _generate_random_builtin_view_name()
-
-    input_df.create_or_replace_temp_view(tmp_name)
-
-    session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
-    ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
-    ddl = ddl.replace(view_name, old_name_replacement)
-
-    # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
-    ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
-    session.sql(ddl).collect()
-
-    session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception