snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/error/error_utils.py
@@ -12,11 +12,13 @@ https://github.com/apache/spark/blob/master/common/utils/src/main/resources/erro
  import json
  import pathlib
  import re
+ import threading
  import traceback

  import jpype
  from google.protobuf import any_pb2
  from google.rpc import code_pb2, error_details_pb2, status_pb2
+ from pyspark.errors import TempTableAlreadyExistsException
  from pyspark.errors.error_classes import ERROR_CLASSES_MAP
  from pyspark.errors.exceptions.base import (
      AnalysisException,
@@ -35,9 +37,12 @@ from snowflake.core.exceptions import NotFoundError

  from snowflake.connector.errors import ProgrammingError
  from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
- from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
  from snowflake.snowpark_connect.error.error_mapping import ERROR_MAPPINGS_JSON

+ # Thread-local storage for custom error codes when we can't attach them directly to exceptions
+ _thread_local = threading.local()
+
  # The JSON string in error_mapping.py is a copy of https://github.com/apache/spark/blob/master/common/utils/src/main/resources/error/error-conditions.json.
  # The file doesn't have to be synced with spark latest main. Just update it when required.
  current_dir = pathlib.Path(__file__).parent.resolve()
@@ -54,8 +59,11 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
      SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
      PythonException: "org.apache.spark.api.python.PythonException",
      UnsupportedOperationException: "java.lang.UnsupportedOperationException",
+     TempTableAlreadyExistsException: "org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException",
  }

+ TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS = "TABLE_OR_VIEW_NOT_FOUND"
+
  WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}
  ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {
      904,
@@ -79,6 +87,23 @@ invalid_bit_pattern = re.compile(
      r"Invalid bit position: \d+ exceeds the bit (?:upper|lower) limit",
      re.IGNORECASE,
  )
+ CREATE_SCHEMA_PATTERN = re.compile(r"create\s+schema", re.IGNORECASE)
+ CREATE_TABLE_PATTERN = re.compile(r"create\s+table", re.IGNORECASE)
+
+
+ def attach_custom_error_code(exception: Exception, custom_error_code: int) -> Exception:
+     """
+     Attach a custom error code to any exception instance.
+     This allows us to add custom error codes to existing PySpark exceptions.
+     """
+     if not hasattr(exception, "custom_error_code"):
+         try:
+             exception.custom_error_code = custom_error_code
+         except (AttributeError, TypeError):
+             # Some exception types (like Java exceptions) don't allow setting custom attributes
+             # Store the error code in thread-local storage for later retrieval
+             _thread_local.pending_error_code = custom_error_code
+     return exception


  def contains_udtf_select(sql_string):
@@ -100,20 +125,29 @@ def _get_converted_known_sql_or_custom_exception(

      # custom exception
      if "[snowpark_connect::invalid_array_index]" in msg:
-         return ArrayIndexOutOfBoundsException(
+         exception = ArrayIndexOutOfBoundsException(
              message='The index <indexValue> is out of bounds. The array has <arraySize> elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.ARRAY_INDEX_OUT_OF_BOUNDS)
+         return exception
      if "[snowpark_connect::invalid_index_of_zero]" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message="[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+         return exception
      if "[snowpark_connect::invalid_index_of_zero_in_slice]" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message="Unexpected value for start in function slice: SQL array indices start at 1."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+         return exception
+
      invalid_bit = invalid_bit_pattern.search(msg)
      if invalid_bit:
-         return IllegalArgumentException(message=invalid_bit.group(0))
+         exception = IllegalArgumentException(message=invalid_bit.group(0))
+         attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+         return exception
      match = snowpark_connect_exception_pattern.search(
          ex.message if hasattr(ex, "message") else str(ex)
      )
@@ -125,71 +159,136 @@ def _get_converted_known_sql_or_custom_exception(
          if class_name
          else SparkConnectGrpcException
      )
-     return exception_class(message=message)
+     exception = exception_class(message=message)
+     attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+     return exception

      if "select with no columns" in msg and contains_udtf_select(query):
          # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
-         return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")
+         exception = PythonException(
+             message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}"
+         )
+         attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+         return exception

      # known sql exception
      if ex.sql_error_code not in (100038, 100037, 100035, 100357):
          return None

      if "(22018): numeric value" in msg:
-         return NumberFormatException(
+         exception = NumberFormatException(
              message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         return exception
      if "(22018): boolean value" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         return exception
      if "(22007): timestamp" in msg:
-         return AnalysisException(
+         exception = AnalysisException(
              "[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Data type mismatch"
          )
+         attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+         return exception

      if getattr(ex, "sql_error_code", None) == 100357:
          if re.search(init_multi_args_exception_pattern, msg):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the init method {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception
          if re.search(terminate_multi_args_exception_pattern, msg):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

          if "failed to split string, provided pattern:" in msg:
-             return IllegalArgumentException(
+             exception = IllegalArgumentException(
                  message=f"Failed to split string using provided pattern. {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

          if "100357" in msg and "wrong tuple size for returned value" in msg:
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

          if "100357 (p0000): python interpreter error:" in msg:
              if "in eval" in msg:
-                 return PythonException(
+                 exception = PythonException(
                      message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'eval' method: error. {ex.message}"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                 return exception

              if "in terminate" in msg:
-                 return PythonException(
+                 exception = PythonException(
                      message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'terminate' method: terminate error. {ex.message}"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                 return exception

          if "object is not iterable" in msg and contains_udtf_select(query):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_RETURN_NOT_ITERABLE] {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

-         return PythonException(message=f"{ex.message}")
+         exception = PythonException(message=f"{ex.message}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         return exception

      return None


+ def _sanitize_custom_error_message(msg):
+     if "[snowpark_connect::unsupported_operation]" in msg:
+         return (
+             msg.replace("[snowpark_connect::unsupported_operation] ", ""),
+             ErrorCodes.UNSUPPORTED_OPERATION,
+         )
+     if "[snowpark_connect::internal_error]" in msg:
+         return (
+             msg.replace("[snowpark_connect::internal_error] ", ""),
+             ErrorCodes.INTERNAL_ERROR,
+         )
+     if "[snowpark_connect::invalid_operation]" in msg:
+         return (
+             msg.replace("[snowpark_connect::invalid_operation] ", ""),
+             ErrorCodes.INVALID_OPERATION,
+         )
+     if "[snowpark_connect::type_mismatch]" in msg:
+         return (
+             msg.replace("[snowpark_connect::type_mismatch] ", ""),
+             ErrorCodes.TYPE_MISMATCH,
+         )
+     if "[snowpark_connect::invalid_input]" in msg:
+         return (
+             msg.replace("[snowpark_connect::invalid_input] ", ""),
+             ErrorCodes.INVALID_INPUT,
+         )
+     if "[snowpark_connect::unsupported_type]" in msg:
+         return (
+             msg.replace("[snowpark_connect::unsupported_type] ", ""),
+             ErrorCodes.UNSUPPORTED_TYPE,
+         )
+     return msg, None
+
+
  def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
+     # Lazy import to avoid circular dependency
+     from snowflake.snowpark_connect.config import global_config
+
      include_stack_trace = (
          global_config.get("spark.sql.pyspark.jvmStacktrace.enabled")
          if hasattr(global_config, "spark.sql.pyspark.jvmStacktrace.enabled")
@@ -203,6 +302,16 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
      match ex:
          case SnowparkSQLException():
              if ex.sql_error_code in ANALYSIS_EXCEPTION_SQL_ERROR_CODE:
+                 # Creation of schema that already exists
+                 if ex.sql_error_code == 2002 and "already exists" in str(ex):
+                     if CREATE_SCHEMA_PATTERN.search(ex.query):
+                         spark_java_classes.append(
+                             "org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException"
+                         )
+                     elif CREATE_TABLE_PATTERN.search(ex.query):
+                         spark_java_classes.append(
+                             "org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException"
+                         )
                  # Data type mismatch, invalid window function
                  spark_java_classes.append("org.apache.spark.sql.AnalysisException")
              elif ex.sql_error_code == 100051:
@@ -211,6 +320,7 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
                      error_class="DIVIDE_BY_ZERO",
                      message_parameters={"config": '"spark.sql.ansi.enabled"'},
                  )
+                 attach_custom_error_code(ex, ErrorCodes.DIVISION_BY_ZERO)
              elif ex.sql_error_code in (100096, 100040):
                  # Spark seems to want the Java base class instead of org.apache.spark.sql.SparkDateTimeException
                  # which is what should really be thrown
@@ -221,6 +331,9 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
                  ex = spark_ex
                  spark_java_classes.append(SPARK_PYTHON_TO_JAVA_EXCEPTION[type(ex)])
              elif ex.sql_error_code == 2043:
+                 spark_java_classes.append(
+                     "org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException"
+                 )
                  spark_java_classes.append("org.apache.spark.sql.AnalysisException")
                  message = f"does_not_exist: {str(ex)}"
              else:
@@ -252,13 +365,23 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
          )
      elif isinstance(ex, PySparkException):
          # pyspark exceptions thrown in sas layer
+
+         error_derived_java_class = []
+         if ex.error_class == TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS:
+             error_derived_java_class.append(
+                 "org.apache.spark.sql.catalyst.analysis.NoSuchTableException"
+             )
+
          classes = type(ex).__mro__
          spark_java_classes = [
              SPARK_PYTHON_TO_JAVA_EXCEPTION[clazz]
              for clazz in classes
              if clazz in SPARK_PYTHON_TO_JAVA_EXCEPTION
          ]
-         metadata = {"classes": json.dumps(spark_java_classes)}
+
+         metadata = {
+             "classes": json.dumps(error_derived_java_class + spark_java_classes)
+         }
          if include_stack_trace:
              metadata["stackTrace"] = "".join(
                  traceback.TracebackException.from_exception(ex).format()
@@ -299,14 +422,40 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
          domain="snowflake.sas",
      )

-     detail = any_pb2.Any()
-     detail.Pack(error_info)
-
      if message is None:
          message = str(ex)

+     custom_error_code = None
+
+     # attach error code using visa exception message
+     message, custom_error_code_from_msg = _sanitize_custom_error_message(message)
+
+     # Check if exception already has a custom error code, if not add INTERNAL_ERROR as default
+     if not hasattr(ex, "custom_error_code") or ex.custom_error_code is None:
+         attach_custom_error_code(
+             ex,
+             ErrorCodes.INTERNAL_ERROR
+             if custom_error_code_from_msg is None
+             else custom_error_code_from_msg,
+         )
+
+     # Get the custom error code from the exception or thread-local storage
+     custom_error_code = getattr(ex, "custom_error_code", None) or getattr(
+         _thread_local, "pending_error_code", None
+     )
+
+     # Clear thread-local storage after retrieving the error code
+     if hasattr(_thread_local, "pending_error_code"):
+         delattr(_thread_local, "pending_error_code")
+
+     separator = "==========================================="
+     error_code_added_message = f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"
+
+     detail = any_pb2.Any()
+     detail.Pack(error_info)
+
      rich_status = status_pb2.Status(
-         code=code_pb2.INTERNAL, message=message, details=[detail]
+         code=code_pb2.INTERNAL, message=error_code_added_message, details=[detail]
      )
      return rich_status
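Taken together, the error_utils.py changes route every failure through attach_custom_error_code and _sanitize_custom_error_message before the gRPC status is built, so clients now see a "SNOWPARK CONNECT ERROR CODE" banner in front of the original message. The following is a minimal illustrative sketch, not code from the package; it assumes only the names introduced in the hunks above (attach_custom_error_code, ErrorCodes, build_grpc_error_response), and the sample exception and message are made up. Running it requires the snowpark-connect and pyspark packages on the path.

    from pyspark.errors import IllegalArgumentException

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import (
        attach_custom_error_code,
        build_grpc_error_response,
    )

    # Tag an exception at the point where the problem is detected.
    exc = IllegalArgumentException(message="negative repeat count")
    attach_custom_error_code(exc, ErrorCodes.INVALID_INPUT)
    assert exc.custom_error_code == ErrorCodes.INVALID_INPUT

    # build_grpc_error_response() then prepends the banner shown in the hunk above,
    # roughly:
    #   ===========================================
    #   SNOWPARK CONNECT ERROR CODE: <ErrorCodes.INVALID_INPUT>
    #   ===========================================
    #   negative repeat count
    status = build_grpc_error_response(exc)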
snowflake/snowpark_connect/error/exceptions.py
@@ -2,27 +2,36 @@
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #

+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+

  class SnowparkConnectException(Exception):
      """Parent class to all SnowparkConnect related exceptions."""

-     def __init__(self, *args, **kwargs) -> None:
+     def __init__(self, *args, custom_error_code=None, **kwargs) -> None:
          super().__init__(*args, **kwargs)
+         self.custom_error_code = custom_error_code


  class MissingDatabase(SnowparkConnectException):
-     def __init__(self) -> None:
+     def __init__(self, custom_error_code=None) -> None:
          super().__init__(
              "No default database found in session",
+             custom_error_code=custom_error_code or ErrorCodes.MISSING_DATABASE,
          )


  class MissingSchema(SnowparkConnectException):
-     def __init__(self) -> None:
+     def __init__(self, custom_error_code=None) -> None:
          super().__init__(
              "No default schema found in session",
+             custom_error_code=custom_error_code or ErrorCodes.MISSING_SCHEMA,
          )


  class MaxRetryExceeded(SnowparkConnectException):
-     ...
+     def __init__(
+         self,
+         message="Maximum retry attempts exceeded",
+     ) -> None:
+         super().__init__(message)
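With the new keyword on the base class, every SnowparkConnectException subclass carries a machine-readable code alongside its message, and MissingDatabase / MissingSchema default to their dedicated codes. A short illustration of the resulting behavior, assuming only the classes and ErrorCodes members shown above:

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.exceptions import (
        MissingDatabase,
        SnowparkConnectException,
    )

    try:
        raise MissingDatabase()
    except SnowparkConnectException as e:
        # The message text is unchanged; the code travels on the exception instance.
        print(str(e))               # "No default database found in session"
        print(e.custom_error_code)  # ErrorCodes.MISSING_DATABASE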
snowflake/snowpark_connect/execute_plan/map_execution_command.py
@@ -1,90 +1,29 @@
  #
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #
- import re
- import uuid
- from collections import Counter

  import pyspark.sql.connect.proto.base_pb2 as proto_base
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

- from snowflake.snowpark import DataFrame, Session
- from snowflake.snowpark.exceptions import SnowparkSQLException
- from snowflake.snowpark_connect.column_name_handler import ColumnNames
- from snowflake.snowpark_connect.config import global_config, sessions_config
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
  from snowflake.snowpark_connect.expression import map_udf
  from snowflake.snowpark_connect.relation import map_udtf
  from snowflake.snowpark_connect.relation.map_relation import map_relation
  from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
- from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
- from snowflake.snowpark_connect.utils.context import get_session_id
- from snowflake.snowpark_connect.utils.identifiers import (
-     spark_to_sf_single_id,
-     spark_to_sf_single_id_with_unquoting,
+ from snowflake.snowpark_connect.relation.read.metadata_utils import (
+     without_internal_columns,
  )
+ from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import (
      SnowparkConnectNotImplementedError,
  )
-
- _INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
-
- _CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
-
-
- def _create_column_rename_map(
-     columns: list[ColumnNames], rename_duplicated: bool
- ) -> dict:
-     if rename_duplicated is False:
-         # if we are not renaming duplicated columns, we can just return the original names
-         return {
-             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-             for col in columns
-         }
-
-     column_counts = Counter()
-     not_renamed_cols = []
-     renamed_cols = []
-
-     for col in columns:
-         new_column_name = col.spark_name
-         normalized_name = new_column_name.lower()
-         column_counts[normalized_name] += 1
-
-         if column_counts[normalized_name] > 1:
-             new_column_name = (
-                 f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
-             )
-             renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-         else:
-             not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-
-     if len(renamed_cols) == 0:
-         return {
-             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-             for col in not_renamed_cols
-         }
-
-     # we need to make sure that we don't have duplicated names after renaming
-     # columns that were not renamed in this iteration should have priority over renamed duplicates
-     return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
-
-
- def _find_duplicated_columns(
-     columns: list[ColumnNames],
- ) -> (list[str], list[ColumnNames]):
-     duplicates = []
-     remaining_columns = []
-     seen = set()
-     for col in columns:
-         if col.spark_name in seen:
-             duplicates.append(col.snowpark_name)
-         else:
-             seen.add(col.spark_name)
-             remaining_columns.append(col)
-     return duplicates, remaining_columns
+ from snowflake.snowpark_connect.utils.temporary_view_helper import (
+     create_temporary_view_from_dataframe,
+ )


  def map_execution_command(
@@ -94,56 +33,10 @@ def map_execution_command(
      match request.plan.command.WhichOneof("command_type"):
          case "create_dataframe_view":
              req = request.plan.command.create_dataframe_view
-             input_df_container = map_relation(req.input)
-             input_df = input_df_container.dataframe
-             column_map = input_df_container.column_map
-
-             session_config = sessions_config[get_session_id()]
-             duplicate_column_names_handling_mode = session_config[
-                 "snowpark.connect.views.duplicate_column_names_handling_mode"
-             ]
-
-             # rename columns to match spark names
-             if duplicate_column_names_handling_mode == "rename":
-                 # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
-                 input_df = input_df.rename(
-                     _create_column_rename_map(column_map.columns, True)
-                 )
-             elif duplicate_column_names_handling_mode == "drop":
-                 # Drop duplicate column names by removing all but the first occurrence.
-                 duplicated_columns, remaining_columns = _find_duplicated_columns(
-                     column_map.columns
-                 )
-                 if len(duplicated_columns) > 0:
-                     input_df = input_df.drop(*duplicated_columns)
-                 input_df = input_df.rename(
-                     _create_column_rename_map(remaining_columns, False)
-                 )
-             else:
-                 # rename columns without deduplication
-                 input_df = input_df.rename(
-                     _create_column_rename_map(column_map.columns, False)
-                 )
-
-             if req.is_global:
-                 view_name = [global_config.spark_sql_globalTempDatabase, req.name]
-             else:
-                 view_name = [req.name]
-             view_name = [
-                 spark_to_sf_single_id_with_unquoting(part) for part in view_name
-             ]
-
-             if req.replace:
-                 try:
-                     input_df.create_or_replace_temp_view(view_name)
-                 except SnowparkSQLException as exc:
-                     if _is_error_caused_by_view_referencing_itself(exc):
-                         # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
-                         _create_chained_view(input_df, view_name)
-                     else:
-                         raise
-             else:
-                 input_df.create_temp_view(view_name)
+             input_df_container = without_internal_columns(map_relation(req.input))
+             create_temporary_view_from_dataframe(
+                 input_df_container, req.name, req.is_global, req.replace
+             )
          case "write_stream_operation_start":
              match request.plan.command.write_stream_operation_start.format:
                  case "console":
@@ -204,46 +97,8 @@ def map_execution_command(
              map_udtf.register_udtf(request.plan.command.register_table_function)

          case other:
-             raise SnowparkConnectNotImplementedError(
+             exception = SnowparkConnectNotImplementedError(
                  f"Command type {other} not implemented"
              )
-
-
- def _generate_random_builtin_view_name() -> str:
-     return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
- def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
-     return "view definition refers to view being defined" in str(exc).lower()
-
-
- def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
-     """
-     In order to create a view, which references itself, Spark would here take the previous
-     definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
-     as a workaround, we create a chain of internal views instead. This function:
-     1. Renames previous definition of A to some internal name (instead of deleting).
-     2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
-     """
-
-     session = Session.get_active_session()
-
-     view_name = ".".join(view_name)
-
-     tmp_name = _generate_random_builtin_view_name()
-     old_name_replacement = _generate_random_builtin_view_name()
-
-     input_df.create_or_replace_temp_view(tmp_name)
-
-     session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
-     ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
-     ddl = ddl.replace(view_name, old_name_replacement)
-
-     # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
-     ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
-     session.sql(ddl).collect()
-
-     session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception
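The inline view-creation logic (column deduplication, global temp naming, the self-referencing-view workaround) is gone from this file; the create_dataframe_view command now delegates to create_temporary_view_from_dataframe in utils/temporary_view_helper.py, which is new in this release per the file list above. The client-side API that drives this path is unchanged. A hedged client-side sketch follows; the endpoint URL is a placeholder, and the mapping of these PySpark calls onto the create_dataframe_view flags reflects standard Spark Connect behavior rather than anything stated in this diff.

    from pyspark.sql import SparkSession

    # Placeholder endpoint: any Spark Connect client pointed at a snowpark-connect
    # server exercises the create_dataframe_view branch shown above.
    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

    # Emits create_dataframe_view with replace=True, is_global=False.
    df.createOrReplaceTempView("people")

    # Emits create_dataframe_view with is_global=True; global temp views are
    # addressed through the global_temp database on the Spark side.
    df.createGlobalTempView("people_global")

    spark.sql("SELECT * FROM people").show()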