snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/date_time_format_mapping.py

@@ -24,6 +24,8 @@
  from pyspark.errors.exceptions.base import DateTimeException

  from snowflake.snowpark.types import DataType, StringType
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


  # TODO: There are more patterns where spark may throw an error.
@@ -229,9 +231,11 @@ def convert_spark_format_to_snowflake(
  timestamp_input_type: DataType | None = None,
  ):
  if spark_format in {"Y", "w", "W"}:
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception
  snowflake_format = ""
  i = 0
  n = len(spark_format)
@@ -299,28 +303,46 @@ def convert_spark_format_to_snowflake(
  # Spark's 'a' would be at most 1 times
  is_valid_a_pattern = spark_format[i : i + 2] != char * 2
  if not is_valid_a_pattern:
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "h" | "K" | "k" | "H" | "m" | "s" | "d":
  # Spark's characters would be at most 2 times
  is_valid_2_patterns = spark_format[i : i + 3] != char * 3
  if not is_valid_2_patterns:
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "D":
  # Spark's 'D'' would be at most 3 times
  is_valid_D_patterns = spark_format[i : i + 4] != char * 4
  if not is_valid_D_patterns:
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "V":
  # Spark's 'V' for Zone ID requires 'VV'. A single 'V' is invalid.
  is_valid_vv_pattern = spark_format[i : i + 2] == "VV"
  if not is_valid_vv_pattern:
- raise DateTimeException("Pattern letter count must be 2: V")
+ exception = DateTimeException(
+ "Pattern letter count must be 2: V"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "O":
  # Spark's 'O' would be either 1 or 4.
  is_valid_o_or_oooo_pattern = spark_format[i : i + 2] != "OO" or (
@@ -328,28 +350,52 @@ def convert_spark_format_to_snowflake(
  and spark_format[i : i + 5] != "OOOOO"
  )
  if not is_valid_o_or_oooo_pattern:
- raise DateTimeException(
+ exception = DateTimeException(
  "Pattern letter count must be 1 or 4: O"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "q" | "Q" | "z" | "E":
  # Spark's characters would be at most 4 times
  is_valid_4_patterns = spark_format[i : i + 5] != char * 5
  if not is_valid_4_patterns:
- raise DateTimeException(f"Too many pattern letters: {char}")
+ exception = DateTimeException(
+ f"Too many pattern letters: {char}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "x" | "X" | "Z":
  # Spark's 'x' or 'X' or 'z' or 'Z' would be at most 5 times
  is_valid_xz_pattern = spark_format[i : i + 6] != char * 6
  if not is_valid_xz_pattern:
- raise DateTimeException(f"Too many pattern letters: {char}")
+ exception = DateTimeException(
+ f"Too many pattern letters: {char}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "y":
  # Spark's 'y' would be at most 6 times
  is_valid_y_pattern = spark_format[i : i + 7] != char * 7
  if not is_valid_y_pattern:
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "C" | "I":
- raise DateTimeException(f"Unknown pattern letter: {char}")
+ exception = DateTimeException(f"Unknown pattern letter: {char}")
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception

  if (
  spark_format[i : i + 5] in {"M" * 5, "L" * 5}
@@ -362,9 +408,13 @@ def convert_spark_format_to_snowflake(
  or spark_format[i : i + 3] in {"kkk", "KKK"}
  or spark_format[i : i + 10] == "SSSSSSSSSS"
  ):
- raise DateTimeException(
+ exception = DateTimeException(
  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception

  matched_pattern = False

@@ -375,7 +425,11 @@ def convert_spark_format_to_snowflake(
  spark_key
  ]
  if isinstance(snowflake_equivalent, _UnsupportedSparkFormatPattern):
- raise DateTimeException(snowflake_equivalent.message)
+ exception = DateTimeException(snowflake_equivalent.message)
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if snowflake_equivalent is not None:
  snowflake_format += snowflake_equivalent
  i += len(spark_key)
@@ -389,7 +443,11 @@ def convert_spark_format_to_snowflake(
  isinstance(timestamp_input_type, StringType)
  and char not in snowflake_time_format_separator
  ):
- raise DateTimeException(f"Illegal pattern character: {char}")
+ exception = DateTimeException(f"Illegal pattern character: {char}")
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception

  snowflake_format += f'"{char}"'
  i += 1
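Every raise site in date_time_format_mapping.py now follows the same three-step pattern: construct the exception, attach a code, then raise. A minimal sketch of that flow, assuming snowpark-connect 1.6.0 is installed (reject_unsupported_pattern is an illustrative helper, not part of the package):

    from pyspark.errors.exceptions.base import DateTimeException

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


    def reject_unsupported_pattern(spark_format: str) -> None:
        # Build the exception first so the error code travels with it
        # into the gRPC error response instead of being raised bare.
        exception = DateTimeException(
            f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
        )
        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
        raise exception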

snowflake/snowpark_connect/error/error_codes.py (new file)

@@ -0,0 +1,50 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+
+ """
+ Error code constants for Snowpark Connect.
+
+ This module defines custom error codes that can be attached to exceptions
+ and included in gRPC error responses.
+ """
+
+
+ class ErrorCodes:
+ """Constants for Snowpark Connect custom error codes."""
+
+ # 1000-1999: Startup related errors
+ MISSING_DATABASE = 1001
+ MISSING_SCHEMA = 1002
+ RESOURCE_INITIALIZATION_FAILED = 1003
+ TCP_PORT_ALREADY_IN_USE = 1004
+ INVALID_SPARK_CONNECT_URL = 1005
+ INVALID_STARTUP_INPUT = 1006
+ INVALID_STARTUP_OPERATION = 1007
+ STARTUP_CONNECTION_FAILED = 1008
+
+ # 2000-2999: Configuration related errors
+ INVALID_CONFIG_VALUE = 2001
+ CONFIG_CHANGE_NOT_ALLOWED = 2002
+ CONFIG_NOT_ENABLED = 2003
+
+ # 3000-3999: User code errors
+ INVALID_SQL_SYNTAX = 3001
+ TYPE_MISMATCH = 3002
+ INVALID_CAST = 3003
+ INVALID_FUNCTION_ARGUMENT = 3004
+ ARRAY_INDEX_OUT_OF_BOUNDS = 3005
+ DIVISION_BY_ZERO = 3006
+ INVALID_INPUT = 3007
+ INVALID_OPERATION = 3008
+ INSUFFICIENT_INPUT = 3009
+
+ # 4000-4999: What we don't support
+ UNSUPPORTED_OPERATION = 4001
+ UNSUPPORTED_TYPE = 4002
+
+ # 5000-5999: Internal errors
+ INTERNAL_ERROR = 5001
+ TABLE_NOT_FOUND = 5002
+ COLUMN_NOT_FOUND = 5003
+ AMBIGUOUS_COLUMN_NAME = 5004
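The constants are grouped by their thousands digit: 1xxx startup, 2xxx configuration, 3xxx user code, 4xxx unsupported features, 5xxx internal. A hedged sketch of bucketing a code by that convention (error_category is a hypothetical helper, not part of the wheel; assumes the package is importable):

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes

    _CATEGORIES = {
        1: "startup",
        2: "configuration",
        3: "user code",
        4: "unsupported",
        5: "internal",
    }


    def error_category(code: int) -> str:
        # The thousands digit of the code identifies its category.
        return _CATEGORIES.get(code // 1000, "unknown")


    assert error_category(ErrorCodes.DIVISION_BY_ZERO) == "user code"    # 3006
    assert error_category(ErrorCodes.UNSUPPORTED_TYPE) == "unsupported"  # 4002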

snowflake/snowpark_connect/error/error_utils.py

@@ -12,11 +12,13 @@ https://github.com/apache/spark/blob/master/common/utils/src/main/resources/erro
  import json
  import pathlib
  import re
+ import threading
  import traceback

  import jpype
  from google.protobuf import any_pb2
  from google.rpc import code_pb2, error_details_pb2, status_pb2
+ from pyspark.errors import TempTableAlreadyExistsException
  from pyspark.errors.error_classes import ERROR_CLASSES_MAP
  from pyspark.errors.exceptions.base import (
  AnalysisException,
@@ -35,9 +37,12 @@ from snowflake.core.exceptions import NotFoundError

  from snowflake.connector.errors import ProgrammingError
  from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
- from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
  from snowflake.snowpark_connect.error.error_mapping import ERROR_MAPPINGS_JSON

+ # Thread-local storage for custom error codes when we can't attach them directly to exceptions
+ _thread_local = threading.local()
+
  # The JSON string in error_mapping.py is a copy of https://github.com/apache/spark/blob/master/common/utils/src/main/resources/error/error-conditions.json.
  # The file doesn't have to be synced with spark latest main. Just update it when required.
  current_dir = pathlib.Path(__file__).parent.resolve()
@@ -54,8 +59,11 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
  SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
  PythonException: "org.apache.spark.api.python.PythonException",
  UnsupportedOperationException: "java.lang.UnsupportedOperationException",
+ TempTableAlreadyExistsException: "org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException",
  }

+ TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS = "TABLE_OR_VIEW_NOT_FOUND"
+
  WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}
  ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {
  904,
@@ -79,6 +87,23 @@ invalid_bit_pattern = re.compile(
  r"Invalid bit position: \d+ exceeds the bit (?:upper|lower) limit",
  re.IGNORECASE,
  )
+ CREATE_SCHEMA_PATTERN = re.compile(r"create\s+schema", re.IGNORECASE)
+ CREATE_TABLE_PATTERN = re.compile(r"create\s+table", re.IGNORECASE)
+
+
+ def attach_custom_error_code(exception: Exception, custom_error_code: int) -> Exception:
+ """
+ Attach a custom error code to any exception instance.
+ This allows us to add custom error codes to existing PySpark exceptions.
+ """
+ if not hasattr(exception, "custom_error_code"):
+ try:
+ exception.custom_error_code = custom_error_code
+ except (AttributeError, TypeError):
+ # Some exception types (like Java exceptions) don't allow setting custom attributes
+ # Store the error code in thread-local storage for later retrieval
+ _thread_local.pending_error_code = custom_error_code
+ return exception


  def contains_udtf_select(sql_string):
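attach_custom_error_code sets the code as an attribute when the exception allows it and falls back to thread-local storage when it does not (for example, wrapped Java exceptions reject new attributes). A simplified, self-contained sketch of the two paths using only the standard library; Frozen merely simulates an exception that refuses attribute assignment:

    import threading

    _thread_local = threading.local()


    def attach(exception: Exception, code: int) -> Exception:
        # Simplified mirror of the diff's logic: prefer an attribute,
        # fall back to thread-local storage when assignment is refused.
        try:
            exception.custom_error_code = code
        except (AttributeError, TypeError):
            _thread_local.pending_error_code = code
        return exception


    err = ValueError("bad input")
    attach(err, 3007)
    assert err.custom_error_code == 3007  # normal path: attribute set directly


    class Frozen(Exception):
        def __setattr__(self, name, value):  # simulates a wrapped Java exception
            raise AttributeError(name)


    attach(Frozen("boom"), 5001)
    assert _thread_local.pending_error_code == 5001  # fallback path: thread-local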
@@ -100,20 +125,29 @@ def _get_converted_known_sql_or_custom_exception(

  # custom exception
  if "[snowpark_connect::invalid_array_index]" in msg:
- return ArrayIndexOutOfBoundsException(
+ exception = ArrayIndexOutOfBoundsException(
  message='The index <indexValue> is out of bounds. The array has <arraySize> elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
  )
+ attach_custom_error_code(exception, ErrorCodes.ARRAY_INDEX_OUT_OF_BOUNDS)
+ return exception
  if "[snowpark_connect::invalid_index_of_zero]" in msg:
- return SparkRuntimeException(
+ exception = SparkRuntimeException(
  message="[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ return exception
  if "[snowpark_connect::invalid_index_of_zero_in_slice]" in msg:
- return SparkRuntimeException(
+ exception = SparkRuntimeException(
  message="Unexpected value for start in function slice: SQL array indices start at 1."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ return exception
+
  invalid_bit = invalid_bit_pattern.search(msg)
  if invalid_bit:
- return IllegalArgumentException(message=invalid_bit.group(0))
+ exception = IllegalArgumentException(message=invalid_bit.group(0))
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ return exception
  match = snowpark_connect_exception_pattern.search(
  ex.message if hasattr(ex, "message") else str(ex)
  )
@@ -125,71 +159,136 @@ def _get_converted_known_sql_or_custom_exception(
  if class_name
  else SparkConnectGrpcException
  )
- return exception_class(message=message)
+ exception = exception_class(message=message)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "select with no columns" in msg and contains_udtf_select(query):
  # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
- return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")
+ exception = PythonException(
+ message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ return exception

  # known sql exception
  if ex.sql_error_code not in (100038, 100037, 100035, 100357):
  return None

  if "(22018): numeric value" in msg:
- return NumberFormatException(
+ exception = NumberFormatException(
  message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+ return exception
  if "(22018): boolean value" in msg:
- return SparkRuntimeException(
+ exception = SparkRuntimeException(
  message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+ return exception
  if "(22007): timestamp" in msg:
- return AnalysisException(
+ exception = AnalysisException(
  "[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Data type mismatch"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ return exception

  if getattr(ex, "sql_error_code", None) == 100357:
  if re.search(init_multi_args_exception_pattern, msg):
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the init method {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception
  if re.search(terminate_multi_args_exception_pattern, msg):
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "failed to split string, provided pattern:" in msg:
- return IllegalArgumentException(
+ exception = IllegalArgumentException(
  message=f"Failed to split string using provided pattern. {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "100357" in msg and "wrong tuple size for returned value" in msg:
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "100357 (p0000): python interpreter error:" in msg:
  if "in eval" in msg:
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'eval' method: error. {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "in terminate" in msg:
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'terminate' method: terminate error. {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  if "object is not iterable" in msg and contains_udtf_select(query):
- return PythonException(
+ exception = PythonException(
  message=f"[UDTF_RETURN_NOT_ITERABLE] {ex.message}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

- return PythonException(message=f"{ex.message}")
+ exception = PythonException(message=f"{ex.message}")
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ return exception

  return None


+ def _sanitize_custom_error_message(msg):
+ if "[snowpark_connect::unsupported_operation]" in msg:
+ return (
+ msg.replace("[snowpark_connect::unsupported_operation] ", ""),
+ ErrorCodes.UNSUPPORTED_OPERATION,
+ )
+ if "[snowpark_connect::internal_error]" in msg:
+ return (
+ msg.replace("[snowpark_connect::internal_error] ", ""),
+ ErrorCodes.INTERNAL_ERROR,
+ )
+ if "[snowpark_connect::invalid_operation]" in msg:
+ return (
+ msg.replace("[snowpark_connect::invalid_operation] ", ""),
+ ErrorCodes.INVALID_OPERATION,
+ )
+ if "[snowpark_connect::type_mismatch]" in msg:
+ return (
+ msg.replace("[snowpark_connect::type_mismatch] ", ""),
+ ErrorCodes.TYPE_MISMATCH,
+ )
+ if "[snowpark_connect::invalid_input]" in msg:
+ return (
+ msg.replace("[snowpark_connect::invalid_input] ", ""),
+ ErrorCodes.INVALID_INPUT,
+ )
+ if "[snowpark_connect::unsupported_type]" in msg:
+ return (
+ msg.replace("[snowpark_connect::unsupported_type] ", ""),
+ ErrorCodes.UNSUPPORTED_TYPE,
+ )
+ return msg, None
+
+
  def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
+ # Lazy import to avoid circular dependency
+ from snowflake.snowpark_connect.config import global_config
+
  include_stack_trace = (
  global_config.get("spark.sql.pyspark.jvmStacktrace.enabled")
  if hasattr(global_config, "spark.sql.pyspark.jvmStacktrace.enabled")
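_sanitize_custom_error_message strips the [snowpark_connect::...] markers that internal code embeds in messages and maps each marker to an error code. The same idea expressed as a single table-driven lookup (a sketch only; the markers and codes are taken from this diff, and sanitize is an illustrative stand-in for the private helper; assumes the package is importable):

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes

    _MARKERS = {
        "[snowpark_connect::unsupported_operation] ": ErrorCodes.UNSUPPORTED_OPERATION,
        "[snowpark_connect::internal_error] ": ErrorCodes.INTERNAL_ERROR,
        "[snowpark_connect::invalid_operation] ": ErrorCodes.INVALID_OPERATION,
        "[snowpark_connect::type_mismatch] ": ErrorCodes.TYPE_MISMATCH,
        "[snowpark_connect::invalid_input] ": ErrorCodes.INVALID_INPUT,
        "[snowpark_connect::unsupported_type] ": ErrorCodes.UNSUPPORTED_TYPE,
    }


    def sanitize(msg: str):
        # Strip the first recognized marker and report its error code.
        for marker, code in _MARKERS.items():
            if marker.strip() in msg:
                return msg.replace(marker, ""), code
        return msg, None


    text, code = sanitize("[snowpark_connect::type_mismatch] cannot compare INT with MAP")
    assert text == "cannot compare INT with MAP"
    assert code == ErrorCodes.TYPE_MISMATCH  # 3002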
@@ -203,6 +302,16 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
  match ex:
  case SnowparkSQLException():
  if ex.sql_error_code in ANALYSIS_EXCEPTION_SQL_ERROR_CODE:
+ # Creation of schema that already exists
+ if ex.sql_error_code == 2002 and "already exists" in str(ex):
+ if CREATE_SCHEMA_PATTERN.search(ex.query):
+ spark_java_classes.append(
+ "org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException"
+ )
+ elif CREATE_TABLE_PATTERN.search(ex.query):
+ spark_java_classes.append(
+ "org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException"
+ )
  # Data type mismatch, invalid window function
  spark_java_classes.append("org.apache.spark.sql.AnalysisException")
  elif ex.sql_error_code == 100051:
@@ -211,6 +320,7 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
  error_class="DIVIDE_BY_ZERO",
  message_parameters={"config": '"spark.sql.ansi.enabled"'},
  )
+ attach_custom_error_code(ex, ErrorCodes.DIVISION_BY_ZERO)
  elif ex.sql_error_code in (100096, 100040):
  # Spark seems to want the Java base class instead of org.apache.spark.sql.SparkDateTimeException
  # which is what should really be thrown
@@ -221,6 +331,9 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
  ex = spark_ex
  spark_java_classes.append(SPARK_PYTHON_TO_JAVA_EXCEPTION[type(ex)])
  elif ex.sql_error_code == 2043:
+ spark_java_classes.append(
+ "org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException"
+ )
  spark_java_classes.append("org.apache.spark.sql.AnalysisException")
  message = f"does_not_exist: {str(ex)}"
  else:
@@ -252,13 +365,23 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
  )
  elif isinstance(ex, PySparkException):
  # pyspark exceptions thrown in sas layer
+
+ error_derived_java_class = []
+ if ex.error_class == TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS:
+ error_derived_java_class.append(
+ "org.apache.spark.sql.catalyst.analysis.NoSuchTableException"
+ )
+
  classes = type(ex).__mro__
  spark_java_classes = [
  SPARK_PYTHON_TO_JAVA_EXCEPTION[clazz]
  for clazz in classes
  if clazz in SPARK_PYTHON_TO_JAVA_EXCEPTION
  ]
- metadata = {"classes": json.dumps(spark_java_classes)}
+
+ metadata = {
+ "classes": json.dumps(error_derived_java_class + spark_java_classes)
+ }
  if include_stack_trace:
  metadata["stackTrace"] = "".join(
  traceback.TracebackException.from_exception(ex).format()
@@ -299,14 +422,40 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
  domain="snowflake.sas",
  )

- detail = any_pb2.Any()
- detail.Pack(error_info)
-
  if message is None:
  message = str(ex)

+ custom_error_code = None
+
+ # attach error code using visa exception message
+ message, custom_error_code_from_msg = _sanitize_custom_error_message(message)
+
+ # Check if exception already has a custom error code, if not add INTERNAL_ERROR as default
+ if not hasattr(ex, "custom_error_code") or ex.custom_error_code is None:
+ attach_custom_error_code(
+ ex,
+ ErrorCodes.INTERNAL_ERROR
+ if custom_error_code_from_msg is None
+ else custom_error_code_from_msg,
+ )
+
+ # Get the custom error code from the exception or thread-local storage
+ custom_error_code = getattr(ex, "custom_error_code", None) or getattr(
+ _thread_local, "pending_error_code", None
+ )
+
+ # Clear thread-local storage after retrieving the error code
+ if hasattr(_thread_local, "pending_error_code"):
+ delattr(_thread_local, "pending_error_code")
+
+ separator = "==========================================="
+ error_code_added_message = f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"
+
+ detail = any_pb2.Any()
+ detail.Pack(error_info)
+
  rich_status = status_pb2.Status(
- code=code_pb2.INTERNAL, message=message, details=[detail]
+ code=code_pb2.INTERNAL, message=error_code_added_message, details=[detail]
  )
  return rich_status

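For clients, the visible change is that every gRPC error message now leads with a banner carrying the resolved code. A sketch of just the formatting step (the separator and prefix strings are the ones added in this hunk; add_error_code_banner is an illustrative name):

    def add_error_code_banner(message: str, custom_error_code: int) -> str:
        # Reproduces the f-string used in build_grpc_error_response.
        separator = "==========================================="
        return f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"


    print(add_error_code_banner("Division by zero", 3006))
    # ===========================================
    # SNOWPARK CONNECT ERROR CODE: 3006
    # ===========================================
    # Division by zero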

snowflake/snowpark_connect/error/exceptions.py

@@ -2,27 +2,36 @@
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #

+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+

  class SnowparkConnectException(Exception):
  """Parent class to all SnowparkConnect related exceptions."""

- def __init__(self, *args, **kwargs) -> None:
+ def __init__(self, *args, custom_error_code=None, **kwargs) -> None:
  super().__init__(*args, **kwargs)
+ self.custom_error_code = custom_error_code


  class MissingDatabase(SnowparkConnectException):
- def __init__(self) -> None:
+ def __init__(self, custom_error_code=None) -> None:
  super().__init__(
  "No default database found in session",
+ custom_error_code=custom_error_code or ErrorCodes.MISSING_DATABASE,
  )


  class MissingSchema(SnowparkConnectException):
- def __init__(self) -> None:
+ def __init__(self, custom_error_code=None) -> None:
  super().__init__(
  "No default schema found in session",
+ custom_error_code=custom_error_code or ErrorCodes.MISSING_SCHEMA,
  )


  class MaxRetryExceeded(SnowparkConnectException):
- ...
+ def __init__(
+ self,
+ message="Maximum retry attempts exceeded",
+ ) -> None:
+ super().__init__(message)
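With the reworked base class, each subclass carries a default code that callers can still override. A minimal usage sketch, assuming snowpark-connect 1.6.0 is importable:

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.exceptions import MissingDatabase

    try:
        raise MissingDatabase()
    except MissingDatabase as err:
        # The default comes from ErrorCodes.MISSING_DATABASE (1001).
        assert err.custom_error_code == ErrorCodes.MISSING_DATABASE
        print(f"[{err.custom_error_code}] {err}")
        # [1001] No default database found in session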