snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (78)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +51 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  23. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  24. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  25. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  26. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  27. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  28. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  29. snowflake/snowpark_connect/relation/map_join.py +21 -10
  30. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  31. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  32. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  33. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  34. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  35. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  36. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  37. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  38. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  39. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  40. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  41. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  42. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  43. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  44. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  45. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  46. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  48. snowflake/snowpark_connect/relation/write/map_write.py +131 -34
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  50. snowflake/snowpark_connect/resources_initializer.py +5 -1
  51. snowflake/snowpark_connect/server.py +72 -19
  52. snowflake/snowpark_connect/type_mapping.py +54 -17
  53. snowflake/snowpark_connect/utils/context.py +42 -1
  54. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  55. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  56. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  57. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  58. snowflake/snowpark_connect/utils/profiling.py +25 -8
  59. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  60. snowflake/snowpark_connect/utils/session.py +5 -1
  61. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  62. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  63. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  64. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  65. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  66. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  67. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  68. snowflake/snowpark_connect/version.py +1 -1
  69. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  70. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
  71. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  72. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  73. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  74. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  75. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  78. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/server.py

@@ -56,7 +56,11 @@ from snowflake.snowpark_connect.analyze_plan.map_tree_string import map_tree_str
 from snowflake.snowpark_connect.config import route_config_proto
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.control_server import ControlServicer
-from snowflake.snowpark_connect.error.error_utils import build_grpc_error_response
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    attach_custom_error_code,
+    build_grpc_error_response,
+)
 from snowflake.snowpark_connect.execute_plan.map_execution_command import (
     map_execution_command,
 )
@@ -96,7 +100,7 @@ from snowflake.snowpark_connect.utils.interrupt import (
     interrupt_queries_with_tag,
     interrupt_query,
 )
-from snowflake.snowpark_connect.utils.profiling import profile_method
+from snowflake.snowpark_connect.utils.profiling import PROFILING_ENABLED, profile_method
 from snowflake.snowpark_connect.utils.session import (
     configure_snowpark_session,
     get_or_create_snowpark_session,
@@ -157,6 +161,7 @@ def _handle_exception(context, e: Exception):
 
     if tcm.TCM_MODE:
         # TODO: SNOW-2009834 gracefully return error back in TCM
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
 
     from grpc_status import rpc_status
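
Nearly every hunk that follows applies the pattern introduced by the import change above: build the exception, tag it with an ErrorCodes member via attach_custom_error_code, then raise it, presumably so build_grpc_error_response can return a stable, client-facing code. The codes visible in this file are INTERNAL_ERROR, UNSUPPORTED_OPERATION, INVALID_SPARK_CONNECT_URL, TCP_PORT_ALREADY_IN_USE, INVALID_STARTUP_OPERATION, INVALID_STARTUP_INPUT, and STARTUP_CONNECTION_FAILED. A minimal sketch of the pattern, using only the helpers these hunks import (the _validate_port function is illustrative; ErrorCodes.INVALID_INPUT itself appears later in the env_utils.py hunk):

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


    def _validate_port(port: int) -> None:
        # Illustrative only: construct the exception, attach a custom error code,
        # then raise, mirroring the pattern applied throughout this release.
        if port < 0:
            exception = ValueError(f"Invalid port: {port}")
            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
            raise exception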

@@ -374,9 +379,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                         ),
                     )
                 case _:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"ANALYZE PLAN NOT IMPLEMENTED:\n{request}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
         except Exception as e:
             _handle_exception(context, e)
         finally:
@@ -527,9 +536,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                     ),
                 )
             case _:
-                raise ValueError(
+                exception = ValueError(
                     f"Unexpected payload type in AddArtifacts: {request.WhichOneof('payload')}"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
         for name, data in cache_data.items():
             _try_handle_local_relation(name, bytes(data))
@@ -635,9 +648,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
             case proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID:
                 interrupted_ids = interrupt_query(request.operation_id)
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"INTERRUPT NOT IMPLEMENTED:\n{request}"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
         return proto_base.InterruptResponse(
             session_id=request.session_id,
@@ -655,9 +672,11 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         continue. If there is a ResultComplete, the client should use ReleaseExecute with
         """
         logger.info("ReattachExecute")
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Spark client has detached, please resubmit request. In a future version, the server will be support the reattach."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     def ReleaseExecute(self, request: proto_base.ReleaseExecuteRequest, context):
         """Release an reattachable execution, or parts thereof.
@@ -760,8 +779,11 @@ def _serve(
 
     ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size
 
+    # cProfile doesn't work correctly with multiple threads
+    max_workers = 1 if PROFILING_ENABLED else 10
+
     server = grpc.server(
-        futures.ThreadPoolExecutor(max_workers=10), options=server_options
+        futures.ThreadPoolExecutor(max_workers=max_workers), options=server_options
     )
     control_servicer = ControlServicer(session)
     proto_base_grpc.add_SparkConnectServiceServicer_to_server(
@@ -791,6 +813,7 @@ def _serve(
             )
         else:
             logger.error("Error starting up Snowpark Connect server", exc_info=True)
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
             raise e
     finally:
         # flush the telemetry queue if possible
@@ -808,7 +831,9 @@ def _set_remote_url(remote_url: str):
     elif parsed_url.scheme == "unix":
         _server_url = remote_url.split("/;")[0]
     else:
-        raise RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
+        exception = RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+        raise exception
 
 
 def _set_server_tcp_port(server_port: int):
@@ -822,7 +847,9 @@ def _check_port_is_free(port: int) -> None:
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.settimeout(1)
        if s.connect_ex(("127.0.0.1", port)) == 0:
-            raise RuntimeError(f"TCP port {port} is already in use")
+            exception = RuntimeError(f"TCP port {port} is already in use")
+            attach_custom_error_code(exception, ErrorCodes.TCP_PORT_ALREADY_IN_USE)
+            raise exception
 
 
 def _set_server_unix_domain_socket(path: str):
@@ -834,14 +861,18 @@ def _set_server_unix_domain_socket(path: str):
 
 def get_server_url() -> str:
     global _server_url
     if not _server_url:
-        raise RuntimeError("Server URL not set")
+        exception = RuntimeError("Server URL not set")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     return _server_url
 
 
 def get_client_url() -> str:
     global _client_url
     if not _client_url:
-        raise RuntimeError("Client URL not set")
+        exception = RuntimeError("Client URL not set")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     return _client_url
 
 
@@ -871,12 +902,14 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
         if url is None:
             url = get_client_url()
         if url[:6] != "unix:/" or len(url) < 7:
-            raise PySparkValueError(
+            exception = PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
                 message_parameters={
                     "detail": "The URL must start with 'unix://'. Please update the URL to follow the correct format, e.g., 'unix://unix_domain_socket_path'.",
                 },
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+            raise exception
 
         # Rewrite the URL to use http as the scheme so that we can leverage
         # Python's built-in parser to parse URL parameters
@@ -919,7 +952,7 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
         for p in parts:
             kv = p.split("=")
             if len(kv) != 2:
-                raise PySparkValueError(
+                exception = PySparkValueError(
                     error_class="INVALID_CONNECT_URL",
                     message_parameters={
                         "detail": f"Parameter '{p}' should be provided as a "
@@ -927,6 +960,10 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
                         f"the parameter to follow the correct format, e.g., 'key=value'.",
                     },
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_SPARK_CONNECT_URL
+                )
+                raise exception
             self.params[kv[0]] = urllib.parse.unquote(kv[1])
 
         netloc = self.url.netloc.split(":")
@@ -942,7 +979,7 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
             self.host = netloc[0]
             self.port = int(netloc[1])
         else:
-            raise PySparkValueError(
+            exception = PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
                 message_parameters={
                     "detail": f"Target destination '{self.url.netloc}' should match the "
@@ -950,6 +987,8 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
                     f"the correct format, e.g., 'hostname:port'.",
                 },
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+            raise exception
 
         # We override this to enable compatibility with Spark 4.0
         host = None
@@ -988,9 +1027,11 @@ def start_jvm():
         if tcm.TCM_MODE:
             # No-op if JVM is already started in TCM mode
             return
-        raise RuntimeError(
+        exception = RuntimeError(
             "JVM must not be running when starting the Spark Connect server"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     pyspark_jars = (
         pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
@@ -1065,14 +1106,18 @@ def start_session(
    _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size
 
     if os.environ.get("SPARK_ENV_LOADED"):
-        raise RuntimeError(
+        exception = RuntimeError(
             "Snowpark Connect cannot be run inside of a Spark environment"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_OPERATION)
+        raise exception
     if connection_parameters is not None:
         if snowpark_session is not None:
-            raise ValueError(
+            exception = ValueError(
                 "Only specify one of snowpark_session and connection_parameters"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_INPUT)
+            raise exception
         snowpark_session = snowpark.Session.builder.configs(
             connection_parameters
         ).create()
@@ -1084,9 +1129,11 @@ def start_session(
         return
 
     if len(list(filter(None, [remote_url, tcp_port, unix_domain_socket]))) > 1:
-        raise RuntimeError(
+        exception = RuntimeError(
            "Can only set at most one of remote_url, tcp_port, and unix_domain_socket"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_INPUT)
+        raise exception
 
     url_from_env = os.environ.get("SPARK_REMOTE", None)
     if remote_url:
@@ -1124,7 +1171,11 @@ def start_session(
             server_thread.start()
             _server_running.wait()
             if _server_error:
-                raise RuntimeError("Snowpark Connect session failed to start")
+                exception = RuntimeError("Snowpark Connect session failed to start")
+                attach_custom_error_code(
+                    exception, ErrorCodes.STARTUP_CONNECTION_FAILED
+                )
+                raise exception
             return server_thread
         else:
             # Launch in the foreground.
@@ -1132,6 +1183,7 @@ def start_session(
     except Exception as e:
         _reset_server_run_state()
         logger.error(e, exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e
 
 
@@ -1165,6 +1217,7 @@ def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSessi
     except Exception as e:
         _reset_server_run_state()
         logger.error(e, exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
 
 

snowflake/snowpark_connect/type_mapping.py

@@ -29,12 +29,17 @@ from snowflake.snowpark_connect.constants import (
 from snowflake.snowpark_connect.date_time_format_mapping import (
     convert_spark_format_to_snowflake,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_sql_expression import (
     _INTERVAL_DAYTIME_PATTERN_RE,
     _INTERVAL_YEARMONTH_PATTERN_RE,
 )
-from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
+from snowflake.snowpark_connect.utils.context import (
+    get_is_evaluating_sql,
+    get_jpype_jclass_lock,
+)
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
@@ -61,12 +66,14 @@ SNOWPARK_TYPE_NAME_TO_PYSPARK_TYPE_NAME = {
 
 @cache
 def _get_struct_type_class():
-    return jpype.JClass("org.apache.spark.sql.types.StructType")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.types.StructType")
 
 
 @cache
 def get_python_sql_utils_class():
-    return jpype.JClass("org.apache.spark.sql.api.python.PythonSQLUtils")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.api.python.PythonSQLUtils")
 
 
 def _parse_ddl_with_spark_scala(ddl_string: str) -> pyspark.sql.types.DataType:
@@ -291,9 +298,11 @@ def snowpark_to_proto_type(
                 )
             }
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {data_type}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def cast_to_match_snowpark_type(
@@ -333,7 +342,9 @@ def cast_to_match_snowpark_type(
                 with suppress(TypeError):
                     date = datetime.strptime(content, format)
                     return date
-            raise ValueError(f"Date casting error for {str(content)}")
+            exception = ValueError(f"Date casting error for {str(content)}")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception
         case snowpark.types.ShortType:
             return int(content)
         case snowpark.types.StringType:
@@ -363,9 +374,11 @@
         case snowpark.types.DayTimeIntervalType:
             return str(content)
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def snowpark_to_iceberg_type(data_type: snowpark.types.DataType) -> str:
@@ -398,9 +411,11 @@ def snowpark_to_iceberg_type(data_type: snowpark.types.DataType) -> str:
         case snowpark.types.TimestampType:
            return "timestamp"
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type for iceber: {data_type}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def proto_to_snowpark_type(
@@ -487,9 +502,11 @@ def map_snowpark_types_to_pyarrow_types(
                     )
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Unsupported arrow type {pa_type} for snowpark ArrayType."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
         case snowpark.types.BinaryType:
             return pa.binary()
         case snowpark.types.BooleanType:
@@ -530,9 +547,11 @@
                     ),
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Unsupported arrow type {pa_type} for snowpark MapType."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
         case snowpark.types.NullType:
             return pa.string()
         case snowpark.types.ShortType:
@@ -557,9 +576,11 @@
                     ]
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Unsupported arrow type {pa_type} for snowpark StructType."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
         case snowpark.types.TimestampType:
             unit = pa_type.unit
             tz = pa_type.tz
@@ -576,9 +597,11 @@
             # Return string type so formatted intervals are preserved in display
             return pa.string()
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {snowpark_type}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataType:
@@ -648,9 +671,11 @@ def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataTy
     elif pa.types.is_null(pa_type):
         return snowpark.types.NullType()
     else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
            f"Unsupported PyArrow data type: {pa_type}"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def map_pyspark_types_to_snowpark_types(
@@ -736,9 +761,11 @@ def map_pyspark_types_to_snowpark_types(
         return snowpark.types.DayTimeIntervalType(
             type_to_map.startField, type_to_map.endField
         )
-    raise SnowparkConnectNotImplementedError(
+    exception = SnowparkConnectNotImplementedError(
         f"Unsupported spark data type: {type_to_map}"
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
 
 
 def map_snowpark_to_pyspark_types(
@@ -811,7 +838,11 @@ def map_snowpark_to_pyspark_types(
         return pyspark.sql.types.DayTimeIntervalType(
             type_to_map.start_field, type_to_map.end_field
         )
-    raise SnowparkConnectNotImplementedError(f"Unsupported data type: {type_to_map}")
+    exception = SnowparkConnectNotImplementedError(
+        f"Unsupported data type: {type_to_map}"
+    )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
 
 
 def map_simple_types(simple_type: str) -> snowpark.types.DataType:
@@ -866,9 +897,11 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
         precision = int(simple_type.split("(")[1].split(",")[0])
         scale = int(simple_type.split(",")[1].split(")")[0])
         return snowpark.types.DecimalType(precision, scale)
-    raise SnowparkConnectNotImplementedError(
+    exception = SnowparkConnectNotImplementedError(
        f"Unsupported simple type: {simple_type}"
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
 
 
 def map_json_schema_to_snowpark(
@@ -1009,9 +1042,11 @@ def map_spark_timestamp_format_expression(
             lit_value, _ = get_literal_field_and_name(arguments.literal)
             return convert_spark_format_to_snowflake(lit_value, timestamp_input_type)
        case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported expression type {other} in timestamp format argument"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def map_spark_number_format_expression(
@@ -1030,9 +1065,11 @@ def map_spark_number_format_expression(
         case "literal":
             lit_value, _ = get_literal_field_and_name(arguments.literal)
         case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported expression type {other} in number format argument"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     return _map_spark_to_snowflake_number_format(lit_value)
 

snowflake/snowpark_connect/utils/context.py

@@ -2,10 +2,12 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 
+import os
 import re
+import threading
 from contextlib import contextmanager
 from contextvars import ContextVar
-from typing import Mapping, Optional
+from typing import Iterator, Mapping, Optional
 
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 
@@ -25,6 +27,7 @@ _is_evaluating_sql = ContextVar[bool]("_is_evaluating_sql", default=False)
 _is_evaluating_join_condition = ContextVar(
     "_is_evaluating_join_condition", default=("default", False, [], [])
 )
+_is_processing_order_by = ContextVar[bool]("_is_processing_order_by", default=False)
 
 _sql_aggregate_function_count = ContextVar[int](
     "_contains_aggregate_function", default=0
@@ -56,6 +59,23 @@ _is_in_pivot = ContextVar[bool]("_is_in_pivot", default=False)
 _is_in_udtf_context = ContextVar[bool]("_is_in_udtf_context", default=False)
 _accessing_temp_object = ContextVar[bool]("_accessing_temp_object", default=False)
 
+# Thread-safe lock for JPype JClass creation to prevent access violations
+_jpype_jclass_lock = threading.Lock()
+
+
+@contextmanager
+def get_jpype_jclass_lock() -> Iterator[None]:
+    """
+    Context manager that acquires the JPype JClass lock on Windows platforms.
+    On non-Windows (os.name != 'nt'), it yields without acquiring the lock.
+    """
+    if os.name == "nt":
+        with _jpype_jclass_lock:
+            yield
+    else:
+        yield
+
+
 # Lateral Column Alias helpers
 # We keep a thread-local mapping from alias name -> TypedColumn that is
 # populated incrementally while the projection list is being processed.
207
227
  _is_evaluating_sql.set(prev)
208
228
 
209
229
 
230
+ def get_is_processing_order_by() -> bool:
231
+ """
232
+ Gets the value of _is_processing_order_by for the current context, defaults to False.
233
+ """
234
+ return _is_processing_order_by.get()
235
+
236
+
237
+ @contextmanager
238
+ def push_processing_order_by_scope():
239
+ """
240
+ Context manager that sets a flag indicating if ORDER BY expressions are being evaluated.
241
+ This enables optimizations like reusing already-computed UDF columns.
242
+ """
243
+ prev = _is_processing_order_by.get()
244
+ try:
245
+ _is_processing_order_by.set(True)
246
+ yield
247
+ finally:
248
+ _is_processing_order_by.set(prev)
249
+
250
+
210
251
  def get_is_evaluating_join_condition() -> tuple[str, bool, list, list]:
211
252
  """
212
253
  Gets the value of _is_evaluating_join_condition for the current context, defaults to False.
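
A sketch of how a caller would enter the new ORDER BY scope; the wrapper function below is illustrative, and the real call sites presumably live in the relation- and expression-mapping modules listed in the file summary above:

    from snowflake.snowpark_connect.utils.context import (
        get_is_processing_order_by,
        push_processing_order_by_scope,
    )


    def _map_order_by_keys(keys: list[str]) -> list[str]:
        # Inside the scope the flag reads True, so expression mappers can reuse
        # already-computed UDF columns; the previous value is restored on exit.
        with push_processing_order_by_scope():
            assert get_is_processing_order_by()
            return [key.strip() for key in keys]  # stand-in for real expression mapping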

snowflake/snowpark_connect/utils/describe_query_cache.py

@@ -12,6 +12,8 @@ from typing import Any
 from snowflake import snowpark
 from snowflake.connector.cursor import ResultMetadataV2
 from snowflake.snowpark._internal.server_connection import ServerConnection
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
@@ -148,6 +150,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
             telemetry.report_query(result, **kwargs)
         except Exception as e:
             telemetry.report_query(e, **kwargs)
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
             raise e
         return result
 

snowflake/snowpark_connect/utils/env_utils.py

@@ -8,6 +8,8 @@ Environment variable utilities for Snowpark Connect.
 
 import os
 
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 
 
@@ -37,9 +39,11 @@ def get_int_from_env(env_var: str, default: int) -> int:
     """
     # Validate that default is actually an integer
     if not isinstance(default, int):
-        raise TypeError(
+        exception = TypeError(
             f"Default value must be an integer, got {type(default).__name__}: {default}"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     value = os.getenv(env_var)
     if value is None:
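
For context, a usage sketch of the helper hardened here; the environment variable name is illustrative, and the fallback behavior when the variable is unset is implied by the os.getenv check but not fully shown in this hunk:

    from snowflake.snowpark_connect.utils.env_utils import get_int_from_env

    # Reads EXAMPLE_MAX_WORKERS from the environment; presumably falls back to 10
    # when it is unset, per the os.getenv(...) check visible above.
    workers = get_int_from_env("EXAMPLE_MAX_WORKERS", 10)

    # Passing a non-integer default now raises a TypeError that carries
    # ErrorCodes.INVALID_INPUT instead of an untagged exception.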

snowflake/snowpark_connect/utils/identifiers.py

@@ -13,6 +13,8 @@ from snowflake.snowpark_connect.config import (
     auto_uppercase_column_identifiers,
     auto_uppercase_non_column_identifiers,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 
 QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")
 UNQUOTED_SPARK_IDENTIFIER = re.compile(r"^\w+$")
@@ -25,7 +27,9 @@ def unquote_spark_identifier_if_quoted(spark_name: str) -> str:
     if QUOTED_SPARK_IDENTIFIER.match(spark_name):
         return spark_name[1:-1].replace("``", "`")
 
-    raise AnalysisException(f"Invalid name: {spark_name}")
+    exception = AnalysisException(f"Invalid name: {spark_name}")
+    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+    raise exception
 
 
 def spark_to_sf_single_id_with_unquoting(
@@ -191,7 +195,9 @@
 
     def __eq__(self, other: Any) -> bool:
         if not isinstance(other, FQN):
-            raise AnalysisException(f"{other} is not a valid FQN")
+            exception = AnalysisException(f"{other} is not a valid FQN")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         return self.identifier == other.identifier
 
     @classmethod
@@ -209,7 +215,9 @@
         result = re.fullmatch(qualifier_pattern, identifier)
 
         if result is None:
-            raise AnalysisException(f"{identifier} is not a valid identifier")
+            exception = AnalysisException(f"{identifier} is not a valid identifier")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
 
         unqualified_name = result.group("name")
         if result.group("second_qualifier") is not None:
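
A small behavioral sketch of the unquoting helper touched above, derived from the QUOTED_SPARK_IDENTIFIER regex and the slice/replace shown in the hunk (assuming no earlier branch intercepts backtick-quoted names):

    from snowflake.snowpark_connect.utils.identifiers import (
        unquote_spark_identifier_if_quoted,
    )

    # Outer backticks are stripped and escaped backticks (``) collapse to one.
    unquote_spark_identifier_if_quoted("`my``col`")  # -> "my`col"

    # Names the function cannot parse now raise an AnalysisException tagged with
    # ErrorCodes.INTERNAL_ERROR rather than an untagged exception.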