snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of snowpark-connect has been flagged as potentially problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/env_utils.py

@@ -8,6 +8,8 @@ Environment variable utilities for Snowpark Connect.
 
 import os
 
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 

@@ -37,9 +39,11 @@ def get_int_from_env(env_var: str, default: int) -> int:
     """
     # Validate that default is actually an integer
     if not isinstance(default, int):
-        raise TypeError(
+        exception = TypeError(
             f"Default value must be an integer, got {type(default).__name__}: {default}"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     value = os.getenv(env_var)
     if value is None:
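
This build-tag-raise pattern repeats across most files in the release. A minimal sketch of how the pieces fit together, assuming attach_custom_error_code simply records the code on the exception as a custom_error_code attribute (consistent with how the telemetry hunk further down reads it back); the ErrorCodes class and its value below are hypothetical stand-ins, not the real enum:

    # Sketch only: hypothetical stand-ins for error_codes.ErrorCodes and
    # error_utils.attach_custom_error_code, to illustrate the pattern.
    import os

    class ErrorCodes:
        INVALID_INPUT = "INVALID_INPUT"  # placeholder value

    def attach_custom_error_code(exception: Exception, code: str) -> None:
        # Assumed behavior: tag the exception so telemetry can later read it
        # back via hasattr(e, "custom_error_code").
        exception.custom_error_code = code

    def get_int_from_env(env_var: str, default: int) -> int:
        if not isinstance(default, int):
            exception = TypeError(
                f"Default value must be an integer, got {type(default).__name__}: {default}"
            )
            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
            raise exception
        value = os.getenv(env_var)
        return int(value) if value is not None else default
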
snowflake/snowpark_connect/utils/identifiers.py

@@ -13,6 +13,8 @@ from snowflake.snowpark_connect.config import (
     auto_uppercase_column_identifiers,
     auto_uppercase_non_column_identifiers,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 
 QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")
 UNQUOTED_SPARK_IDENTIFIER = re.compile(r"^\w+$")
@@ -25,7 +27,9 @@ def unquote_spark_identifier_if_quoted(spark_name: str) -> str:
     if QUOTED_SPARK_IDENTIFIER.match(spark_name):
         return spark_name[1:-1].replace("``", "`")
 
-    raise AnalysisException(f"Invalid name: {spark_name}")
+    exception = AnalysisException(f"Invalid name: {spark_name}")
+    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+    raise exception
 
 
 def spark_to_sf_single_id_with_unquoting(
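
For reference, the quoting rule the regex above encodes: a quoted Spark identifier is wrapped in backticks, with literal backticks doubled. A quick demonstration of the unquoting branch shown in the hunk:

    import re

    QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")

    name = "`my``col`"
    assert QUOTED_SPARK_IDENTIFIER.match(name)
    # Strip the outer backticks, then collapse doubled backticks:
    print(name[1:-1].replace("``", "`"))  # -> my`col
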
@@ -191,7 +195,9 @@ class FQN:
 
     def __eq__(self, other: Any) -> bool:
         if not isinstance(other, FQN):
-            raise AnalysisException(f"{other} is not a valid FQN")
+            exception = AnalysisException(f"{other} is not a valid FQN")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         return self.identifier == other.identifier
 
     @classmethod
@@ -209,7 +215,9 @@ class FQN:
         result = re.fullmatch(qualifier_pattern, identifier)
 
         if result is None:
-            raise AnalysisException(f"{identifier} is not a valid identifier")
+            exception = AnalysisException(f"{identifier} is not a valid identifier")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
 
         unqualified_name = result.group("name")
         if result.group("second_qualifier") is not None:
snowflake/snowpark_connect/utils/pandas_udtf_utils.py

@@ -13,6 +13,10 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import IntegerType, PandasDataFrameType, StructType
 
+# Removed error imports to avoid UDF serialization issues
+# from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+# from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 def get_map_in_arrow_udtf(
     user_function: Callable,
@@ -60,14 +64,14 @@ def get_map_in_arrow_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pyarrow.RecordBatch, but is {type(result_iterator).__name__}"
             )
 
         for batch in result_iterator:
             if not isinstance(batch, pa.RecordBatch):
                 raise RuntimeError(
-                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should "
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should "
                     f"be iterator of pyarrow.RecordBatch, but is iterator of {type(batch).__name__}"
                 )
             if batch.num_rows > 0:
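
These checks enforce PySpark's mapInArrow contract: the user function receives and must yield pyarrow.RecordBatch objects. Note that in this module the error codes are inlined as [snowpark_connect::...] message prefixes rather than attached via error_utils, because those imports were removed to avoid UDF serialization issues (see the comment in the import hunk above). A minimal compliant user function, for illustration only:

    import pyarrow as pa
    import pyarrow.compute as pc

    def double_first_column(batches):
        # A generator function, so the result has __iter__ and every item is
        # a pyarrow.RecordBatch -- satisfying both checks above.
        for batch in batches:
            yield pa.RecordBatch.from_arrays(
                [pc.multiply(batch.column(0), 2)],
                names=[batch.schema.names[0]],
            )
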
@@ -121,7 +125,7 @@ def create_pandas_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
            )
 
@@ -140,7 +144,7 @@
                 if column not in self.output_column_original_names
             ]
             raise RuntimeError(
-                f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                f"[snowpark_connect::invalid_operation] [RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
                 "."
             )
             reordered_df = output_df[self.output_column_original_names]
snowflake/snowpark_connect/utils/profiling.py

@@ -10,6 +10,7 @@
 
 import cProfile
 import functools
+import inspect
 import os
 from datetime import datetime
 from typing import Any, Callable
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
         profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
 
         profiler = cProfile.Profile()
-        profiler.enable()
-
-        try:
-            result = method(*args, **kwargs)
-            return result
-        finally:
-            profiler.disable()
-            profiler.dump_stats(profile_filename)
+
+        if inspect.isgeneratorfunction(method):
+
+            def profiled_generator():
+                profiler.enable()
+                try:
+                    generator = method(*args, **kwargs)
+                    for item in generator:
+                        profiler.disable()
+                        yield item
+                        profiler.enable()
+                finally:
+                    profiler.disable()
+                    profiler.dump_stats(profile_filename)
+
+            return profiled_generator()
+        else:
+            profiler.enable()
+            try:
+                result = method(*args, **kwargs)
+                return result
+            finally:
+                profiler.disable()
+                profiler.dump_stats(profile_filename)
 
     return wrapper
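
The new branch matters because calling a generator function only creates the generator: under the old wrapper, the finally block disabled the profiler and dumped stats before the caller had consumed a single item, so the generator body was never profiled. Disabling around each yield also keeps the caller's own work out of the profile. A usage sketch, assuming profiling is enabled and PROFILE_OUTPUT_DIR exists:

    @profile_method
    def stream_batches():
        for i in range(3):
            yield i  # work done here is now captured in the .prof output

    # Consuming the generator toggles the profiler on and off around each yield:
    assert list(stream_batches()) == [0, 1, 2]
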
snowflake/snowpark_connect/utils/scala_udf_utils.py

@@ -22,6 +22,8 @@ from typing import List, Union
 
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.udf_utils import (
@@ -467,7 +469,9 @@ def map_type_to_scala_type(
         case snowpark_type.VariantType:
             return "Variant"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def map_type_to_snowflake_type(
@@ -533,7 +537,9 @@ def map_type_to_snowflake_type(
         case snowpark_type.VariantType:
             return "VARIANT"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def cast_scala_map_args_from_given_type(
@@ -573,7 +579,9 @@ def cast_scala_map_args_from_given_type(
         case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
             return "java.sql.Timestamp.valueOf({arg_name})"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
         not is_snowpark_type and input_type.WhichOneof("kind") == "map"
snowflake/snowpark_connect/utils/session.py

@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -21,6 +23,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache
 
+SKIP_SESSION_CONFIGURATION = False
+
+
+def skip_session_configuration(skip: bool):
+    global SKIP_SESSION_CONFIGURATION
+    SKIP_SESSION_CONFIGURATION = skip
+
 
 # Suppress experimental warnings from snowflake.snowpark logger
 def _filter_experimental_warnings(record):
@@ -55,6 +64,8 @@ def configure_snowpark_session(session: snowpark.Session):
         global_config,
     )
 
+    global SKIP_SESSION_CONFIGURATION
+
     logger.info(f"Configuring session {session}")
 
     telemetry.initialize(session)
@@ -122,9 +133,16 @@
         "QUERY_TAG": f"'{query_tag}'",
     }
 
-    session.sql(
-        f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
-    ).collect()
+    # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
+    if not SKIP_SESSION_CONFIGURATION:
+        session.sql(
+            f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
+        ).collect()
+    else:
+        session_param_names = ", ".join(session_params.keys())
+        logger.info(
+            f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
+        )
 
     # Instrument the snowpark session to use a cache for describe queries.
     instrument_session_for_describe_cache(session)
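
How a caller might use the new escape hatch; a sketch only, assuming skip_session_configuration is imported from the module this hunk modifies (snowflake/snowpark_connect/utils/session.py) and that the listed parameters (e.g. QUERY_TAG) are set on the session by other means:

    from snowflake.snowpark_connect.utils.session import (
        configure_snowpark_session,
        skip_session_configuration,
    )

    # Inside a Native App stored procedure running as Execute As Owner,
    # ALTER SESSION is not permitted (SNOW-2245971), so opt out first:
    skip_session_configuration(True)
    configure_snowpark_session(session)  # `session` is an existing snowpark.Session
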
@@ -191,7 +209,9 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
     if any("," in tag for tag in spark_tags):
-        raise ValueError("Tags cannot contain ','.")
+        exception = ValueError("Tags cannot contain ','.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
     snowpark_session = get_or_create_snowpark_session()
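
The comma restriction mirrors the Spark Connect client, where job tags travel as a comma-separated list. A client-side sketch, assuming the PySpark 3.5+ tag API (spark is a Spark Connect SparkSession):

    spark.addTag("nightly-job")   # propagated into the Snowpark session's QUERY_TAG
    spark.addTag("bad,tag")       # rejected -- ',' is the tag separator on the wire
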
snowflake/snowpark_connect/utils/telemetry.py

@@ -27,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -346,6 +347,11 @@ class Telemetry:
             summary["error_message"] = str(e)
             summary["error_type"] = type(e).__name__
 
+            if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+                summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+            else:
+                summary["error_code"] = e.custom_error_code
+
 
             error_location = _error_location(e)
             if error_location:
                 summary["error_location"] = error_location
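
The hasattr/None pair above is equivalent to a getattr fallback; a compact restatement (using ErrorCodes as imported in this module), shown only to make the None handling explicit:

    def error_code_for(e: Exception):
        # Same logic as the hunk above: prefer the attached code, fall back
        # to INTERNAL_ERROR only when no code was attached (or it is None).
        code = getattr(e, "custom_error_code", None)
        return ErrorCodes.INTERNAL_ERROR if code is None else code
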
snowflake/snowpark_connect/utils/temporary_view_cache.py

@@ -7,6 +7,8 @@ from typing import Optional, Tuple
 from pyspark.errors import AnalysisException
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import get_session_id
 
@@ -22,9 +24,11 @@ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
                 _temp_views.remove(key)
                 break
         else:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
 
     _temp_views[(name, current_session_id)] = df
 
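This guard backs the usual PySpark distinction between the two temp-view entry points, which presumably funnel into register_temp_view with different replace flags; a usage sketch (df is any Spark DataFrame):

    df.createTempView("v")            # replace=False: raises TEMP_TABLE_OR_VIEW_ALREADY_EXISTS if "v" exists
    df.createOrReplaceTempView("v")   # replace=True: evicts the cached entry first, then registers
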
snowflake/snowpark_connect/utils/udf_cache.py

@@ -118,7 +118,7 @@ def cached_udaf(
 
     if class_type is None:
         raise ValueError(
-            "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+            "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
         )
     else:
         # return udaf
@@ -379,7 +379,9 @@ def register_cached_java_udf(
 
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         if jar_path_in_zip not in zip_ref.namelist():
-            raise FileNotFoundError(f"{jar_path_in_zip} not found")
+            raise FileNotFoundError(
+                f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+            )
         zip_ref.extract(jar_path_in_zip, temp_dir)
 
     jar_path = f"{temp_dir}/{jar_path_in_zip}"
@@ -388,7 +390,7 @@
 
     if upload_result[0].status != "UPLOADED":
         raise RuntimeError(
-            f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+            f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
         )
 
     udf_is_cached = function_name in cache
snowflake/snowpark_connect/utils/udf_helper.py

@@ -18,6 +18,8 @@ from snowflake.snowpark import Column, Session
 from snowflake.snowpark.types import DataType, _parse_datatype_json_value
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -233,19 +235,25 @@ def _check_supported_udf(
 
                session = get_or_create_snowpark_session()
                if udf_proto.java_udf.class_name not in session._cached_java_udfs:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Can not load class {udf_proto.java_udf.class_name}"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
            else:
-                raise ValueError(
+                exception = ValueError(
                     "Function type java_udf not supported for common inline user-defined function"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
        case "scalar_scala_udf":
            pass
        case _ as function_type:
-            raise ValueError(
+            exception = ValueError(
                 f"Function type {function_type} not supported for common inline user-defined function"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _aggregate_function_check(
@@ -253,9 +261,11 @@
 ):
     name, is_aggregate_function = get_is_aggregate_function()
     if not udf_proto.deterministic and name != "default" and is_aggregate_function:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION] Non-deterministic expression {name}({udf_proto.function_name}) should not appear in the arguments of an aggregate function."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
 
 def _join_checks(snowpark_udf_arg_names: list[str]):
@@ -282,23 +292,27 @@ def _join_checks(snowpark_udf_arg_names: list[str]):
         and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Detected implicit cartesian product for {is_evaluating_join_condition[0]} join between logical plans. \n"
             f"Join condition is missing or trivial. \n"
             f"Either: use the CROSS JOIN syntax to allow cartesian products between those relations, or; "
             f"enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=True."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     if (
         is_evaluating_join_condition[0] != "INNER"
         and is_evaluating_join_condition[1]
         and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: "
             f"Python UDF in the ON clause of a {is_evaluating_join_condition[0]} JOIN. "
             f"In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def infer_snowpark_arguments(
snowflake/snowpark_connect/utils/udf_utils.py

@@ -103,7 +103,7 @@ class ProcessCommonInlineUserDefinedFunction:
                 )
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
 
     @property
@@ -112,7 +112,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_args
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf args"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf args"
             )
 
     @property
@@ -121,7 +121,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_arg_names
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf arg names"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf arg names"
             )
 
     def _create_python_udf(self):
@@ -291,5 +291,5 @@ class ProcessCommonInlineUserDefinedFunction:
                 return create_scala_udf(self)
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
snowflake/snowpark_connect/utils/udtf_helper.py

@@ -16,6 +16,8 @@ import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark.types import DataType, StructType, _parse_datatype_json_value
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils import pandas_udtf_utils, udtf_utils
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -37,7 +39,9 @@ def udtf_check(
     udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
 ) -> None:
     if udtf_proto.WhichOneof("function") != "python_udtf":
-        raise ValueError(f"Not python udtf {udtf_proto.function}")
+        exception = ValueError(f"Not python udtf {udtf_proto.function}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def require_creating_udtf_in_sproc(