snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (78)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +51 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  23. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  24. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  25. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  26. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  27. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  28. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  29. snowflake/snowpark_connect/relation/map_join.py +21 -10
  30. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  31. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  32. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  33. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  34. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  35. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  36. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  37. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  38. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  39. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  40. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  41. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  42. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  43. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  44. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  45. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  46. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  48. snowflake/snowpark_connect/relation/write/map_write.py +131 -34
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  50. snowflake/snowpark_connect/resources_initializer.py +5 -1
  51. snowflake/snowpark_connect/server.py +72 -19
  52. snowflake/snowpark_connect/type_mapping.py +54 -17
  53. snowflake/snowpark_connect/utils/context.py +42 -1
  54. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  55. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  56. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  57. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  58. snowflake/snowpark_connect/utils/profiling.py +25 -8
  59. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  60. snowflake/snowpark_connect/utils/session.py +5 -1
  61. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  62. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  63. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  64. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  65. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  66. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  67. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  68. snowflake/snowpark_connect/version.py +1 -1
  69. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  70. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
  71. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  72. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  73. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  74. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  75. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  78. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/pandas_udtf_utils.py
@@ -13,6 +13,10 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import IntegerType, PandasDataFrameType, StructType
 
+# Removed error imports to avoid UDF serialization issues
+# from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+# from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 def get_map_in_arrow_udtf(
     user_function: Callable,
@@ -60,14 +64,14 @@ def get_map_in_arrow_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pyarrow.RecordBatch, but is {type(result_iterator).__name__}"
             )
 
         for batch in result_iterator:
             if not isinstance(batch, pa.RecordBatch):
                 raise RuntimeError(
-                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should "
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should "
                     f"be iterator of pyarrow.RecordBatch, but is iterator of {type(batch).__name__}"
                 )
             if batch.num_rows > 0:
@@ -121,7 +125,7 @@ def create_pandas_udtf(
                 result_iterator, "__iter__"
             ):
                 raise RuntimeError(
-                    f"snowpark_connect::UDF_RETURN_TYPE Return type of the user-defined function should be "
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                     f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
                 )
 
@@ -140,7 +144,7 @@ def create_pandas_udtf(
                 if column not in self.output_column_original_names
             ]
             raise RuntimeError(
-                f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                f"[snowpark_connect::invalid_operation] [RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
                 "."
             )
         reordered_df = output_df[self.output_column_original_names]
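The four hunks above drop the shared error helpers in favor of inline [snowpark_connect::<code>] prefixes in the message text. Per the comment in the first hunk, importing the error modules at the top of this file would pull them into the serialized UDF payload, so code that runs inside the UDF sandbox embeds the tag directly instead. A minimal sketch of the two patterns (the ErrorCodes member below is illustrative, not necessarily the package's spelling):

    # Server-side code attaches a structured code to the exception object:
    exception = RuntimeError("Return type of the user-defined function is wrong")
    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)  # assumed usage
    raise exception

    # Code executed inside a serialized UDF/UDTF embeds the tag in the message,
    # so no snowpark_connect error module has to be importable in the sandbox:
    raise RuntimeError(
        "[snowpark_connect::type_mismatch] Return type of the user-defined "
        "function is wrong"
    )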
snowflake/snowpark_connect/utils/profiling.py
@@ -10,6 +10,7 @@
 
 import cProfile
 import functools
+import inspect
 import os
 from datetime import datetime
 from typing import Any, Callable
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
         profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
 
         profiler = cProfile.Profile()
-        profiler.enable()
-
-        try:
-            result = method(*args, **kwargs)
-            return result
-        finally:
-            profiler.disable()
-            profiler.dump_stats(profile_filename)
+
+        if inspect.isgeneratorfunction(method):
+
+            def profiled_generator():
+                profiler.enable()
+                try:
+                    generator = method(*args, **kwargs)
+                    for item in generator:
+                        profiler.disable()
+                        yield item
+                        profiler.enable()
+                finally:
+                    profiler.disable()
+                    profiler.dump_stats(profile_filename)
+
+            return profiled_generator()
+        else:
+            profiler.enable()
+            try:
+                result = method(*args, **kwargs)
+                return result
+            finally:
+                profiler.disable()
+                profiler.dump_stats(profile_filename)
 
     return wrapper
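The new generator branch exists because a decorated generator's body only runs between yields; pausing the profiler around each yield keeps the consumer's work out of the profile. A short usage sketch (assuming profile_method is applied as a decorator, as the functools/wrapper structure suggests; process is a stand-in for caller code):

    @profile_method
    def stream_batches():
        for i in range(3):
            yield i  # profiler is paused while the caller holds each item

    for batch in stream_batches():
        process(batch)  # consumer time, excluded from the .prof output

Note that the stats file is only written once the generator is exhausted or closed, since dump_stats runs in the finally block.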
snowflake/snowpark_connect/utils/scala_udf_utils.py
@@ -22,6 +22,8 @@ from typing import List, Union
 
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.udf_utils import (
@@ -467,7 +469,9 @@ def map_type_to_scala_type(
         case snowpark_type.VariantType:
             return "Variant"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def map_type_to_snowflake_type(
@@ -533,7 +537,9 @@ def map_type_to_snowflake_type(
         case snowpark_type.VariantType:
             return "VARIANT"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def cast_scala_map_args_from_given_type(
@@ -573,7 +579,9 @@ def cast_scala_map_args_from_given_type(
         case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
             return "java.sql.Timestamp.valueOf({arg_name})"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
         not is_snowpark_type and input_type.WhichOneof("kind") == "map"
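This attach-then-raise pattern repeats across most files in this release. Judging by the telemetry change further down, attach_custom_error_code records the code on the exception instance as custom_error_code; a minimal sketch of that assumption (the real helper lives in error/error_utils.py and may do more):

    def attach_custom_error_code(exception: BaseException, code: int) -> None:
        # Telemetry later reads this attribute back when reporting the error.
        exception.custom_error_code = code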
snowflake/snowpark_connect/utils/session.py
@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -191,7 +193,9 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
     if any("," in tag for tag in spark_tags):
-        raise ValueError("Tags cannot contain ','.")
+        exception = ValueError("Tags cannot contain ','.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
     snowpark_session = get_or_create_snowpark_session()
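The comma check is consistent with the Spark-side tags later being joined into the single query_tag string a Snowpark session supports, where a comma inside one tag would be indistinguishable from a separator. A sketch of that assumed join format:

    spark_tags = ["nightly", "etl"]                     # one Spark job tag per entry
    snowpark_session.query_tag = ",".join(spark_tags)   # separator is an assumption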
snowflake/snowpark_connect/utils/telemetry.py
@@ -27,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -346,6 +347,11 @@ class Telemetry:
             summary["error_message"] = str(e)
             summary["error_type"] = type(e).__name__
 
+            if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+                summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+            else:
+                summary["error_code"] = e.custom_error_code
+
             error_location = _error_location(e)
             if error_location:
                 summary["error_location"] = error_location
snowflake/snowpark_connect/utils/temporary_view_cache.py
@@ -7,6 +7,8 @@ from typing import Optional, Tuple
 from pyspark.errors import AnalysisException
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import get_session_id
 
@@ -22,9 +24,11 @@ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None
                 _temp_views.remove(key)
                 break
         else:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
 
     _temp_views[(name, current_session_id)] = df
 
snowflake/snowpark_connect/utils/udf_cache.py
@@ -118,7 +118,7 @@ def cached_udaf(
 
     if class_type is None:
         raise ValueError(
-            "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+            "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
        )
    else:
        # return udaf
@@ -379,7 +379,9 @@ def register_cached_java_udf(
 
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         if jar_path_in_zip not in zip_ref.namelist():
-            raise FileNotFoundError(f"{jar_path_in_zip} not found")
+            raise FileNotFoundError(
+                f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+            )
         zip_ref.extract(jar_path_in_zip, temp_dir)
 
     jar_path = f"{temp_dir}/{jar_path_in_zip}"
@@ -388,7 +390,7 @@ def register_cached_java_udf(
 
     if upload_result[0].status != "UPLOADED":
         raise RuntimeError(
-            f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+            f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
         )
 
     udf_is_cached = function_name in cache
snowflake/snowpark_connect/utils/udf_helper.py
@@ -18,6 +18,8 @@ from snowflake.snowpark import Column, Session
 from snowflake.snowpark.types import DataType, _parse_datatype_json_value
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -233,19 +235,25 @@ def _check_supported_udf(
 
             session = get_or_create_snowpark_session()
             if udf_proto.java_udf.class_name not in session._cached_java_udfs:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Can not load class {udf_proto.java_udf.class_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
             else:
-                raise ValueError(
+                exception = ValueError(
                     "Function type java_udf not supported for common inline user-defined function"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         case "scalar_scala_udf":
             pass
         case _ as function_type:
-            raise ValueError(
+            exception = ValueError(
                 f"Function type {function_type} not supported for common inline user-defined function"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _aggregate_function_check(
@@ -253,9 +261,11 @@ def _aggregate_function_check(
 ):
     name, is_aggregate_function = get_is_aggregate_function()
     if not udf_proto.deterministic and name != "default" and is_aggregate_function:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION] Non-deterministic expression {name}({udf_proto.function_name}) should not appear in the arguments of an aggregate function."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
 
 def _join_checks(snowpark_udf_arg_names: list[str]):
@@ -282,23 +292,27 @@ def _join_checks(snowpark_udf_arg_names: list[str]):
         and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Detected implicit cartesian product for {is_evaluating_join_condition[0]} join between logical plans. \n"
             f"Join condition is missing or trivial. \n"
             f"Either: use the CROSS JOIN syntax to allow cartesian products between those relations, or; "
             f"enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=True."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     if (
         is_evaluating_join_condition[0] != "INNER"
         and is_evaluating_join_condition[1]
        and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: "
             f"Python UDF in the ON clause of a {is_evaluating_join_condition[0]} JOIN. "
             f"In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def infer_snowpark_arguments(
snowflake/snowpark_connect/utils/udf_utils.py
@@ -103,7 +103,7 @@ class ProcessCommonInlineUserDefinedFunction:
                 )
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
 
     @property
@@ -112,7 +112,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_args
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf args"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf args"
             )
 
     @property
@@ -121,7 +121,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_arg_names
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf arg names"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf arg names"
             )
 
     def _create_python_udf(self):
@@ -291,5 +291,5 @@ class ProcessCommonInlineUserDefinedFunction:
                 return create_scala_udf(self)
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
snowflake/snowpark_connect/utils/udtf_helper.py
@@ -16,6 +16,8 @@ import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark.types import DataType, StructType, _parse_datatype_json_value
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils import pandas_udtf_utils, udtf_utils
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -37,7 +39,9 @@ def udtf_check(
     udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
 ) -> None:
     if udtf_proto.WhichOneof("function") != "python_udtf":
-        raise ValueError(f"Not python udtf {udtf_proto.function}")
+        exception = ValueError(f"Not python udtf {udtf_proto.function}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def require_creating_udtf_in_sproc(
snowflake/snowpark_connect/utils/udtf_utils.py
@@ -108,7 +108,9 @@ def create_udtf(
             imports=imports,
         )
         case _:
-            raise NotImplementedError(f"{called_from}")
+            raise NotImplementedError(
+                f"[snowpark_connect::unsupported_operation] {called_from}"
+            )
 
 
 def artifacts_reader_wrapper(user_udtf_cls: type) -> type:
@@ -196,17 +198,19 @@ def _create_convert_table_argument_to_row():
                 # Named access: row["col1"], row["col2"]
                 if key in self._field_to_index:
                     return self._values[self._field_to_index[key]]
-                raise KeyError(key)
+                raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
             else:
-                raise TypeError(f"Invalid key type: {type(key)}")
+                raise TypeError(
+                    f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}"
+                )
 
         def __getattr__(self, name):
             # Attribute access: row.col1, row.col2
             if name.startswith("_"):
-                raise AttributeError(name)
+                raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
             if name in self._field_to_index:
                 return self._values[self._field_to_index[name]]
-            raise AttributeError(name)
+            raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
 
         def __len__(self):
             return len(self._values)
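The __getitem__/__getattr__ hunks above belong to a Spark-Row-like wrapper that exposes a positional tuple through both row["col"] and row.col. A self-contained sketch of that shape (the constructor and the integer-index branch are assumed from the visible methods):

    class _Row:
        def __init__(self, values, field_to_index):
            self._values = tuple(values)
            self._field_to_index = dict(field_to_index)

        def __getitem__(self, key):
            if isinstance(key, int):
                return self._values[key]         # positional access: row[0]
            if isinstance(key, str):
                if key in self._field_to_index:  # named access: row["col1"]
                    return self._values[self._field_to_index[key]]
                raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
            raise TypeError(f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}")

        def __getattr__(self, name):
            # Attribute access: row.col1 (only reached when normal lookup fails)
            if not name.startswith("_") and name in self._field_to_index:
                return self._values[self._field_to_index[name]]
            raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")

        def __len__(self):
            return len(self._values)

    row = _Row(["a", 1], {"name": 0, "count": 1})
    assert row["name"] == row.name == "a" and len(row) == 2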
@@ -280,7 +284,9 @@ def spark_compatible_udtf_wrapper(
             return val
         if isinstance(val, datetime.datetime):
             return val.date()
-        raise AttributeError(f"Invalid date value {val}")
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid date value {val}"
+        )
 
     def _coerce_to_binary(val: object, target_type_name: str = "byte") -> bytes | None:
         if target_type_name == "binary":
@@ -344,7 +350,9 @@ def spark_compatible_udtf_wrapper(
     def _coerce_to_timestamp(val: object) -> datetime.datetime | None:
         if isinstance(val, datetime.datetime):
             return val
-        raise AttributeError(f"Invalid time stamp value {val}")
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid time stamp value {val}"
+        )
 
     SCALAR_COERCERS = {
         "bool": _coerce_to_bool,
@@ -448,7 +456,7 @@ def spark_compatible_udtf_wrapper(
 
         if not isinstance(raw_row_tuple, (tuple, list)):
             raise TypeError(
-                f"[UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
             )
 
         if len(raw_row_tuple) != len(expected_types):
@@ -468,7 +476,7 @@ def spark_compatible_udtf_wrapper(
             and val is not None
         ):
             raise RuntimeError(
-                f"[UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
+                f"[snowpark_connect::type_mismatch] [UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
             )
 
         coerced_row_list = [None] * len(expected_types)
@@ -534,7 +542,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
                 return pa.map_(key_type, value_type)
             case _, _:
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
+                    f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
                 )
     elif kind == "array":
         element_type_info = type_marker
@@ -544,7 +552,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         struct_fields_info = type_marker
         if not isinstance(struct_fields_info, dict):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
             )
         fields = []
         for field_name, field_type_info in struct_fields_info.items():
@@ -553,7 +561,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         return pa.struct(fields)
     else:
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
+            f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
         )
 
     def _convert_to_arrow_value(
@@ -577,7 +585,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         ]
         if not isinstance(obj, (list, tuple)):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
             )
         element_type = arrow_type.value_type
         return [_convert_to_arrow_value(e, element_type, "array") for e in obj]
@@ -585,7 +593,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         if pa.types.is_map(arrow_type):
             if not isinstance(obj, dict):
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
+                    f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
                 )
             key_type = arrow_type.key_type
             value_type = arrow_type.item_type
@@ -611,7 +619,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         else:
             # If the UDTF yields a list/tuple (or anything not a dict) for a struct column, it's an error.
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
            )
 
         # Check if a scalar type is expected and if obj is a collection; if so, error out.
@@ -623,7 +631,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         ):
             if isinstance(obj, (list, tuple, dict)):
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
+                    f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
                 )
 
         if pa.types.is_boolean(arrow_type):
@@ -639,7 +647,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
             elif obj == 1:
                 return True
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
             )
         if isinstance(obj, str):
             v_str = obj.strip().lower()
@@ -648,7 +656,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
             if v_str == "false":
                 return False
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
             )
 
         if pa.types.is_integer(arrow_type):
@@ -664,7 +672,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
             except ValueError:
                 pass
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
             )
 
         if pa.types.is_floating(arrow_type):
@@ -676,7 +684,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
             except ValueError:
                 pass
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
             )
 
         if pa.types.is_string(arrow_type):
@@ -688,7 +696,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
             if isinstance(obj, str):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
             )
 
         if pa.types.is_binary(arrow_type) or pa.types.is_fixed_size_binary(arrow_type):
@@ -699,21 +707,21 @@ def spark_compatible_udtf_wrapper_with_arrow(
             if isinstance(obj, int):
                 return bytearray([obj])
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
            )
 
         if pa.types.is_date(arrow_type):
             if isinstance(obj, datetime.date):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
             )
 
         if pa.types.is_timestamp(arrow_type):
             if isinstance(obj, datetime.datetime):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
             )
 
         if pa.types.is_decimal(arrow_type):
@@ -728,11 +736,11 @@ def spark_compatible_udtf_wrapper_with_arrow(
                 pass
 
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
             )
 
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
+            f"[snowpark_connect::unsupported_operation] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
         )
 
     class WrappedUDTF:
snowflake/snowpark_connect/version.py
@@ -2,4 +2,4 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-VERSION = (0,30,1)
+VERSION = (0,31,0)
{snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.30.1
+Version: 0.31.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,8 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.40.0,==1.39.1
+Requires-Dist: snowflake-snowpark-python[pandas]<1.41.0,==1.40.0
+Requires-Dist: snowflake-connector-python<4.0.0,>=3.18.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0