snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic.

Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -20,7 +20,7 @@ from contextlib import suppress
  from decimal import ROUND_HALF_EVEN, ROUND_HALF_UP, Context, Decimal
  from functools import partial, reduce
  from pathlib import Path
- from typing import List, Optional, Union
+ from typing import List, Optional
  from urllib.parse import quote, unquote
 
  import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
@@ -66,6 +66,7 @@ from snowflake.snowpark.types import (
  TimestampType,
  VariantType,
  YearMonthIntervalType,
+ _AnsiIntervalType,
  _FractionalType,
  _IntegralType,
  _NumericType,
@@ -74,6 +75,7 @@ from snowflake.snowpark_connect.column_name_handler import (
  ColumnNameMap,
  set_schema_getter,
  )
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.config import (
  get_boolean_session_config_param,
  get_timestamp_type,
@@ -83,6 +85,8 @@ from snowflake.snowpark_connect.constants import (
  DUPLICATE_KEY_FOUND_ERROR_TEMPLATE,
  STRUCTURED_TYPES_ENABLED,
  )
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.function_defaults import (
  inject_function_defaults,
  )
@@ -146,7 +150,11 @@ from snowflake.snowpark_connect.utils.xxhash64 import (
  MAX_UINT64 = 2**64 - 1
  MAX_INT64 = 2**63 - 1
  MIN_INT64 = -(2**63)
- MAX_ARRAY_SIZE = 2_147_483_647
+ MAX_32BIT_SIGNED_INT = 2_147_483_647
+
+ # Interval arithmetic precision limits
+ MAX_DAY_TIME_DAYS = 106751991 # Maximum days for day-time intervals
+ MAX_10_DIGIT_LIMIT = 1000000000 # 10-digit limit (1 billion) for interval operands
 
  NAN, INFINITY = float("nan"), float("inf")
 
@@ -188,7 +196,9 @@ def _validate_numeric_args(
  TypeError: If arguments cannot be converted to numeric types
  """
  if len(typed_args) < 2:
- raise ValueError(f"{function_name} requires at least 2 arguments")
+ exception = ValueError(f"{function_name} requires at least 2 arguments")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception
 
  modified_args = list(snowpark_args)
 
@@ -204,9 +214,11 @@
  # https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala#L204
  modified_args[i] = snowpark_fn.try_cast(snowpark_args[i], DoubleType())
  case _:
- raise TypeError(
+ exception = TypeError(
  f"Data type mismatch: {function_name} requires numeric types, but got {typed_args[0].typ} and {typed_args[1].typ}."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  return modified_args
 
@@ -266,6 +278,40 @@ def _coerce_for_comparison(
  return left_col, right_col
 
 
+ def _preprocess_not_equals_expression(exp: expressions_proto.Expression) -> str:
+ """
+ Transform NOT(col1 = col2) expressions to col1 != col2 for Snowflake compatibility.
+
+ Snowflake has issues with NOT (col1 = col2) in subqueries, so we rewrite
+ not(==(a, b)) to a != b by modifying the protobuf expression early.
+
+ Returns:
+ The (potentially modified) function name as a lowercase string.
+ """
+ function_name = exp.unresolved_function.function_name.lower()
+
+ # Snowflake has issues with NOT (col1 = col2) in subqueries.
+ # Transform not(==(a, b)) to a!=b by modifying the protobuf early.
+ if (
+ function_name in ("not", "!")
+ and len(exp.unresolved_function.arguments) == 1
+ and exp.unresolved_function.arguments[0].WhichOneof("expr_type")
+ == "unresolved_function"
+ and exp.unresolved_function.arguments[0].unresolved_function.function_name
+ == "=="
+ ):
+ inner_eq_func = exp.unresolved_function.arguments[0].unresolved_function
+ inner_args = list(inner_eq_func.arguments)
+
+ exp.unresolved_function.function_name = "!="
+ exp.unresolved_function.ClearField("arguments")
+ exp.unresolved_function.arguments.extend(inner_args)
+
+ function_name = "!="
+
+ return function_name
+
+
  def map_unresolved_function(
  exp: expressions_proto.Expression,
  column_mapping: ColumnNameMap,
@@ -294,6 +340,9 @@ def map_unresolved_function(
  # Inject default parameters for functions that need them (especially for Scala clients)
  inject_function_defaults(exp.unresolved_function)
 
+ # Transform NOT(col = col) to col != col for Snowflake compatibility
+ function_name = _preprocess_not_equals_expression(exp)
+
  def _resolve_args_expressions(exp: expressions_proto.Expression):
  def _resolve_fn_arg(exp):
  with resolving_fun_args():
@@ -349,7 +398,7 @@ def map_unresolved_function(
  function_name = exp.unresolved_function.function_name.lower()
  telemetry.report_function_usage(function_name)
  result_type: Optional[DataType | List[DateType]] = None
- qualifiers: List[str] = []
+ qualifier_parts: List[str] = []
 
  pyspark_func = getattr(pyspark_functions, function_name, None)
  if pyspark_func and pyspark_func.__doc__.lstrip().startswith("Aggregate function:"):
@@ -407,9 +456,11 @@ def map_unresolved_function(
  expected_arity = str(valid_arity)
 
  if invalid:
- raise AnalysisException(
+ exception = AnalysisException(
  f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires {expected_arity} parameters but the actual number is {arity}."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception
 
  def _like_util(column, patterns, mode, negate=False):
  """
@@ -422,9 +473,13 @@ def map_unresolved_function(
  :return: A Snowpark condition.
  """
  if len(patterns) == 0:
- raise ParseException("Expected something between '(' and ')'")
+ exception = ParseException("Expected something between '(' and ')'")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  if mode not in ["any", "all"]:
- raise ValueError("Mode must be 'any' or 'all'.")
+ exception = ValueError("Mode must be 'any' or 'all'.")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+ raise exception
 
  if mode == "any":
  condition = snowpark_fn.lit(False)
@@ -446,9 +501,13 @@ def map_unresolved_function(
  def _check_percentile_percentage(exp: expressions_proto.Expression) -> Column:
  perc = unwrap_literal(exp)
  if perc is None:
- raise AnalysisException("The percentage must not be null.")
+ exception = AnalysisException("The percentage must not be null.")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if not 0.0 <= perc <= 1.0:
- raise AnalysisException("The percentage must be between [0.0, 1.0].")
+ exception = AnalysisException("The percentage must be between [0.0, 1.0].")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  return snowpark_fn.lit(perc)
 
  def _handle_structured_aggregate_result(
@@ -497,9 +556,17 @@ def map_unresolved_function(
  )
  result_type = [f.datatype for f in udtf.output_schema]
  case "!=":
- result_exp = TypedColumn(
- snowpark_args[0] != snowpark_args[1], lambda: [BooleanType()]
+ _check_interval_string_comparison(
+ "!=", snowpark_typed_args, snowpark_arg_names
+ )
+ # Make the function name same as spark connect. a != b translate's to not(a=b)
+ spark_function_name = (
+ f"(NOT ({snowpark_arg_names[0]} = {snowpark_arg_names[1]}))"
+ )
+ left, right = _coerce_for_comparison(
+ snowpark_typed_args[0], snowpark_typed_args[1]
  )
+ result_exp = TypedColumn(left != right, lambda: [BooleanType()])
  case "%" | "mod":
  if spark_sql_ansi_enabled:
  result_exp = snowpark_args[0] % snowpark_args[1]
@@ -548,9 +615,11 @@ def map_unresolved_function(
  result_exp = snowpark_fn.lit(None)
  case (StringType(), StringType()):
  if spark_sql_ansi_enabled:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  else:
  result_type = DoubleType()
  result_exp = snowpark_args[0].try_cast(
@@ -598,12 +667,87 @@ def map_unresolved_function(
  result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
  result_type
  )
- case (_NumericType() as t, NullType()) | (
- NullType(),
- _NumericType() as t,
+ case (StringType(), t) | (t, StringType()) if isinstance(
+ t, _AnsiIntervalType
+ ):
+ if isinstance(snowpark_typed_args[0].typ, StringType):
+ result_type = type(
+ t
+ )() # YearMonthIntervalType() or DayTimeIntervalType()
+ result_exp = snowpark_args[1] * snowpark_args[0].try_cast(
+ LongType()
+ )
+ spark_function_name = (
+ f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+ )
+ else:
+ result_type = type(
+ t
+ )() # YearMonthIntervalType() or DayTimeIntervalType()
+ result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
+ LongType()
+ )
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+ )
+ case (
+ (_NumericType() as t, NullType())
+ | (NullType(), _NumericType() as t)
  ):
  result_type = t
  result_exp = snowpark_fn.lit(None)
+ case (NullType(), t) | (t, NullType()) if isinstance(
+ t, _AnsiIntervalType
+ ):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_fn.lit(None)
+ if isinstance(snowpark_typed_args[0].typ, NullType):
+ spark_function_name = (
+ f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+ )
+ else:
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+ )
+ case (DecimalType(), t) | (t, DecimalType()) if isinstance(
+ t, _AnsiIntervalType
+ ):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ if isinstance(snowpark_typed_args[0].typ, DecimalType):
+ result_exp = snowpark_args[1] * snowpark_args[0]
+ spark_function_name = (
+ f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+ )
+ else:
+ result_exp = snowpark_args[0] * snowpark_args[1]
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+ )
+ case (t, _NumericType()) if isinstance(t, _AnsiIntervalType):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_args[0] * snowpark_args[1]
+ case (_NumericType(), t) if isinstance(t, _AnsiIntervalType):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_args[1] * snowpark_args[0]
+ spark_function_name = (
+ f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+ )
  case (_NumericType(), _NumericType()):
  result_type = _find_common_type(
  [arg.typ for arg in snowpark_typed_args]
@@ -612,9 +756,11 @@ def map_unresolved_function(
  1
  ].cast(result_type)
  case _:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  case "+":
  spark_function_name = _get_spark_function_name(
  snowpark_typed_args[0],
@@ -642,7 +788,14 @@ def map_unresolved_function(
  result_type = DateType()
  result_exp = snowpark_args[0] + snowpark_args[1]
  elif isinstance(t, (DayTimeIntervalType, YearMonthIntervalType)):
- result_type = TimestampType()
+ result_type = (
+ TimestampType()
+ if isinstance(
+ snowpark_typed_args[t_param_index].typ,
+ DayTimeIntervalType,
+ )
+ else DateType()
+ )
  result_exp = (
  snowpark_args[date_param_index]
  + snowpark_args[t_param_index]
@@ -660,14 +813,47 @@ def map_unresolved_function(
  + snowpark_args[t_param_index]
  )
  else:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
+ case (TimestampType(), t) | (t, TimestampType()):
+ timestamp_param_index = (
+ 0
+ if isinstance(snowpark_typed_args[0].typ, TimestampType)
+ else 1
+ )
+ t_param_index = 1 - timestamp_param_index
+ if isinstance(t, (DayTimeIntervalType, YearMonthIntervalType)):
+ result_type = TimestampType()
+ result_exp = (
+ snowpark_args[timestamp_param_index]
+ + snowpark_args[t_param_index]
+ )
+ elif (
+ hasattr(
+ snowpark_typed_args[t_param_index].col._expr1, "pretty_name"
+ )
+ and "INTERVAL"
+ == snowpark_typed_args[t_param_index].col._expr1.pretty_name
+ ):
+ result_type = TimestampType()
+ result_exp = (
+ snowpark_args[timestamp_param_index]
+ + snowpark_args[t_param_index]
+ )
+ else:
+ raise AnalysisException(
+ f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INTERVAL") type for timestamp operations, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
+ )
  case (StringType(), StringType()):
  if spark_sql_ansi_enabled:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  else:
  result_type = DoubleType()
  result_exp = snowpark_fn.try_cast(
@@ -707,9 +893,91 @@ def map_unresolved_function(
  case (DecimalType(), t) | (t, DecimalType()) if isinstance(
  t, (BinaryType, TimestampType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
+ case (t1, t2) | (t2, t1) if isinstance(
+ t1, _AnsiIntervalType
+ ) and isinstance(t2, _AnsiIntervalType) and type(t1) == type(t2):
+ # Both operands are the same interval type
+ result_type = type(t1)(
+ min(t1.start_field, t2.start_field),
+ max(t1.end_field, t2.end_field),
+ )
+ result_exp = snowpark_args[0] + snowpark_args[1]
+ case (StringType(), t) | (t, StringType()) if isinstance(
+ t, YearMonthIntervalType
+ ):
+ # String + YearMonthInterval: Spark tries to cast string to double first, throws error if it fails
+ result_type = StringType()
+ if isinstance(snowpark_typed_args[0].typ, StringType):
+ result_exp = (
+ snowpark_fn.cast(snowpark_args[0], "double")
+ + snowpark_args[1]
+ )
+ else:
+ result_exp = snowpark_args[0] + snowpark_fn.cast(
+ snowpark_args[1], "double"
+ )
+ case (StringType(), t) | (t, StringType()) if isinstance(
+ t, DayTimeIntervalType
+ ):
+ # String + DayTimeInterval: try to parse string as timestamp, return NULL if it fails
+ # For time-only strings (like '10:00:00'), prepend current date to make it a full timestamp
+ result_type = StringType()
+ if isinstance(snowpark_typed_args[0].typ, StringType):
+ # Check if string looks like time-only (HH:MM:SS or HH:MM pattern)
+ # If so, prepend current date; otherwise use as-is
+ time_only_pattern = snowpark_fn.function("regexp_like")(
+ snowpark_args[0], r"^\d{1,2}:\d{2}(:\d{2})?$"
+ )
+ timestamp_expr = snowpark_fn.when(
+ time_only_pattern,
+ snowpark_fn.function("try_to_timestamp_ntz")(
+ snowpark_fn.function("concat")(
+ snowpark_fn.function("to_char")(
+ snowpark_fn.function("current_date")(),
+ "YYYY-MM-DD",
+ ),
+ snowpark_fn.lit(" "),
+ snowpark_args[0],
+ )
+ ),
+ ).otherwise(
+ snowpark_fn.function("try_to_timestamp_ntz")(
+ snowpark_args[0]
+ )
+ )
+ result_exp = timestamp_expr + snowpark_args[1]
+ else:
+ # interval + string case
+ time_only_pattern = snowpark_fn.function("regexp_like")(
+ snowpark_args[1], r"^\d{1,2}:\d{2}(:\d{2})?$"
+ )
+ timestamp_expr = snowpark_fn.when(
+ time_only_pattern,
+ snowpark_fn.function("try_to_timestamp_ntz")(
+ snowpark_fn.function("concat")(
+ snowpark_fn.function("to_char")(
+ snowpark_fn.function("current_date")(),
+ "'YYYY-MM-DD'",
+ ),
+ snowpark_fn.lit(" "),
+ snowpark_args[1],
+ )
+ ),
+ ).otherwise(
+ snowpark_fn.function("try_to_timestamp_ntz")(
+ snowpark_args[1]
+ )
+ )
+ result_exp = snowpark_args[0] + timestamp_expr
+ spark_function_name = (
+ f"{snowpark_arg_names[0]} + {snowpark_arg_names[1]}"
+ )
+
  case _:
  result_type, overflow_possible = _get_add_sub_result_type(
  snowpark_typed_args[0].typ,
@@ -755,7 +1023,11 @@ def map_unresolved_function(
  DateType(),
  YearMonthIntervalType(),
  ):
- result_type = TimestampType()
+ result_type = (
+ TimestampType()
+ if isinstance(snowpark_typed_args[1].typ, DayTimeIntervalType)
+ else DateType()
+ )
  result_exp = snowpark_args[0] - snowpark_args[1]
  case (DateType(), StringType()):
  if (
@@ -773,6 +1045,23 @@ def map_unresolved_function(
  result_exp = snowpark_args[0] - snowpark_args[1].cast(
  input_type
  )
+ case (TimestampType(), DayTimeIntervalType()) | (
+ TimestampType(),
+ YearMonthIntervalType(),
+ ):
+ result_type = TimestampType()
+ result_exp = snowpark_args[0] - snowpark_args[1]
+ case (TimestampType(), StringType()):
+ if (
+ hasattr(snowpark_typed_args[1].col._expr1, "pretty_name")
+ and "INTERVAL" == snowpark_typed_args[1].col._expr1.pretty_name
+ ):
+ result_type = TimestampType()
+ result_exp = snowpark_args[0] - snowpark_args[1]
+ else:
+ raise AnalysisException(
+ f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INTERVAL") type for timestamp operations, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
+ )
  case (StringType(), DateType()):
  # TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
  result_type = LongType()
@@ -782,18 +1071,24 @@ def map_unresolved_function(
  result_type = DateType()
  result_exp = snowpark_args[0] - snowpark_args[1]
  case (DateType(), _):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  case (_, DateType()):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  case (StringType(), StringType()):
  if spark_sql_ansi_enabled:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  else:
  result_type = DoubleType()
  result_exp = snowpark_fn.try_cast(
@@ -833,9 +1128,21 @@ def map_unresolved_function(
  case (DecimalType(), t) | (t, DecimalType()) if isinstance(
  t, (BinaryType, TimestampType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
+ case (StringType(), t) if isinstance(t, _AnsiIntervalType):
+ # String - Interval: try to parse string as timestamp, return NULL if it fails
+ result_type = StringType()
+ result_exp = (
+ snowpark_fn.function("try_to_timestamp")(snowpark_args[0])
+ - snowpark_args[1]
+ )
+ spark_function_name = (
+ f"{snowpark_arg_names[0]} - {snowpark_arg_names[1]}"
+ )
  case _:
  result_type, overflow_possible = _get_add_sub_result_type(
  snowpark_typed_args[0].typ,
@@ -879,9 +1186,11 @@ def map_unresolved_function(
  result_exp = snowpark_fn.lit(None)
  case (StringType(), StringType()):
  if spark_sql_ansi_enabled:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  else:
  result_type = DoubleType()
  result_exp = _divnull(
@@ -932,9 +1241,57 @@ def map_unresolved_function(
  result_exp = _divnull(
  snowpark_args[0], snowpark_args[1].try_cast(result_type)
  )
+ case (t, StringType()) if isinstance(t, _AnsiIntervalType):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_args[0] / snowpark_args[1].try_cast(
+ LongType()
+ )
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+ )
  case (_NumericType(), NullType()) | (NullType(), _NumericType()):
  result_type = DoubleType()
  result_exp = snowpark_fn.lit(None)
+ case (t, NullType()) if isinstance(t, _AnsiIntervalType):
+ # Only allow interval / null, not null / interval
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_fn.lit(None)
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+ )
+ case (DecimalType(), t) | (t, DecimalType()) if isinstance(
+ t, _AnsiIntervalType
+ ):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ if isinstance(snowpark_typed_args[0].typ, DecimalType):
+ result_exp = snowpark_args[1] / snowpark_args[0]
+ spark_function_name = (
+ f"({snowpark_arg_names[1]} / {snowpark_arg_names[0]})"
+ )
+ else:
+ result_exp = snowpark_args[0] / snowpark_args[1]
+ spark_function_name = (
+ f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+ )
+ case (t, _NumericType()) if isinstance(t, _AnsiIntervalType):
+ result_type = (
+ YearMonthIntervalType()
+ if isinstance(t, YearMonthIntervalType)
+ else DayTimeIntervalType()
+ )
+ result_exp = snowpark_args[0] / snowpark_args[1]
  case (_NumericType(), _NumericType()):
  result_type = DoubleType()
  result_exp = _divnull(
@@ -942,9 +1299,11 @@ def map_unresolved_function(
  snowpark_args[1].cast(result_type),
  )
  case _:
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  case "~":
  result_exp = TypedColumn(
  snowpark_fn.bitnot(snowpark_args[0]),
@@ -958,9 +1317,11 @@ def map_unresolved_function(
  or isinstance(snowpark_typed_args[0].typ, BooleanType)
  and isinstance(snowpark_typed_args[1].typ, DecimalType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} < {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  # Check for interval-string comparisons
  _check_interval_string_comparison(
  "<", snowpark_typed_args, snowpark_arg_names
@@ -976,9 +1337,11 @@ def map_unresolved_function(
  or isinstance(snowpark_typed_args[0].typ, BooleanType)
  and isinstance(snowpark_typed_args[1].typ, DecimalType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} <= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  # Check for interval-string comparisons
  _check_interval_string_comparison(
  "<=", snowpark_typed_args, snowpark_arg_names
@@ -1017,9 +1380,11 @@ def map_unresolved_function(
  or isinstance(snowpark_typed_args[0].typ, BooleanType)
  and isinstance(snowpark_typed_args[1].typ, DecimalType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} > {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  # Check for interval-string comparisons
  _check_interval_string_comparison(
  ">", snowpark_typed_args, snowpark_arg_names
@@ -1035,9 +1400,11 @@ def map_unresolved_function(
  or isinstance(snowpark_typed_args[0].typ, BooleanType)
  and isinstance(snowpark_typed_args[1].typ, DecimalType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} >= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  # Check for interval-string comparisons
  _check_interval_string_comparison(
  ">=", snowpark_typed_args, snowpark_arg_names
@@ -1134,9 +1501,11 @@ def map_unresolved_function(
  )
  case "any":
  if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "BOOLEAN" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  result_exp = TypedColumn(
  snowpark_fn.max(snowpark_args[0]),
  lambda: [BooleanType()],
@@ -1151,9 +1520,13 @@ def map_unresolved_function(
  case [col]:
  result_exp = snowpark_fn.any_value(col)
  case _:
- raise ValueError(
+ exception = ValueError(
  f"Unexpected number of args for function any_value. Expected 1 or 2, received {len(snowpark_args)}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
 
  spark_function_name = f"any_value({snowpark_arg_names[0]})"
  result_exp = _type_with_typer(result_exp)
@@ -1165,9 +1538,13 @@ def map_unresolved_function(
  lambda: [LongType()],
  )
  case [_, _]:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'rsd' parameter is not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "approx_percentile" | "percentile_approx":
  # SNOW-1955784: Support accuracy parameter
  # Use percentile_disc to return actual values from dataset (matches PySpark behavior)
@@ -1184,7 +1561,11 @@ def map_unresolved_function(
  # Even though the Spark function accepts a Column for percentage, it will fail unless it's a literal.
  # Therefore, we can do error checking right here.
  if not 0.0 <= percentage <= 1.0:
- raise AnalysisException("percentage must be between [0.0, 1.0]")
+ exception = AnalysisException(
+ "percentage must be between [0.0, 1.0]"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
 
  result = snowpark_fn.function("percentile_disc")(
  snowpark_fn.lit(percentage)
@@ -1251,9 +1632,11 @@ def map_unresolved_function(
  case "array_contains":
  array_type = snowpark_typed_args[0].typ
  if not isinstance(array_type, ArrayType):
- raise AnalysisException(
+ exception = AnalysisException(
  f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  def _compatible_types(type1: DataType, type2: DataType) -> bool:
  if type1 == type2:
@@ -1273,9 +1656,11 @@ def map_unresolved_function(
  if not _compatible_types(
  array_type.element_type, snowpark_typed_args[1].typ
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  '[DATATYPE_MISMATCH.ARRAY_FUNCTION_DIFF_TYPES] Cannot resolve "array_contains(arr, val)" due to data type mismatch: Input to `array_contains` should have been "ARRAY" followed by a value with same element type'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  value = (
  snowpark_fn.cast(snowpark_args[1], array_type.element_type)
  if array_type.structured
@@ -1345,7 +1730,13 @@ def map_unresolved_function(
  )
  result_exp = snowpark_fn.array_to_string(data, delimiter)
  case _:
- raise ValueError(f"Invalid number of arguments to {function_name}")
+ exception = ValueError(
+ f"Invalid number of arguments to {function_name}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  result_exp = TypedColumn(result_exp, lambda: [StringType()])
  case "array_max":
  result_exp = TypedColumn(
@@ -1437,9 +1828,11 @@ def map_unresolved_function(
  case "array_size":
  array_type = snowpark_typed_args[0].typ
  if not isinstance(array_type, ArrayType):
- raise AnalysisException(
+ exception = AnalysisException(
  f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  result_exp = TypedColumn(
  snowpark_fn.array_size(*snowpark_args), lambda: [LongType()]
  )
@@ -1450,9 +1843,11 @@ def map_unresolved_function(
  snowpark_fn.size(*snowpark_args), lambda: [LongType()]
  )
  else:
- raise AnalysisException(
+ exception = AnalysisException(
  f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType or MapType, but got {arg_type.simpleString()}."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  case "array_sort":
  result_exp = TypedColumn(
  snowpark_fn.array_sort(*snowpark_args),
@@ -1538,9 +1933,13 @@ def map_unresolved_function(
  expr, snowpark_fn.lit(None)
  ).otherwise(raise_error(snowpark_fn.cast(message, StringType())))
  case _:
- raise AnalysisException(
+ exception = AnalysisException(
  f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `assert_true` requires 1 or 2 parameters but the actual number is {len(snowpark_args)}."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "atan":
  spark_function_name = f"ATAN({snowpark_arg_names[0]})"
  result_exp = TypedColumn(
@@ -1578,9 +1977,11 @@ def map_unresolved_function(
  # Validate that input is StringType or BinaryType
  input_type = snowpark_typed_args[0].typ
  if not isinstance(input_type, (StringType, BinaryType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "base64({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "BINARY" type, however "{snowpark_arg_names[0]}" has the type "{input_type.simpleString().upper()}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  base64_encoding_function = snowpark_fn.function("base64_encode")
  result_exp = TypedColumn(
@@ -1613,9 +2014,11 @@ def map_unresolved_function(
  if not isinstance(
  snowpark_typed_args[0].typ, (_IntegralType, BooleanType, NullType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INTEGRAL" or "BOOLEAN") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  @cached_udf(
  input_types=[VariantType()],
@@ -1780,9 +2183,11 @@ def map_unresolved_function(
  result_type = BinaryType()
  case "bool_and" | "every":
  if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the \'BOOLEAN\' type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  bool_and_agg_function = snowpark_fn.function("booland_agg")
  result_exp = TypedColumn(
  bool_and_agg_function(*snowpark_args), lambda: [BooleanType()]
@@ -1790,9 +2195,11 @@ def map_unresolved_function(
 
  case "bool_or" | "some":
  if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "BOOLEAN" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  bool_or_agg_function = snowpark_fn.function("boolor_agg")
  result_exp = TypedColumn(
  bool_or_agg_function(*snowpark_args), lambda: [BooleanType()]
@@ -1825,9 +2232,11 @@ def map_unresolved_function(
  ),
  snowpark_typed_args[0].typ,
  ):
- raise ArithmeticException(
+ exception = ArithmeticException(
  '[ARITHMETIC_OVERFLOW] Overflow. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
  )
+ attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+ raise exception
 
  match snowpark_typed_args[0].typ:
  case DecimalType():
@@ -1892,9 +2301,11 @@ def map_unresolved_function(
  if not isinstance(
  snowpark_typed_args[1].typ, IntegerType
  ) and not isinstance(snowpark_typed_args[1].typ, LongType):
- raise AnalysisException(
+ exception = AnalysisException(
  f"The 'scale' parameter of function '{function_name}' needs to be a int literal."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  spark_function_name = (
  f"{fn_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]})"
  )
@@ -1911,9 +2322,13 @@ def map_unresolved_function(
  else:
  result_exp = TypedColumn(result_exp, lambda: [result_type])
  else:
- raise AnalysisException(
+ exception = AnalysisException(
  f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires 2 parameters but the actual number is {len(snowpark_args)}."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  case "chr" | "char":
  result_exp = snowpark_fn.when(
  (snowpark_args[0] > 256), snowpark_fn.char(snowpark_args[0] % 256)
@@ -1933,11 +2348,6 @@ def map_unresolved_function(
  result_exp = snowpark_fn.coalesce(
  *[col.cast(result_type) for col in snowpark_args]
  )
- case "col":
- # TODO: assign type
- result_exp = snowpark_fn.col(*snowpark_args)
- result_exp = _type_with_typer(result_exp)
- qualifiers = snowpark_args[0].get_qualifiers()
  case "collect_list" | "array_agg":
  # TODO: SNOW-1967177 - Support structured types in array_agg
  result_exp = snowpark_fn.array_agg(
@@ -1949,20 +2359,12 @@ def map_unresolved_function(
  spark_function_name = f"collect_list({snowpark_arg_names[0]})"
  case "collect_set":
  # Convert to a semi-structured type. TODO SNOW-1953065 - Support structured types in array_unique_agg.
- result_exp = snowpark_fn.cast(
- snowpark_fn.array_unique_agg(
- snowpark_typed_args[0].column(to_semi_structure=True)
- ),
- ArrayType(snowpark_typed_args[0].typ),
+ result_exp = snowpark_fn.array_unique_agg(
+ snowpark_typed_args[0].column(to_semi_structure=True)
  )
- result_exp = TypedColumn(
- result_exp, lambda: [ArrayType(snowpark_typed_args[0].typ)]
+ result_exp = _resolve_aggregate_exp(
+ result_exp, ArrayType(snowpark_typed_args[0].typ)
  )
- case "column":
- # TODO: assign type
- result_exp = snowpark_fn.column(*snowpark_args)
- result_exp = _type_with_typer(result_exp)
- qualifiers = snowpark_args[0].get_qualifiers()
  case "concat":
  if len(snowpark_args) == 0:
  result_exp = TypedColumn(snowpark_fn.lit(""), lambda: [StringType()])
@@ -2040,9 +2442,11 @@ def map_unresolved_function(
  ),
  ULongLong(),
  ):
- raise ArithmeticException(
+ exception = ArithmeticException(
  '[ARITHMETIC_OVERFLOW] Overflow in function conv(). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
  )
+ attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+ raise exception
 
  @cached_udf(
  input_types=[
@@ -2139,7 +2543,7 @@ def map_unresolved_function(
  snowpark_fn.col("*", _is_qualified_name=True)
  )
  else:
- result_exp = snowpark_fn.count(*snowpark_args)
+ result_exp = snowpark_fn.call_function("COUNT", *snowpark_args)
  result_exp = TypedColumn(result_exp, lambda: [LongType()])
  case "count_if":
  result_exp = snowpark_fn.call_function("COUNT_IF", snowpark_args[0])
@@ -2190,9 +2594,11 @@ def map_unresolved_function(
  seed = literal_value
 
  if column is None or eps is None or confidence is None or seed is None:
- raise ValueError(
+ exception = ValueError(
  "The required parameters for count_min_sketch have not been set."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
 
  # Calculate depth and width based on eps and confidence
  depth = math.ceil(math.log(1.0 / (1.0 - confidence)))
@@ -2290,10 +2696,12 @@ def map_unresolved_function(
  if not isinstance(col1_type, _NumericType) or not isinstance(
  col2_type, _NumericType
  ):
- raise TypeError(
+ exception = TypeError(
  f"Data type mismatch: covar_pop requires numeric types, "
  f"but got {col1_type} and {col2_type}."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  result_exp = snowpark_fn.covar_pop(
  snowpark_args[0],
  snowpark_args[1],
@@ -2305,10 +2713,12 @@ def map_unresolved_function(
  if not isinstance(col1_type, _NumericType) or not isinstance(
  col2_type, _NumericType
  ):
- raise TypeError(
+ exception = TypeError(
  f"Data type mismatch: covar_samp requires numeric types, "
  f"but got {col1_type} and {col2_type}."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  result_exp = snowpark_fn.covar_samp(snowpark_args[0], snowpark_args[1])
  result_type = DoubleType()
  case "crc32":
@@ -2317,9 +2727,11 @@ def map_unresolved_function(
  and not isinstance(snowpark_typed_args[0].typ, StringType)
  and not isinstance(snowpark_typed_args[0].typ, VariantType)
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve crc32({snowpark_args[0]}) due to data type mismatch: Input requires the BINARY type, however {snowpark_args[0]} has the type {snowpark_typed_args[0].typ}."
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  # UDF to calculate the unsigned CRC32 value of data in bytes. Returns the CRC32 value
  # as a 32-bit INT, or None if the input is None.
@@ -2369,9 +2781,13 @@ def map_unresolved_function(
  spark_function_name = "current_database()"
  case "current_date" | "curdate":
  if len(snowpark_args) > 0:
- raise AnalysisException(
+ exception = AnalysisException(
  f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires 0 parameters but the actual number is {len(snowpark_args)}."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  result_exp = TypedColumn(snowpark_fn.current_date(), lambda: [DateType()])
  spark_function_name = "current_date()"
  case "current_timestamp" | "now":
@@ -2387,7 +2803,11 @@ def map_unresolved_function(
  if len(snowpark_args) != 2:
  # SQL supports a 3-argument call that gets mapped to timestamp_add -
  # however, if the first argument is invalid, we end up here.
- raise AnalysisException("date_add takes 2 arguments")
+ exception = AnalysisException("date_add takes 2 arguments")
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  arg_2 = snowpark_typed_args[1].typ
  if isinstance(arg_2, StringType):
  with suppress(Exception):
@@ -2395,9 +2815,11 @@ def map_unresolved_function(
  arg_2 = IntegerType()
 
  if not isinstance(arg_2, (_IntegralType, NullType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_add({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT" or "NULL") type, however "{snowpark_arg_names[1]}" has the type "{str(arg_2)}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
 
  result_exp = _try_to_cast(
  "try_to_date",
@@ -2412,7 +2834,11 @@ def map_unresolved_function(
  if len(snowpark_args) != 2:
  # SQL supports a 3-argument call that gets mapped to timestamp_diff -
  # however, if the first argument is invalid, we end up here.
- raise AnalysisException("date_diff takes 2 arguments")
+ exception = AnalysisException("date_diff takes 2 arguments")
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+ )
+ raise exception
  result_exp = _try_to_cast(
  "try_to_date",
  snowpark_fn.datediff("day", snowpark_args[1], snowpark_args[0]),
@@ -2469,9 +2895,11 @@ def map_unresolved_function(
  arg_2 = IntegerType()
 
  if not isinstance(arg_2, (_IntegralType, NullType)):
- raise AnalysisException(
+ exception = AnalysisException(
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_sub({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT" or "NULL") type, however "{snowpark_arg_names[1]}" has the type "{str(arg_2)}".'
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  result_exp = _try_to_cast(
  "try_to_date",
  snowpark_fn.to_date(
@@ -2553,14 +2981,6 @@ def map_unresolved_function(
  )
  result_type = LongType()
  case "date_part" | "datepart" | "extract":
- # Check for interval types and throw NotImplementedError
- if isinstance(
- snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
- ):
- raise NotImplementedError(
- f"{function_name} with interval types is not supported"
- )
-
  field_lit: str | None = unwrap_literal(exp.unresolved_function.arguments[0])
 
  if field_lit is None:
@@ -2605,16 +3025,51 @@ def map_unresolved_function(
2605
3025
  case "div":
2606
3026
  # Only called from SQL, either as `a div b` or `div(a, b)`
2607
3027
  # Convert it into `(a - a % b) / b`.
2608
- result_exp = snowpark_fn.cast(
2609
- (snowpark_args[0] - snowpark_args[0] % snowpark_args[1])
2610
- / snowpark_args[1],
2611
- LongType(),
2612
- )
2613
- if not spark_sql_ansi_enabled:
2614
- result_exp = snowpark_fn.when(
2615
- snowpark_args[1] == 0, snowpark_fn.lit(None)
2616
- ).otherwise(result_exp)
2617
- result_type = LongType()
3028
+ if isinstance(snowpark_typed_args[0].typ, YearMonthIntervalType):
3029
+ if isinstance(snowpark_typed_args[1].typ, YearMonthIntervalType):
3030
+ dividend_total = _calculate_total_months(snowpark_args[0])
3031
+ divisor_total = _calculate_total_months(snowpark_args[1])
3032
+
3033
+ # Handle division by zero interval
3034
+ if not spark_sql_ansi_enabled:
3035
+ result_exp = snowpark_fn.when(
3036
+ divisor_total == 0, snowpark_fn.lit(None)
3037
+ ).otherwise(snowpark_fn.trunc(dividend_total / divisor_total))
3038
+ else:
3039
+ result_exp = snowpark_fn.trunc(dividend_total / divisor_total)
3040
+ result_type = LongType()
3041
+ else:
3042
+ raise AnalysisException(
3043
+ f"""[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} div {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ({snowpark_typed_args[0].typ} and {snowpark_typed_args[1].typ}).;"""
3044
+ )
3045
+ elif isinstance(snowpark_typed_args[0].typ, DayTimeIntervalType):
3046
+ if isinstance(snowpark_typed_args[1].typ, DayTimeIntervalType):
3047
+ dividend_total = _calculate_total_seconds(snowpark_args[0])
3048
+ divisor_total = _calculate_total_seconds(snowpark_args[1])
3049
+
3050
+ # Handle division by zero interval
3051
+ if not spark_sql_ansi_enabled:
3052
+ result_exp = snowpark_fn.when(
3053
+ divisor_total == 0, snowpark_fn.lit(None)
3054
+ ).otherwise(snowpark_fn.trunc(dividend_total / divisor_total))
3055
+ else:
3056
+ result_exp = snowpark_fn.trunc(dividend_total / divisor_total)
3057
+ result_type = LongType()
3058
+ else:
3059
+ raise AnalysisException(
3060
+ f"""[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} div {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ({snowpark_typed_args[0].typ} and {snowpark_typed_args[1].typ}).;"""
3061
+ )
3062
+ else:
3063
+ result_exp = snowpark_fn.cast(
3064
+ (snowpark_args[0] - snowpark_args[0] % snowpark_args[1])
3065
+ / snowpark_args[1],
3066
+ LongType(),
3067
+ )
3068
+ if not spark_sql_ansi_enabled:
3069
+ result_exp = snowpark_fn.when(
3070
+ snowpark_args[1] == 0, snowpark_fn.lit(None)
3071
+ ).otherwise(result_exp)
3072
+ result_type = LongType()
2618
3073
  case "e":
2619
3074
  spark_function_name = "E()"
2620
3075
  result_exp = snowpark_fn.lit(math.e)
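The reworked `div` branch reduces interval division to integer arithmetic over a common unit (total months for year-month intervals, total seconds for day-time intervals) and truncates the quotient, while the numeric fallback keeps the `(a - a % b) / b` identity. A minimal plain-Python sketch of the same arithmetic, assuming non-negative operands (Python's `%` follows the divisor's sign, so signs can diverge from Spark's truncating `div` for mixed-sign inputs); `spark_div` is a hypothetical name:

    import math

    def spark_div(a: int, b: int) -> int | None:
        # Mirrors the non-ANSI branch above: NULL (None) on division by
        # zero, otherwise the integral quotient (a - a % b) / b.
        if b == 0:
            return None
        return (a - a % b) // b

    # INTERVAL '3-6' YEAR TO MONTH div INTERVAL '0-7' YEAR TO MONTH:
    # both sides collapse to total months first, then the quotient is truncated.
    dividend_months = 3 * 12 + 6   # 42
    divisor_months = 7
    assert math.trunc(dividend_months / divisor_months) == 6
    assert spark_div(7, 2) == 3
    assert spark_div(7, 0) is None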
@@ -2637,9 +3092,13 @@ def map_unresolved_function(
                     result_exp = snowpark_fn.element_at(data, spark_index)
                     result_type = typ.value_type
                 case _:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"Unsupported type {typ} for element_at function"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
         case "elt":
             n = snowpark_args[0]
             values = snowpark_fn.array_construct(*snowpark_args[1:])
@@ -2725,9 +3184,11 @@ def map_unresolved_function(
                 result_type = [input_type.key_type, input_type.value_type]
             else:
                 # Throw proper error for types without key_type/value_type attributes
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the ("ARRAY" or "MAP") type, however "{snowpark_arg_names[0]}" has the type "{str(input_type)}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         case "expm1":
             spark_function_name = f"EXPM1({snowpark_arg_names[0]})"
             result_exp = snowpark_fn.exp(*snowpark_args) - 1
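Worth noting for reviewers: `expm1` is mapped here to `exp(x) - 1`, which suffers cancellation for tiny `x`, whereas a dedicated `expm1` routine does not. A small sketch of the difference in plain Python (exact digits are platform-dependent):

    import math

    x = 1e-12
    naive = math.exp(x) - 1    # catastrophic cancellation near zero
    precise = math.expm1(x)    # dedicated routine keeps full precision
    print(naive)               # approx. 1.000088900582341e-12
    print(precise)             # approx. 1.0000000000005e-12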
@@ -2848,9 +3309,11 @@ def map_unresolved_function(
             if not isinstance(
                 snowpark_typed_args[1].typ, IntegerType
             ) and not isinstance(snowpark_typed_args[1].typ, LongType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     "The 'scale' parameter of function 'floor' needs to be a int literal."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             spark_function_name = (
                 f"floor({snowpark_arg_names[0]}, {snowpark_arg_names[1]})"
             )
@@ -2867,17 +3330,23 @@ def map_unresolved_function(
                 else:
                     result_exp = TypedColumn(result_exp, lambda: [result_type])
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `floor` requires 2 parameters but the actual number is {len(snowpark_args)}."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
         case "format_number":
             col, scale = snowpark_args
             col_type = snowpark_typed_args[0].typ

             if not isinstance(col_type, _NumericType):
-                raise TypeError(
+                exception = TypeError(
                     f'Data type mismatch: Parameter 1 of format_number requires the "NUMERIC" type, however was {col_type}.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             @cached_udf(
                 input_types=[StringType(), LongType()],
@@ -2953,7 +3422,7 @@ def map_unresolved_function(
                 if options is not None:
                     if not isinstance(options, dict):
                         raise TypeError(
-                            "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
+                            "[snowpark_connect::invalid_input] [INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                         )

                     max_chars_per_column = options.get(
@@ -2970,7 +3439,7 @@ def map_unresolved_function(
                             type(v).__name__, type(v).__name__.upper()
                         )
                         raise TypeError(
-                            f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
+                            f'[snowpark_connect::type_mismatch] [INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
                         )

                 csv_data = csv_data.split(sep)
@@ -2993,7 +3462,7 @@ def map_unresolved_function(
                         and len(str(csv_data[i])) > max_chars_per_column
                     ):
                         raise ValueError(
-                            f"Max chars per column exceeded {max_chars_per_column}: {str(csv_data[i])}"
+                            f"[snowpark_connect::invalid_input] Max chars per column exceeded {max_chars_per_column}: {str(csv_data[i])}"
                        )

                 return results
@@ -3004,9 +3473,11 @@ def map_unresolved_function(
             if len(snowpark_arg_names) > 2 and snowpark_arg_names[2].startswith(
                 "named_struct"
             ):
-                raise TypeError(
+                exception = TypeError(
                     "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             match snowpark_args:
                 case [csv_data, schemas]:
@@ -3020,7 +3491,11 @@ def map_unresolved_function(
                     result_exp = _from_csv(
                         snowpark_fn.cast(csv_data, StringType()), schemas, options
                     )
                 case _:
-                    raise ValueError("Unrecognized from_csv parameters")
+                    exception = ValueError("Unrecognized from_csv parameters")
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception

             result_exp = snowpark_fn.when(
                 snowpark_args[0].is_null(), snowpark_fn.lit(None)
@@ -3029,15 +3504,19 @@ def map_unresolved_function(
             # TODO: support options.
             if len(snowpark_args) > 2:
                 if not isinstance(snowpark_typed_args[2].typ, MapType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 if not isinstance(
                     snowpark_typed_args[2].typ.key_type, StringType
                 ) or not isinstance(snowpark_typed_args[2].typ.value_type, StringType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"""[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "{snowpark_typed_args[2].typ.simpleString().upper()}"."""
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

             spark_function_name = f"from_json({snowpark_arg_names[0]})"
             lit_schema = unwrap_literal(exp.unresolved_function.arguments[1])
@@ -3172,9 +3651,11 @@ def map_unresolved_function(
                 input_arg_type: DataType,
                 format: str = "yyyy-MM-dd HH:mm:ss",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "from_unixtime({input_arg_name}, {format})" due to data type mismatch: Parameter 1 requires the "BIGINT" type, however "{input_arg_name}" has the type "{input_arg_type}"'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             # Strip decimal part of the number to ensure proper result after calling snowflake counterparts
             match snowpark_typed_args[0].typ:
@@ -3240,15 +3721,20 @@ def map_unresolved_function(
                             timestamp_format,
                         )
                     except AnalysisException as e:
+                        attach_custom_error_code(e, ErrorCodes.INVALID_INPUT)
                         raise e
                     except Exception:
                         # The second argument must either be a string or none. It can't be a column.
                         # So if it's anything that isn't a literal, we catch the error and just return NULL
                         result_exp = snowpark_fn.lit(None)
                 case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `from_unixtime` requires [1, 2] parameters but the actual number is {len(snowpark_args)}."
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             result_type = StringType()
         case "from_utc_timestamp":
             target_tz = _map_from_spark_tz(snowpark_args[1])
@@ -3323,10 +3809,12 @@ def map_unresolved_function(
                 for sp_col in snowpark_args
             ]
             if current_grouping_cols != spark_col_args:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[GROUPING_ID_COLUMN_MISMATCH] Columns of grouping_id: {spark_col_args} doesnt match "
                     f"Grouping columns: {current_grouping_cols}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
             if function_name == "grouping_id":
                 result_exp = snowpark_fn.grouping_id(*snowpark_args)
             else:
@@ -3344,12 +3832,14 @@ def map_unresolved_function(
             if not snowflake_compat and not spark_sql_legacy_allow_hash_on_map_type:
                 for arg in snowpark_typed_args:
                     if any(isinstance(t, MapType) for t in arg.types):
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             '[DATATYPE_MISMATCH.HASH_MAP_TYPE] Cannot resolve "hash(value)" due to data type mismatch: '
                             'Input to the function `hash` cannot contain elements of the "MAP" type. '
                             'In Spark, same maps may have different hashcode, thus hash expressions are prohibited on "MAP" elements. '
                             'To restore previous behavior set "spark.sql.legacy.allowHashOnMapType" to "true".'
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
             result_exp = snowpark_fn.hash(*snowpark_args)
             result_type = LongType()
         case "hex":
@@ -3388,9 +3878,11 @@ def map_unresolved_function(
             if isinstance(aggregate_input_typ, DecimalType):
                 # mimic bug from Spark 3.5.3.
                 # In 3.5.5 it's fixed and this exception shouldn't be thrown
-                raise ValueError(
+                exception = ValueError(
                     "class org.apache.spark.sql.types.Decimal cannot be cast to class java.lang.Number (org.apache.spark.sql.types.Decimal is in unnamed module of loader 'app'; java.lang.Number is in module java.base of loader 'bootstrap')"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+                raise exception

             histogram_return_type = ArrayType(
                 StructType(
@@ -3409,8 +3901,6 @@ def map_unresolved_function(
                 result should be either way good enough.
                 """

-                from datetime import date, datetime, time, timedelta
-
                 def __init__(self) -> None:

                     # init the RNG for breaking ties in histogram merging. A fixed seed is specified here
@@ -3554,7 +4044,8 @@ def map_unresolved_function(
                     # just increment 'bin'. This is not done now because we don't want to make any
                     # assumptions about the range of numeric data being analyzed.
                     if bin < self.n_used_bins and self.bins[bin][0] == v:
-                        self.bins[bin][1] += 1
+                        bin_x, bin_y = self.bins[bin]
+                        self.bins[bin] = (bin_x, bin_y + 1)
                     else:
                         self.bins.insert(bin + 1, (v, 1.0))
                         self.n_used_bins += 1
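The one-line fix above matters because `self.bins` holds tuples, and tuples do not support item assignment: the old `self.bins[bin][1] += 1` raises `TypeError` at runtime. A minimal standalone repro of the bug and the replacement pattern:

    bins = [(4.2, 1.0)]
    try:
        bins[0][1] += 1    # tuples are immutable
    except TypeError as e:
        print(e)           # 'tuple' object does not support item assignment

    # The fix in the diff: unpack and replace the whole entry.
    x, y = bins[0]
    bins[0] = (x, y + 1.0)
    assert bins[0] == (4.2, 2.0)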
@@ -3604,13 +4095,12 @@ def map_unresolved_function(
                 input_types=[aggregate_input_typ, IntegerType()],
             )

-            result_exp = snowpark_fn.cast(
+            result_exp = _resolve_aggregate_exp(
                 _histogram_numeric_udaf(
                     snowpark_args[0], snowpark_fn.lit(snowpark_args[1])
                 ),
                 histogram_return_type,
             )
-            result_type = histogram_return_type
         case "hll_sketch_agg":
             # check if input type is correct
             if type(snowpark_typed_args[0].typ) not in [
@@ -3620,9 +4110,11 @@ def map_unresolved_function(
                 BinaryType,
             ]:
                 type_str = snowpark_typed_args[0].typ.simpleString().upper()
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT" or "STRING" or "BINARY") type, however "{snowpark_arg_names[0]}" has the type "{type_str}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             match snowpark_args:
                 case [sketch]:
@@ -3761,10 +4253,12 @@ def map_unresolved_function(
                     type_mismatched = True

             if type_mismatched:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.DATA_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: '
                     f'Input to `in` should all be the same type, but it\'s [{", ".join(type_names)}].'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             try:
                 result_exp = snowpark_args[0].in_(snowpark_args[1:])
@@ -3797,9 +4291,11 @@ def map_unresolved_function(
                 except Exception:
                     type_str = str(input_type)

-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "inline({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "ARRAY<STRUCT>" type, however "{snowpark_arg_names[0]}" has the type {type_str}.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             is_outer = function_name == "inline_outer"

@@ -3891,11 +4387,13 @@ def map_unresolved_function(
                 if arg_type not in allowed_arg_types:
                     spark_type = map_snowpark_to_pyspark_types(arg_type)

-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: """
                         f"""Parameter {arg_idx+3} requires the ("BOOLEAN" or "TINYINT" or "SMALLINT" or "INT" or "BIGINT" or "FLOAT" or "DOUBLE" or "STRING") type, """
                         f"""however "{snowpark_arg_names[arg_idx+2]}" has the type "{spark_type.simpleString()}"."""
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

             arg_values = snowpark_fn.cast(
                 snowpark_fn.array_construct(
@@ -3924,9 +4422,11 @@ def map_unresolved_function(
             if not isinstance(
                 snowpark_typed_args[0].typ, StringType
             ) and not isinstance(snowpark_typed_args[0].typ, NullType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "json_array_length({",".join(snowpark_arg_names)})" due to data type mismatch: Parameter 1 requires the "STRING" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             arr_exp = snowpark_fn.function("TRY_PARSE_JSON")(snowpark_args[0])
             result_exp = snowpark_fn.array_size(arr_exp)
             result_type = LongType()
@@ -3934,9 +4434,11 @@ def map_unresolved_function(
             if not isinstance(
                 snowpark_typed_args[0].typ, StringType
             ) and not isinstance(snowpark_typed_args[0].typ, NullType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "json_object_keys({",".join(snowpark_arg_names)})" due to data type mismatch: Parameter 1 requires the "STRING" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             obj_exp = snowpark_fn.function("TRY_PARSE_JSON")(
                 snowpark_args[0], snowpark_fn.lit("d")
             )
@@ -4080,9 +4582,11 @@ def map_unresolved_function(
                         else snowpark_fn.builtin("try_to_date")(*snowpark_args)
                     )
                 case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "last_day({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

             result_exp = snowpark_fn.last_day(result_exp)
             result_type = DateType()
@@ -4156,7 +4660,13 @@ def map_unresolved_function(
                         snowpark_fn.lit(None),
                     ).otherwise(snowpark_fn.lit(-1))
                 case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             result_type = LongType()
         case "like":
             result_exp = snowpark_fn.call_function("like", *snowpark_args)
@@ -4329,6 +4839,17 @@ def map_unresolved_function(
             date_str_exp = snowpark_fn.concat(y, dash, m, dash, d)
             result_exp = snowpark_fn.builtin(snowpark_function)(date_str_exp)
             result_type = DateType()
+        case "make_dt_interval":
+            # Pad argument names for display purposes
+            padded_arg_names = snowpark_arg_names.copy()
+            while len(padded_arg_names) < 3:  # days, hours, minutes are integers
+                padded_arg_names.append("0")
+            if len(padded_arg_names) < 4:  # seconds can be decimal
+                padded_arg_names.append("0.000000")
+
+            spark_function_name = f"make_dt_interval({', '.join(padded_arg_names)})"
+            result_exp = snowpark_fn.interval_day_time_from_parts(*snowpark_args)
+            result_type = DayTimeIntervalType()
         case "make_timestamp" | "make_timestamp_ltz" | "make_timestamp_ntz":
             y, m, d, h, mins = map(lambda col: col.cast(LongType()), snowpark_args[:5])
             y_abs = snowpark_fn.abs(y)
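The padding in the new `make_dt_interval` (and `make_ym_interval` below) only affects the rendered Spark column name: omitted days/hours/minutes display as "0" and an omitted seconds slot as "0.000000". A standalone sketch of the same padding; `padded_name` is a hypothetical helper, not part of the package:

    def padded_name(arg_names: list[str]) -> str:
        names = arg_names.copy()
        while len(names) < 3:        # days, hours, minutes default to 0
            names.append("0")
        if len(names) < 4:           # seconds slot rendered with microseconds
            names.append("0.000000")
        return f"make_dt_interval({', '.join(names)})"

    assert padded_name(["1"]) == "make_dt_interval(1, 0, 0, 0.000000)"
    assert padded_name(["1", "2", "3", "4.5"]) == "make_dt_interval(1, 2, 3, 4.5)"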
@@ -4382,6 +4903,15 @@ def map_unresolved_function(
             result_exp = snowpark_fn.when(
                 snowpark_fn.is_null(parsed_str_exp), snowpark_fn.lit(None)
             ).otherwise(make_timestamp_res)
+        case "make_ym_interval":
+            # Pad argument names for display purposes
+            padded_arg_names = snowpark_arg_names.copy()
+            while len(padded_arg_names) < 2:  # years, months
+                padded_arg_names.append("0")
+
+            spark_function_name = f"make_ym_interval({', '.join(padded_arg_names)})"
+            result_exp = snowpark_fn.interval_year_month_from_parts(*snowpark_args)
+            result_type = YearMonthIntervalType()
         case "map":
             allow_duplicate_keys = (
                 global_config.spark_sql_mapKeyDedupPolicy == "LAST_WIN"
@@ -4400,13 +4930,21 @@ def map_unresolved_function(
                 )
                 result_type = MapType(NullType(), NullType())
             elif (num_args % 2) == 1:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `map` requires 2n (n > 0) parameters but the actual number is {num_args}"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
             elif key_type is None or isinstance(key_type, NullType):
-                raise SparkRuntimeException(
+                exception = SparkRuntimeException(
                     "[NULL_MAP_KEY] Cannot use null as map key."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
             else:
                 value_type = value_type if value_type else NullType()

@@ -4452,7 +4990,7 @@ def map_unresolved_function(
                 for key, value in m.items():
                     if key in new_map and not allow_dups:
                         raise ValueError(
-                            DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
+                            f"[snowpark_connect::invalid_operation] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
                         )
                     else:
                         new_map[key] = value
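Duplicate-key handling in these map branches follows Spark's `spark.sql.mapKeyDedupPolicy`: under `LAST_WIN` a later value silently overwrites, otherwise the merge raises. A dict-based sketch of that policy (`merge_maps` is a hypothetical name, and the message is illustrative, not the package's exact template):

    def merge_maps(maps: list[dict], last_win: bool) -> dict:
        merged: dict = {}
        for m in maps:
            for key, value in m.items():
                if key in merged and not last_win:
                    raise ValueError(f"Duplicate map key {key} was found")
                merged[key] = value  # LAST_WIN: later occurrence overwrites
        return merged

    assert merge_maps([{"a": 1}, {"a": 2}], last_win=True) == {"a": 2}
    # merge_maps([{"a": 1}, {"a": 2}], last_win=False) raises ValueError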
@@ -4483,13 +5021,17 @@ def map_unresolved_function(
             result_type = MapType(key_type, value_type)
         case "map_contains_key":
             if isinstance(snowpark_typed_args[0].typ, NullType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.MAP_FUNCTION_DIFF_TYPES] Cannot resolve "map_contains_key({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Input to `map_contains_key` should have been "MAP" followed by a value with same key type, but it's ["VOID", "INT"]."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             if isinstance(snowpark_typed_args[1].typ, NullType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.NULL_TYPE] Cannot resolve "map_contains_key({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Null typed values cannot be used as arguments of `map_contains_key`."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             args = (
                 [snowpark_args[1], snowpark_args[0]]
                 if isinstance(snowpark_typed_args[0].typ, MapType)
@@ -4499,23 +5041,29 @@ def map_unresolved_function(
             result_type = BooleanType()
         case "map_entries":
             if not isinstance(snowpark_typed_args[0].typ, MapType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "map_entries({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "MAP" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             key_type = snowpark_typed_args[0].typ.key_type
             value_type = snowpark_typed_args[0].typ.value_type

             # SNOW-2040715
             def _map_entries(obj: dict):
                 if obj is None:
-                    raise TypeError(f"Expected MapType but received {obj} instead.")
+                    raise TypeError(
+                        f"[snowpark_connect::type_mismatch] Expected MapType but received {obj} instead."
+                    )
                 return [{"key": key, "value": value} for key, value in obj.items()]

             arg_type = snowpark_typed_args[0].typ
             if not isinstance(arg_type, MapType):
-                raise TypeError(
+                exception = TypeError(
                     f"map_entries requires a MapType argument, got {arg_type}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             map_entries = snowpark_fn.udf(
                 _map_entries,
@@ -4545,9 +5093,11 @@ def map_unresolved_function(
             if not isinstance(keys_type, ArrayType) or not isinstance(
                 values_type, ArrayType
             ):
-                raise TypeError(
+                exception = TypeError(
                     f"map_from_arrays requires two arguments of type ArrayType, got {keys_type} and {values_type}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             key_type = keys_type.element_type if keys_type.structured else VariantType()
             value_type = (
                 values_type.element_type if values_type.structured else VariantType()
@@ -4562,7 +5112,7 @@ def map_unresolved_function(
                     return None
                 if len(keys) != len(values):
                     raise ValueError(
-                        "The key array and value array of must have the same length"
+                        "[snowpark_connect::internal_error] The key array and value array of must have the same length"
                    )

                 if not allow_dups and len(set(keys)) != len(keys):
@@ -4570,7 +5120,7 @@ def map_unresolved_function(
                     for key in keys:
                         if key in seen:
                             raise ValueError(
-                                DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
+                                f"[snowpark_connect::invalid_operation] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
                             )
                         seen.add(key)
                 # will overwrite the last occurrence if there are duplicates.
@@ -4592,9 +5142,11 @@ def map_unresolved_function(
             result_type = MapType(key_type, value_type)
         case "map_from_entries":
             if not isinstance(snowpark_typed_args[0].typ, ArrayType):
-                raise TypeError(
+                exception = TypeError(
                     f"map_from_entries requires an argument of type ArrayType, got {snowpark_typed_args[0].typ}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             entry_type = snowpark_typed_args[0].typ.element_type

@@ -4613,9 +5165,11 @@ def map_unresolved_function(
                     value_type = entry_type.fields[1].datatype
                     [key_field, value_field] = entry_type.names
                 case _:
-                    raise TypeError(
+                    exception = TypeError(
                         f"map_from_entries requires an array of StructType, got array of {entry_type}"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

             last_win_dedup = global_config.spark_sql_mapKeyDedupPolicy == "LAST_WIN"

@@ -4656,7 +5210,11 @@ def map_unresolved_function(
         case "map_keys":
             arg_type = snowpark_typed_args[0].typ
             if not isinstance(arg_type, MapType):
-                raise TypeError(f"map_keys requires a MapType argument, got {arg_type}")
+                exception = TypeError(
+                    f"map_keys requires a MapType argument, got {arg_type}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             if arg_type.structured:
                 result_exp = snowpark_fn.map_keys(snowpark_args[0])
@@ -4669,9 +5227,11 @@ def map_unresolved_function(
             # technically this could be done with a lateral join, but it's probably not worth the effort
             arg_type = snowpark_typed_args[0].typ
             if not isinstance(arg_type, (MapType, NullType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"map_values requires a MapType argument, got {arg_type}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             def _map_values(obj: dict) -> list:
                 if obj is None:
@@ -4770,20 +5330,30 @@ def map_unresolved_function(
             ):
                 pass
             elif not isinstance(arg_type, StringType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter {i + 1} requires the "STRING" type, however "{arg_name}" has the type "{arg_type.simpleString().upper()}".;"""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             elif (
                 exp.unresolved_function.arguments[i].WhichOneof("expr_type")
                 != "literal"
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "{spark_function_name}" due to data type mismatch: the input {col_arg_names[i]} should be a foldable "STRING" expression; however, got "{arg_name}"."""
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
             elif len(arg_name) != 1:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.INPUT_SIZE_NOT_ONE] Cannot resolve "{spark_function_name}" due to data type mismatch: Length of {col_arg_names[i]} should be 1."""
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception

             random_tag_suffix = "".join(random.sample(string.ascii_uppercase, 6))
             tags = [
@@ -4823,17 +5393,17 @@ def map_unresolved_function(
             # MD5 in Spark only accepts BinaryType or types that can be implicitly cast to it (StringType)
             if not snowflake_compat:
                 if not isinstance(snowpark_typed_args[0].typ, (BinaryType, StringType)):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "md5({snowpark_arg_names[0]})" due to data type mismatch: '
                         f'Parameter 1 requires the "BINARY" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
             result_exp = snowpark_fn.md5(snowpark_args[0])
             result_type = StringType(32)
         case "median":
-            result_exp = snowpark_fn.median(snowpark_args[0])
-            # TODO SNOW-2034495: can we resolve the result type?
-            result_exp = TypedColumn(
-                snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
+            result_exp = _resolve_aggregate_exp(
+                snowpark_fn.median(snowpark_args[0]), DoubleType()
             )
         case "min":
             result_exp = _handle_structured_aggregate_result(
@@ -4924,9 +5494,13 @@ def map_unresolved_function(
                 expanded_args.append(arg_typed_column.col)

             if len(expanded_args) % 2 != 0:
-                raise ValueError(
+                exception = ValueError(
                     "Number of arguments must be even (a list of key-value pairs)."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception

             # field types for the schema
             field_names = []
@@ -4992,27 +5566,37 @@ def map_unresolved_function(
                 spark_function_name = f"(- {snowpark_arg_names[0]})"
             else:
                 spark_function_name = f"negative({snowpark_arg_names[0]})"
-            if isinstance(arg_type, _NumericType):
+            if (
+                isinstance(arg_type, _NumericType)
+                or isinstance(arg_type, YearMonthIntervalType)
+                or isinstance(arg_type, DayTimeIntervalType)
+            ):
                 # Instead of using snowpark_fn.negate which can generate invalid SQL for nested minus operations,
                 # use a direct multiplication by -1 which generates cleaner SQL
                 result_exp = snowpark_args[0] * snowpark_fn.lit(-1)
             elif isinstance(arg_type, StringType):
                 if spark_sql_ansi_enabled:
-                    raise NumberFormatException(
+                    exception = NumberFormatException(
                         f'The value \'{snowpark_args[0]}\' of the type {arg_type} cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+                    raise exception
                 else:
                     result_exp = snowpark_fn.lit(None)
             elif isinstance(arg_type, NullType):
                 result_exp = snowpark_fn.lit(None)
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve {spark_function_name} due to data type mismatch: "
                     f'Parameter 1 requires the ("NUMERIC") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_type = (
                 snowpark_typed_args[0].types
                 if isinstance(arg_type, _NumericType)
+                or isinstance(arg_type, YearMonthIntervalType)
+                or isinstance(arg_type, DayTimeIntervalType)
                 else DoubleType()
             )
         case "next_day":
@@ -5020,9 +5604,11 @@ def map_unresolved_function(
             date = unwrap_literal(exp.unresolved_function.arguments[1])
             if date is None or date.lower() not in dates:
                 if spark_sql_ansi_enabled:
-                    raise IllegalArgumentException(
+                    exception = IllegalArgumentException(
                         """Illegal input for day of week. If necessary set "spark.sql.ansi.enabled" to false to bypass this error."""
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 else:
                     result_exp = snowpark_fn.lit(None)
             else:
@@ -5081,9 +5667,11 @@ def map_unresolved_function(
             )
         case "octet_length":
             if isinstance(snowpark_typed_args[0].typ, (ArrayType, MapType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "octet_length({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the ("STRING" or "BINARY") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = snowpark_fn.octet_length(snowpark_args[0])
             if isinstance(snowpark_typed_args[0].typ, _FractionalType):
                 # All decimal types have to have 3 characters at a minimum.
@@ -5215,9 +5803,11 @@ def map_unresolved_function(
             )

             if not isinstance(snowpark_typed_args[0].typ, (_NumericType, StringType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})" due to data type mismatch: Parameter 1 requires the "NUMERIC" type, however "value" has the type "{snowpark_typed_args[0].typ}".;"""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             elif len(snowpark_args) == 3:

                 class PercentileUDAF:
@@ -5237,7 +5827,9 @@ def map_unresolved_function(
                     def accumulate(self, value, percentages, frequency: int):

                         if frequency < 0:
-                            raise ValueError(f"Negative values found in {frequency}")
+                            raise ValueError(
+                                f"[snowpark_connect::invalid_input] Negative values found in {frequency}"
+                            )

                         if not self.percentages:
                             self.percentages = percentages
@@ -5247,7 +5839,7 @@ def map_unresolved_function(
                             for percentage in self.percentages
                         ):
                             raise ValueError(
-                                "The percentage must be between [0.0, 1.0]"
+                                "[snowpark_connect::invalid_input] The percentage must be between [0.0, 1.0]"
                             )

                         if value is None:
@@ -5293,7 +5885,9 @@ def map_unresolved_function(
                        Algorithm based on Spark code: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala#L194
                        """
                        if not accumulated_counts:
-                            raise ValueError("accumulated_counts cannot be empty")
+                            raise ValueError(
+                                "[snowpark_connect::internal_error] accumulated_counts cannot be empty"
+                            )

                        total_count = accumulated_counts[-1][1]
                        position = (total_count - 1) * percentile
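The merge step mirrors Spark's continuous-percentile algorithm (linked above): with counts accumulated in sorted order, the target sits at `position = (total_count - 1) * percentile` and is linearly interpolated between the two bracketing values. A worked sketch over an explicit sorted list rather than the UDAF's accumulated-count representation (`percentile_exact` is a hypothetical name):

    def percentile_exact(sorted_values, p):
        # position lands in [0, n-1]; interpolate between bracketing values
        position = (len(sorted_values) - 1) * p
        lower = int(position)
        frac = position - lower
        if frac == 0:
            return float(sorted_values[lower])
        return sorted_values[lower] * (1 - frac) + sorted_values[lower + 1] * frac

    assert percentile_exact([1, 2, 3, 4], 0.5) == 2.5   # position = 1.5
    assert percentile_exact([1, 2, 3, 4], 1.0) == 4.0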
@@ -5353,7 +5947,7 @@ def map_unresolved_function(
                 )
                 result_type = DoubleType()

-                result_exp = snowpark_fn.cast(
+                result_exp = _resolve_aggregate_exp(
                     _percentile_udaf(column_value, percentage, snowpark_args[2]),
                     result_type,
                 )
@@ -5371,26 +5965,45 @@ def map_unresolved_function(
                         for arg in array_func.arguments
                     ]
                 )
-                result_exp = snowpark_fn.cast(
-                    result_exp,
-                    ArrayType(element_type=FloatType(), contains_null=False),
-                )
                 result_type = ArrayType(element_type=DoubleType(), contains_null=False)
+                result_exp = _resolve_aggregate_exp(result_exp, result_type)
                 spark_function_name = f"{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, 1)"
             else:
                 result_exp = snowpark_fn.function("percentile_cont")(
                     _check_percentile_percentage(exp.unresolved_function.arguments[1])
                 ).within_group(column_value)
-                result_exp = TypedColumn(
-                    snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
-                )
+                result_exp = _resolve_aggregate_exp(result_exp, DoubleType())
                 spark_function_name = f"{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, 1)"
         case "percentile_cont" | "percentiledisc":
             if function_name == "percentiledisc":
                 function_name = "percentile_disc"
+            order_by_col = snowpark_args[0]
+            args = exp.unresolved_function.arguments
+            if len(args) != 3:
+                exception = AssertionError(
+                    f"{function_name} expected 3 args but got {len(args)}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
+            # literal value 0.0 - 1.0
+            percentage_arg = args[1]
+            sort_direction = args[2].sort_order.direction
+            direction_str = ""  # defaultValue
+            if (
+                sort_direction
+                == expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+            ):
+                direction_str = "DESC"
+
+            # Apply sort direction to the order_by column
+            if direction_str == "DESC":
+                order_by_col_with_direction = order_by_col.desc()
+            else:
+                order_by_col_with_direction = order_by_col.asc()
+
             result_exp = snowpark_fn.function(function_name)(
-                _check_percentile_percentage(exp.unresolved_function.arguments[1])
-            ).within_group(snowpark_args[0])
+                _check_percentile_percentage(percentage_arg)
+            ).within_group(order_by_col_with_direction)
             result_exp = (
                 TypedColumn(
                     snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
@@ -5399,7 +6012,8 @@ def map_unresolved_function(
                 else TypedColumnWithDeferredCast(result_exp, lambda: [DoubleType()])
             )

-            spark_function_name = f"{function_name}({unwrap_literal(exp.unresolved_function.arguments[1])}) WITHIN GROUP (ORDER BY {snowpark_arg_names[0]})"
+            direction_part = f" {direction_str}" if direction_str else ""
+            spark_function_name = f"{function_name}({unwrap_literal(percentage_arg)}) WITHIN GROUP (ORDER BY {snowpark_arg_names[0]}{direction_part})"
         case "pi":
             spark_function_name = "PI()"
             result_exp = snowpark_fn.lit(math.pi)
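The new `percentile_cont`/`percentile_disc` code threads the proto's SortOrder direction into both the `within_group` ordering and the displayed name, so a DESC ordering round-trips through the rendered column name. A sketch of just the name construction (`within_group_name` is a hypothetical helper):

    def within_group_name(fn, percentage, col, direction_str=""):
        direction_part = f" {direction_str}" if direction_str else ""
        return f"{fn}({percentage}) WITHIN GROUP (ORDER BY {col}{direction_part})"

    assert (
        within_group_name("percentile_cont", 0.5, "x", "DESC")
        == "percentile_cont(0.5) WITHIN GROUP (ORDER BY x DESC)"
    )
    assert (
        within_group_name("percentile_disc", 0.25, "x")
        == "percentile_disc(0.25) WITHIN GROUP (ORDER BY x)"
    )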
@@ -5428,9 +6042,11 @@ def map_unresolved_function(
                 result_exp = snowpark_fn.cast(result_exp, result_type)
                 result_exp = TypedColumn(result_exp, lambda: [result_type])
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""pyspark.errors.exceptions.captured.AnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{dividend_type}" and "{divisor_type}")."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         case "posexplode" | "posexplode_outer":
             input_type = snowpark_typed_args[0].typ
             is_nullable = function_name == "posexplode_outer"
@@ -5510,9 +6126,11 @@ def map_unresolved_function(
                         input_type.value_type,
                     ]
                 else:
-                    raise TypeError(
+                    exception = TypeError(
                         f"Data type mismatch: {function_name} requires an array or map input, but got {input_type}."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
             result_exp = snowpark_fn.call_table_function(
                 posexplode_udtf.name, snowpark_args[0], snowpark_fn.lit(function_name)
             )
@@ -5535,25 +6153,35 @@ def map_unresolved_function(
         case "positive":
             arg_type = snowpark_typed_args[0].typ
             spark_function_name = f"(+ {snowpark_arg_names[0]})"
-            if isinstance(arg_type, _NumericType):
+            if (
+                isinstance(arg_type, _NumericType)
+                or isinstance(arg_type, YearMonthIntervalType)
+                or isinstance(arg_type, DayTimeIntervalType)
+            ):
                 result_exp = snowpark_args[0]
             elif isinstance(arg_type, StringType):
                 if spark_sql_ansi_enabled:
-                    raise NumberFormatException(
+                    exception = NumberFormatException(
                         f'The value \'{snowpark_args[0]}\' of the type {arg_type} cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+                    raise exception
                 else:
                     result_exp = snowpark_fn.lit(None)
             elif isinstance(arg_type, NullType):
                 result_exp = snowpark_fn.lit(None)
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "(+ {snowpark_arg_names[0]}" due to data type mismatch: '
                     f'Parameter 1 requires the ("NUMERIC") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_type = (
                 snowpark_typed_args[0].types
                 if isinstance(arg_type, _NumericType)
+                or isinstance(arg_type, YearMonthIntervalType)
+                or isinstance(arg_type, DayTimeIntervalType)
                 else DoubleType()
             )

@@ -5616,9 +6244,11 @@ def map_unresolved_function(
                 if not isinstance(
                     snowpark_typed_args[0].typ, (IntegerType, LongType, NullType)
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT") type, however {snowpark_arg_names[0]} has the type "{snowpark_typed_args[0].typ}"""
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                 result_exp = snowpark_fn.random(unwrap_literal(args[0]))
             else:
                 result_exp = snowpark_fn.random()
@@ -6006,9 +6636,11 @@ def map_unresolved_function(
                 ),
                 snowpark_typed_args[0].typ,
             ):
-                raise ArithmeticException(
+                exception = ArithmeticException(
                     '[ARITHMETIC_OVERFLOW] Overflow. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+                raise exception
             if len(snowpark_args) == 1:
                 spark_function_name = f"{function_name}({snowpark_arg_names[0]}, 0)"
                 result_exp = snowpark_fn.round(snowpark_args[0], snowpark_fn.lit(0))
@@ -6063,21 +6695,25 @@ def map_unresolved_function(
                 exp.unresolved_function.arguments[0].WhichOneof("expr_type")
                 != "literal"
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
                     f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
                     'the input csv should be a foldable "STRING" expression; however, '
                     f'got "{snowpark_arg_names[0]}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             if isinstance(snowpark_typed_args[0].typ, StringType):
                 if exp.unresolved_function.arguments[0].literal.string == "":
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
                        f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
                        'the input csv should be a foldable "STRING" expression; however, '
                        f'got "{snowpark_arg_names[0]}".'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

             snowpark_args = [
                 typed_arg.column(to_semi_structure=True)
@@ -6132,7 +6768,11 @@ def map_unresolved_function(
                 case [csv_data, options]:
                     result_exp = _schema_of_csv(csv_data, options)
                 case _:
-                    raise ValueError("Unrecognized from_csv parameters")
+                    exception = ValueError("Unrecognized from_csv parameters")
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             result_type = StringType()
         case "schema_of_json":

@@ -6250,15 +6890,19 @@ def map_unresolved_function(
                     obj = json.loads(json_str)
                     return _infer_pyspark_type(obj)
                 except json.JSONDecodeError as e:
-                    raise ValueError(f"Invalid JSON: {e}")
+                    raise ValueError(
+                        f"[snowpark_connect::invalid_input] Invalid JSON: {e}"
+                    )

             if (
                 exp.unresolved_function.arguments[0].WhichOneof("expr_type")
                 != "literal"
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "schema_of_json({",".join(snowpark_arg_names)})" due to data type mismatch: the input json should be a foldable "STRING" expression; however, got "{",".join(snowpark_arg_names)}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             result_exp = _infer_schema(snowpark_args[0])
             result_type = StringType()
         case "sec":
@@ -6299,12 +6943,14 @@ def map_unresolved_function(
                 not isinstance(snowpark_typed_args[0].typ, _IntegralType)
                 or not isinstance(snowpark_typed_args[1].typ, _IntegralType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"""[DATATYPE_MISMATCH.SEQUENCE_WRONG_INPUT_TYPES] Cannot resolve "sequence({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: `sequence` uses the wrong parameter type. The parameter type must conform to:
 1. The start and stop expressions must resolve to the same type.
 2. Otherwise, if start and stop expressions resolve to the "INTEGRAL" type, then the step expression must resolve to the same type.
 """
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = snowpark_fn.cast(
                 snowpark_fn.sequence(*snowpark_args),
                 ArrayType(LongType(), contains_null=False),
@@ -6322,15 +6968,21 @@ def map_unresolved_function(
             num_bits = unwrap_literal(exp.unresolved_function.arguments[1])
             if num_bits is None:
                 if spark_sql_ansi_enabled:
-                    raise NumberFormatException(
+                    exception = NumberFormatException(
                         f"""[CAST_INVALID_INPUT] The value {snowpark_arg_names[0]} of the type "{snowpark_typed_args[0].typ}" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type."""
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+                    raise exception
                 result_exp = snowpark_fn.lit(None)
                 result_type = StringType()
             elif num_bits not in bit_values:
-                raise IllegalArgumentException(
+                exception = IllegalArgumentException(
                     f"""requirement failed: numBits {num_bits} is not in the permitted values (0, 224, 256, 384, 512)"""
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
             else:
                 # 0 equivalent to 256 in PySpark, but is not allowed in Snowpark
                 num_bits = 256 if num_bits == 0 else num_bits
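The normalization above exists because Spark's `sha2` treats `numBits = 0` as SHA-256 while Snowflake's SHA2 rejects 0, and invalid widths must fail before reaching Snowflake. A hashlib sketch of the accepted widths (`sha2` here is a hypothetical stand-in, not the package's function):

    import hashlib

    def sha2(data: bytes, num_bits: int) -> str:
        if num_bits not in (0, 224, 256, 384, 512):
            raise ValueError(f"numBits {num_bits} is not in the permitted values")
        num_bits = 256 if num_bits == 0 else num_bits  # Spark: 0 means SHA-256
        return hashlib.new(f"sha{num_bits}", data).hexdigest()

    assert sha2(b"abc", 0) == sha2(b"abc", 256)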
@@ -6400,11 +7052,43 @@ def map_unresolved_function(
                 fn_name = "sign"

             spark_function_name = f"{fn_name}({snowpark_arg_names[0]})"
-            result_exp = snowpark_fn.when(
-                snowpark_args[0] == NAN, snowpark_fn.lit(NAN)
-            ).otherwise(
-                snowpark_fn.cast(snowpark_fn.sign(snowpark_args[0]), DoubleType())
-            )
+
+            if isinstance(snowpark_typed_args[0].typ, YearMonthIntervalType):
+                # Use SQL expression for zero year-month interval comparison
+                result_exp = (
+                    snowpark_fn.when(
+                        snowpark_args[0]
+                        > snowpark_fn.sql_expr("INTERVAL '0-0' YEAR TO MONTH"),
+                        snowpark_fn.lit(1.0),
+                    )
+                    .when(
+                        snowpark_args[0]
+                        < snowpark_fn.sql_expr("INTERVAL '0-0' YEAR TO MONTH"),
+                        snowpark_fn.lit(-1.0),
+                    )
+                    .otherwise(snowpark_fn.lit(0.0))
+                )
+            elif isinstance(snowpark_typed_args[0].typ, DayTimeIntervalType):
+                # Use SQL expression for zero day-time interval comparison
+                result_exp = (
+                    snowpark_fn.when(
+                        snowpark_args[0]
+                        > snowpark_fn.sql_expr("INTERVAL '0 0:0:0' DAY TO SECOND"),
+                        snowpark_fn.lit(1.0),
+                    )
+                    .when(
+                        snowpark_args[0]
+                        < snowpark_fn.sql_expr("INTERVAL '0 0:0:0' DAY TO SECOND"),
+                        snowpark_fn.lit(-1.0),
+                    )
+                    .otherwise(snowpark_fn.lit(0.0))
+                )
+            else:
+                result_exp = snowpark_fn.when(
+                    snowpark_args[0] == NAN, snowpark_fn.lit(NAN)
+                ).otherwise(
+                    snowpark_fn.cast(snowpark_fn.sign(snowpark_args[0]), DoubleType())
+                )
             result_type = DoubleType()
         case "sin":
             spark_function_name = f"SIN({snowpark_arg_names[0]})"
@@ -6504,9 +7188,11 @@ def map_unresolved_function(
             if len(snowpark_args) == 2 and not isinstance(
                 snowpark_typed_args[1].typ, BooleanType
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the "BOOLEAN" type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ.simpleString().upper()}"'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             sort_asc = (
                 unwrap_literal(exp.unresolved_function.arguments[1])
                 if len(snowpark_args) == 2
@@ -6557,10 +7243,10 @@ def map_unresolved_function(
                 import re

                 try:
-                    re.compile(pattern)
+                    compiled_pattern = re.compile(pattern)
                 except re.error:
                     raise ValueError(
-                        f"Failed to split string, provided pattern: {pattern} is invalid"
+                        f"[snowpark_connect::invalid_input] Failed to split string, provided pattern: {pattern} is invalid"
                     )

                 if limit == 1:
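Binding the result of `re.compile` both validates the pattern once up front and lets the split paths further down reuse the compiled object instead of re-parsing the pattern on every call. A self-contained sketch of the same idiom:

    import re

    pattern = r"[,;]"
    try:
        compiled_pattern = re.compile(pattern)  # validates once, reused below
    except re.error as exc:
        raise ValueError(f"provided pattern: {pattern} is invalid") from exc

    assert compiled_pattern.split("a,b;c", maxsplit=0) == ["a", "b", "c"]
    assert compiled_pattern.split("a,b;c", maxsplit=1) == ["a", "b;c"]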
@@ -6579,7 +7265,7 @@ def map_unresolved_function(
6579
7265
 
6580
7266
  match pattern:
6581
7267
  case "|":
6582
- split_result = re.split(pattern, input, 0)
7268
+ split_result = compiled_pattern.split(input, 0)
6583
7269
  input_limit = limit + 1 if limit > 0 else len(split_result)
6584
7270
  return (
6585
7271
  split_result
@@ -6591,7 +7277,7 @@ def map_unresolved_function(
6591
7277
  case "^":
6592
7278
  return [input]
6593
7279
  case _:
6594
- return re.split(pattern, input, maxsplit)
7280
+ return compiled_pattern.split(input, maxsplit)
6595
7281
 
6596
7282
  def split_string(str_: Column, pattern: Column, limit: Column):
6597
7283
  native_split = _split(str_, pattern, limit)
@@ -6639,9 +7325,24 @@ def map_unresolved_function(
6639
7325
  case [str_, pattern, limit]: # noqa: F841
6640
7326
  result_exp = split_string(str_, pattern, limit)
6641
7327
  case _:
6642
- raise ValueError(f"Invalid number of arguments to {function_name}")
7328
+ exception = ValueError(
7329
+ f"Invalid number of arguments to {function_name}"
7330
+ )
7331
+ attach_custom_error_code(
7332
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
7333
+ )
7334
+ raise exception
6643
7335
  case "split_part":
6644
- result_exp = snowpark_fn.call_function("split_part", *snowpark_args)
7336
+ # Check for index 0 and throw error to match PySpark behavior
7337
+ raise_error = _raise_error_helper(StringType(), SparkRuntimeException)
7338
+ result_exp = snowpark_fn.when(
7339
+ snowpark_args[2] == 0,
7340
+ raise_error(
7341
+ snowpark_fn.lit(
7342
+ "[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
7343
+ )
7344
+ ),
7345
+ ).otherwise(snowpark_fn.call_function("split_part", *snowpark_args))
6645
7346
  result_type = StringType()
6646
7347
  case "sqrt":
6647
7348
  spark_function_name = f"SQRT({snowpark_arg_names[0]})"
@@ -6649,9 +7350,11 @@ def map_unresolved_function(
6649
7350
  if isinstance(snowpark_typed_args[0].typ, StringType):
6650
7351
  sqrt_arg = snowpark_fn.try_cast(snowpark_args[0], DoubleType())
6651
7352
  elif not isinstance(snowpark_typed_args[0].typ, _NumericType):
6652
- raise AnalysisException(
7353
+ exception = AnalysisException(
6653
7354
  f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "SQRT({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}"."""
6654
7355
  )
7356
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7357
+ raise exception
6655
7358
  result_exp = (
6656
7359
  snowpark_fn.when(sqrt_arg < 0, NAN)
6657
7360
  .when(sqrt_arg.isNull(), snowpark_fn.lit(None))
@@ -6663,16 +7366,22 @@ def map_unresolved_function(
6663
7366
  # will depend on the input specified. All arguments in the input (apart from the first one that specifies
6664
7367
  # `num_rows`) must be the same type.
6665
7368
  if len(exp.unresolved_function.arguments) <= 1:
6666
- raise AnalysisException(
7369
+ exception = AnalysisException(
6667
7370
  f"""
6668
7371
  [WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `stack` requires > 1 parameters but the actual number is {len(exp.unresolved_function.arguments)}.
6669
7372
  """
6670
7373
  )
7374
+ attach_custom_error_code(
7375
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
7376
+ )
7377
+ raise exception
6671
7378
  num_rows = unwrap_literal(exp.unresolved_function.arguments[0])
6672
7379
  if not isinstance(snowpark_typed_args[0].typ, IntegerType):
6673
- raise AnalysisException(
7380
+ exception = AnalysisException(
6674
7381
  f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "INT" type, however "{num_rows}" has the type "{snowpark_typed_args[0].typ}"."""
6675
7382
  )
7383
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7384
+ raise exception
6676
7385
 
6677
7386
  num_arguments = len(snowpark_args) - 1
6678
7387
  num_cols = math.ceil(num_arguments / num_rows)
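
`stack(n, e1, ..., ek)` lays its k expressions out row-major into n rows of ceil(k / n) columns, padding the tail with NULLs; that is exactly what the `num_cols` computation sets up. A quick pure-Python model:

    import math

    def stack(num_rows, *exprs):
        num_cols = math.ceil(len(exprs) / num_rows)
        padded = list(exprs) + [None] * (num_rows * num_cols - len(exprs))
        return [tuple(padded[r * num_cols:(r + 1) * num_cols]) for r in range(num_rows)]

    print(stack(2, 1, 2, 3))   # [(1, 2), (3, None)]
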
@@ -6683,9 +7392,11 @@ def map_unresolved_function(
6683
7392
  if arg != spark_col_types[i % num_cols] and not isinstance(
6684
7393
  arg, NullType
6685
7394
  ):
6686
- raise AnalysisException(
7395
+ exception = AnalysisException(
6687
7396
  f"""[DATATYPE_MISMATCH.STACK_COLUMN_DIFF_TYPES] Cannot resolve "stack({snowpark_arg_names[0]})" due to data type mismatch: The data type of the column ({snowpark_arg_names[0]}) do not have the same type."""
6688
7397
  )
7398
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7399
+ raise exception
6689
7400
  if isinstance(arg, NullType):
6690
7401
  spark_col_types[i] = VariantType()
6691
7402
  snowpark_args[i + 1] = snowpark_fn.cast(
@@ -6742,9 +7453,11 @@ def map_unresolved_function(
6742
7453
  snowpark_args[0], DoubleType()
6743
7454
  )
6744
7455
  else:
6745
- raise AnalysisException(
7456
+ exception = AnalysisException(
6746
7457
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
6747
7458
  )
7459
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7460
+ raise exception
6748
7461
  result_exp = snowpark_fn.stddev(stddev_argument)
6749
7462
  result_type = DoubleType()
6750
7463
  case "stddev_pop":
@@ -6755,9 +7468,11 @@ def map_unresolved_function(
6755
7468
  snowpark_args[0], DoubleType()
6756
7469
  )
6757
7470
  else:
6758
- raise AnalysisException(
7471
+ exception = AnalysisException(
6759
7472
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev_pop({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
6760
7473
  )
7474
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7475
+ raise exception
6761
7476
  result_exp = snowpark_fn.stddev_pop(stddev_pop_argument)
6762
7477
  result_type = DoubleType()
6763
7478
  case "stddev_samp" | "std":
@@ -6768,9 +7483,11 @@ def map_unresolved_function(
6768
7483
  snowpark_args[0], DoubleType()
6769
7484
  )
6770
7485
  else:
6771
- raise AnalysisException(
7486
+ exception = AnalysisException(
6772
7487
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev_samp({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
6773
7488
  )
7489
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7490
+ raise exception
6774
7491
  result_exp = snowpark_fn.stddev_samp(stddev_samp_argument)
6775
7492
  result_type = DoubleType()
6776
7493
  case "str_to_map":
@@ -6818,7 +7535,7 @@ def map_unresolved_function(
6818
7535
 
6819
7536
  if key in result_map and not allow_dups:
6820
7537
  raise ValueError(
6821
- DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
7538
+ f"[snowpark_connect::invalid_input] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
6822
7539
  )
6823
7540
 
6824
7541
  result_map[key] = val
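
The duplicate-key branch presumably mirrors Spark's `spark.sql.mapKeyDedupPolicy`: the default EXCEPTION policy fails on a repeated key, while LAST_WIN keeps the newest value. A minimal model, treating `allow_dups` as LAST_WIN:

    def build_map(pairs, allow_dups=False):
        result = {}
        for key, val in pairs:
            if key in result and not allow_dups:
                raise ValueError(f"Duplicate map key {key} was found")
            result[key] = val   # last write wins when duplicates are allowed
        return result

    print(build_map([("a", 1), ("b", 2)]))                    # {'a': 1, 'b': 2}
    print(build_map([("a", 1), ("a", 2)], allow_dups=True))   # {'a': 2}
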
@@ -6957,9 +7674,11 @@ def map_unresolved_function(
6957
7674
  result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
6958
7675
  case "timestamp_millis":
6959
7676
  if not isinstance(snowpark_typed_args[0].typ, _IntegralType):
6960
- raise AnalysisException(
7677
+ exception = AnalysisException(
6961
7678
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "timestamp_millis({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "INTEGRAL" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
6962
7679
  )
7680
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7681
+ raise exception
6963
7682
  result_exp = snowpark_fn.cast(
6964
7683
  snowpark_fn.to_timestamp(snowpark_args[0] * 1_000, 6),
6965
7684
  TimestampType(snowpark.types.TimestampTimeZone.LTZ),
@@ -6970,9 +7689,11 @@ def map_unresolved_function(
6970
7689
  # even though the documentation explicitly says that it does.
6971
7690
  # As a workaround, use integer milliseconds instead of fractional seconds.
6972
7691
  if not isinstance(snowpark_typed_args[0].typ, _NumericType):
6973
- raise AnalysisException(
7692
+ exception = AnalysisException(
6974
7693
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "NUMERIC" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
6975
7694
  )
7695
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
7696
+ raise exception
6976
7697
  result_exp = snowpark_fn.cast(
6977
7698
  snowpark_fn.to_timestamp(
6978
7699
  snowpark_fn.cast(snowpark_args[0] * 1_000_000, LongType()), 6
@@ -7145,7 +7866,7 @@ def map_unresolved_function(
7145
7866
  if options is not None:
7146
7867
  if not isinstance(options, dict):
7147
7868
  raise TypeError(
7148
- "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
7869
+ "[snowpark_connect::invalid_input] [INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
7149
7870
  )
7150
7871
 
7151
7872
  python_to_snowflake_type = {
@@ -7164,7 +7885,7 @@ def map_unresolved_function(
7164
7885
  type(v).__name__, type(v).__name__.upper()
7165
7886
  )
7166
7887
  raise TypeError(
7167
- f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
7888
+ f'[snowpark_connect::type_mismatch] [INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
7168
7889
  )
7169
7890
 
7170
7891
  options = options or {}
@@ -7302,7 +8023,7 @@ def map_unresolved_function(
7302
8023
  result.append(escape_and_quote_string(str_value))
7303
8024
  case _:
7304
8025
  raise ValueError(
7305
- f"Unable to determine type for value: {python_type}"
8026
+ f"[snowpark_connect::type_mismatch] Unable to determine type for value: {python_type}"
7306
8027
  )
7307
8028
  elif isinstance(value, str):
7308
8029
  strip_value = (
@@ -7335,9 +8056,11 @@ def map_unresolved_function(
7335
8056
  if len(snowpark_arg_names) > 1 and snowpark_arg_names[1].startswith(
7336
8057
  "named_struct"
7337
8058
  ):
7338
- raise TypeError(
8059
+ exception = TypeError(
7339
8060
  "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
7340
8061
  )
8062
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
8063
+ raise exception
7341
8064
 
7342
8065
  def get_snowpark_type_name(snowpark_type: DataType) -> str:
7343
8066
  return (
@@ -7370,7 +8093,11 @@ def map_unresolved_function(
7370
8093
  case [csv_data, options]:
7371
8094
  result_exp = _to_csv(csv_data, field_names, field_types, options)
7372
8095
  case _:
7373
- raise ValueError("Unrecognized from_csv parameters")
8096
+ exception = ValueError("Unrecognized from_csv parameters")
8097
+ attach_custom_error_code(
8098
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8099
+ )
8100
+ raise exception
7374
8101
  result_type = StringType()
7375
8102
  case "to_date":
7376
8103
  if not spark_sql_ansi_enabled:
@@ -7397,23 +8124,29 @@ def map_unresolved_function(
7397
8124
  case NullType():
7398
8125
  result_exp = snowpark_fn.lit(None)
7399
8126
  case _:
7400
- raise AnalysisException(
8127
+ exception = AnalysisException(
7401
8128
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "to_date({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the ("STRING" or "DATE" or "TIMESTAMP" or "TIMESTAMP_NTZ") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
7402
8129
  )
8130
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8131
+ raise exception
7403
8132
 
7404
8133
  result_type = DateType()
7405
8134
  case "to_json":
7406
8135
  if len(snowpark_args) > 1:
7407
8136
  if not isinstance(snowpark_typed_args[1].typ, MapType):
7408
- raise AnalysisException(
8137
+ exception = AnalysisException(
7409
8138
  "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
7410
8139
  )
8140
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
8141
+ raise exception
7411
8142
  if not isinstance(
7412
8143
  snowpark_typed_args[1].typ.key_type, StringType
7413
8144
  ) or not isinstance(snowpark_typed_args[1].typ.value_type, StringType):
7414
- raise AnalysisException(
8145
+ exception = AnalysisException(
7415
8146
  f"""[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "{snowpark_typed_args[1].typ.simpleString().upper()}"."""
7416
8147
  )
8148
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8149
+ raise exception
7417
8150
  result_exp = snowpark_fn.to_json(snowpark_fn.to_variant(snowpark_args[0]))
7418
8151
  result_type = StringType()
7419
8152
  case "to_number":
@@ -7468,7 +8201,13 @@ def map_unresolved_function(
7468
8201
  )
7469
8202
  )
7470
8203
  case _:
7471
- raise ValueError(f"Invalid number of arguments to {function_name}")
8204
+ exception = ValueError(
8205
+ f"Invalid number of arguments to {function_name}"
8206
+ )
8207
+ attach_custom_error_code(
8208
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8209
+ )
8210
+ raise exception
7472
8211
  result_exp = snowpark_fn.cast(result_exp, get_timestamp_type())
7473
8212
  result_type = get_timestamp_type()
7474
8213
 
@@ -7486,7 +8225,13 @@ def map_unresolved_function(
7486
8225
  ),
7487
8226
  )
7488
8227
  case _:
7489
- raise ValueError(f"Invalid number of arguments to {function_name}")
8228
+ exception = ValueError(
8229
+ f"Invalid number of arguments to {function_name}"
8230
+ )
8231
+ attach_custom_error_code(
8232
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8233
+ )
8234
+ raise exception
7490
8235
  result_exp = snowpark_fn.cast(
7491
8236
  result_exp, TimestampType(snowpark.types.TimestampTimeZone.LTZ)
7492
8237
  )
@@ -7511,7 +8256,13 @@ def map_unresolved_function(
7511
8256
  ),
7512
8257
  )
7513
8258
  case _:
7514
- raise ValueError(f"Invalid number of arguments to {function_name}")
8259
+ exception = ValueError(
8260
+ f"Invalid number of arguments to {function_name}"
8261
+ )
8262
+ attach_custom_error_code(
8263
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8264
+ )
8265
+ raise exception
7515
8266
  result_exp = snowpark_fn.cast(
7516
8267
  result_exp, TimestampType(snowpark.types.TimestampTimeZone.NTZ)
7517
8268
  )
@@ -7553,9 +8304,13 @@ def map_unresolved_function(
7553
8304
  snowpark_fn.lit("YYYY-MM-DD HH24:MI:SS"),
7554
8305
  )
7555
8306
  case _:
7556
- raise SnowparkConnectNotImplementedError(
8307
+ exception = SnowparkConnectNotImplementedError(
7557
8308
  "to_unix_timestamp expected 1 or 2 arguments."
7558
8309
  )
8310
+ attach_custom_error_code(
8311
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8312
+ )
8313
+ raise exception
7559
8314
 
7560
8315
  if len(exp.unresolved_function.arguments) == 1:
7561
8316
  spark_function_name = f"to_unix_timestamp({snowpark_arg_names[0]}, {'yyyy-MM-dd HH:mm:ss'})"
@@ -7617,14 +8372,123 @@ def map_unresolved_function(
7617
8372
  )
7618
8373
  result_type = DateType()
7619
8374
  case "try_add":
7620
- # Check for interval types and throw NotImplementedError
7621
- for arg in snowpark_typed_args:
7622
- if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
7623
- raise NotImplementedError(
7624
- "try_add with interval types is not supported"
8375
+ # Handle interval arithmetic with overflow detection
8376
+ match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
8377
+ case (DateType(), t) | (t, DateType()) if isinstance(
8378
+ t, YearMonthIntervalType
8379
+ ):
8380
+ result_type = DateType()
8381
+ result_exp = snowpark_args[0] + snowpark_args[1]
8382
+ case (DateType(), t) | (t, DateType()) if isinstance(
8383
+ t, DayTimeIntervalType
8384
+ ):
8385
+ result_type = TimestampType()
8386
+ result_exp = snowpark_args[0] + snowpark_args[1]
8387
+ case (TimestampType(), t) | (t, TimestampType()) if isinstance(
8388
+ t, (DayTimeIntervalType, YearMonthIntervalType)
8389
+ ):
8390
+ result_type = (
8391
+ snowpark_typed_args[0].typ
8392
+ if isinstance(snowpark_typed_args[0].typ, TimestampType)
8393
+ else snowpark_typed_args[1].typ
7625
8394
  )
7626
- result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 0)
7627
- result_exp = _type_with_typer(result_exp)
8395
+ result_exp = snowpark_args[0] + snowpark_args[1]
8396
+ case (t1, t2) if (
8397
+ isinstance(t1, YearMonthIntervalType)
8398
+ and isinstance(t2, (_NumericType, StringType))
8399
+ ) or (
8400
+ isinstance(t2, YearMonthIntervalType)
8401
+ and isinstance(t1, (_NumericType, StringType))
8402
+ ):
8403
+ # YearMonthInterval + numeric/string or numeric/string + YearMonthInterval should throw error
8404
+ exception = AnalysisException(
8405
+ f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "try_add({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
8406
+ )
8407
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8408
+ raise exception
8409
+ case (t1, t2) if isinstance(t1, YearMonthIntervalType) and isinstance(
8410
+ t2, YearMonthIntervalType
8411
+ ):
8412
+ result_type = YearMonthIntervalType(
8413
+ min(t1.start_field, t2.start_field),
8414
+ max(t1.end_field, t2.end_field),
8415
+ )
8416
+
8417
+ # For year-month intervals, throw ArithmeticException if operands reach 10+ digits OR result exceeds 9 digits
8418
+ total1 = _calculate_total_months(snowpark_args[0])
8419
+ total2 = _calculate_total_months(snowpark_args[1])
8420
+ ten_digit_limit = snowpark_fn.lit(MAX_10_DIGIT_LIMIT)
8421
+
8422
+ precision_violation = (
8423
+ # Check if either operand already reaches 10 digits (parsing limit)
8424
+ (snowpark_fn.abs(total1) >= ten_digit_limit)
8425
+ | (snowpark_fn.abs(total2) >= ten_digit_limit)
8426
+ | (
8427
+ (total1 > 0)
8428
+ & (total2 > 0)
8429
+ & (total1 >= ten_digit_limit - total2)
8430
+ )
8431
+ | (
8432
+ (total1 < 0)
8433
+ & (total2 < 0)
8434
+ & (total1 <= -ten_digit_limit - total2)
8435
+ )
8436
+ )
8437
+
8438
+ raise_error = _raise_error_helper(result_type, ArithmeticException)
8439
+ result_exp = snowpark_fn.when(
8440
+ precision_violation,
8441
+ raise_error(
8442
+ snowpark_fn.lit(
8443
+ "Year-Month Interval result exceeds Snowflake interval precision limit"
8444
+ )
8445
+ ),
8446
+ ).otherwise(snowpark_args[0] + snowpark_args[1])
8447
+ case (t1, t2) if isinstance(t1, DayTimeIntervalType) and isinstance(
8448
+ t2, DayTimeIntervalType
8449
+ ):
8450
+ result_type = DayTimeIntervalType(
8451
+ min(t1.start_field, t2.start_field),
8452
+ max(t1.end_field, t2.end_field),
8453
+ )
8454
+ # Check for Snowflake's day limit (106751991 days is the cutoff)
8455
+ days1 = snowpark_fn.date_part("day", snowpark_args[0])
8456
+ days2 = snowpark_fn.date_part("day", snowpark_args[1])
8457
+ max_days = snowpark_fn.lit(
8458
+ MAX_DAY_TIME_DAYS
8459
+ ) # Snowflake's actual limit
8460
+ min_days = snowpark_fn.lit(-MAX_DAY_TIME_DAYS)
8461
+
8462
+ # Check if either operand exceeds the day limit - throw error like Spark does
8463
+ operand_limit_violation = (snowpark_fn.abs(days1) > max_days) | (
8464
+ snowpark_fn.abs(days2) > max_days
8465
+ )
8466
+
8467
+ # Check if result would exceed day limit (but operands are valid) - return NULL
8468
+ result_overflow = (
8469
+ # Check if result would exceed day limit (positive overflow)
8470
+ ((days1 > 0) & (days2 > 0) & (days1 > max_days - days2))
8471
+ | ((days1 < 0) & (days2 < 0) & (days1 < min_days - days2))
8472
+ )
8473
+
8474
+ raise_error = _raise_error_helper(result_type, ArithmeticException)
8475
+ result_exp = (
8476
+ snowpark_fn.when(
8477
+ operand_limit_violation,
8478
+ raise_error(
8479
+ snowpark_fn.lit(
8480
+ "Day-Time Interval operand exceeds Snowflake interval precision limit"
8481
+ )
8482
+ ),
8483
+ )
8484
+ .when(result_overflow, snowpark_fn.lit(None))
8485
+ .otherwise(snowpark_args[0] + snowpark_args[1])
8486
+ )
8487
+ case _:
8488
+ result_exp = _try_arithmetic_helper(
8489
+ snowpark_typed_args, snowpark_args, 0
8490
+ )
8491
+ result_exp = _type_with_typer(result_exp)
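
The year-month branch normalizes both operands to total months and rejects sums whose magnitude would cross the parsing/precision cutoff before performing the addition. A self-contained model of the overflow predicate, assuming MAX_10_DIGIT_LIMIT is 10**9 (the constant itself is defined elsewhere in the module):

    MAX_10_DIGIT_LIMIT = 10**9  # assumed value of the module constant

    def try_add_months(total1: int, total2: int) -> int:
        limit = MAX_10_DIGIT_LIMIT
        violation = (
            abs(total1) >= limit
            or abs(total2) >= limit
            or (total1 > 0 and total2 > 0 and total1 >= limit - total2)
            or (total1 < 0 and total2 < 0 and total1 <= -limit - total2)
        )
        if violation:
            raise ArithmeticError("interval precision limit exceeded")
        return total1 + total2

    print(try_add_months(14, 27))   # 41 months, i.e. INTERVAL '3-5' YEAR TO MONTH
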
7628
8492
  case "try_aes_decrypt":
7629
8493
  result_exp = _aes_helper(
7630
8494
  "TRY_DECRYPT",
@@ -7676,13 +8540,49 @@ def map_unresolved_function(
7676
8540
  DoubleType(), cleaned, calculating_avg=True
7677
8541
  )
7678
8542
  case "try_divide":
7679
- # Check for interval types and throw NotImplementedError
7680
- for arg in snowpark_typed_args:
7681
- if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
7682
- raise NotImplementedError(
7683
- "try_divide with interval types is not supported"
7684
- )
8543
+ # Handle interval division with overflow detection
7685
8544
  match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
8545
+ case (t1, t2) if isinstance(t1, _AnsiIntervalType) and isinstance(
8546
+ t2, (_NumericType, StringType)
8547
+ ):
8548
+ # Interval / numeric/string
8549
+ result_type = t1
8550
+ interval_arg = snowpark_args[0]
8551
+ divisor = (
8552
+ snowpark_args[1]
8553
+ if isinstance(t2, _NumericType)
8554
+ else snowpark_fn.cast(snowpark_args[1], "double")
8555
+ )
8556
+
8557
+ # Check for division by zero first
8558
+ zero_check = divisor == 0
8559
+
8560
+ if isinstance(result_type, YearMonthIntervalType):
8561
+ # For year-month intervals, check if result exceeds 32-bit signed integer limit
8562
+ result_type = YearMonthIntervalType()
8563
+ total_months = _calculate_total_months(interval_arg)
8564
+ max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
8565
+ overflow_check = (
8566
+ snowpark_fn.abs(total_months / divisor) > max_months
8567
+ )
8568
+ result_exp = (
8569
+ snowpark_fn.when(zero_check, snowpark_fn.lit(None))
8570
+ .when(overflow_check, snowpark_fn.lit(None))
8571
+ .otherwise(interval_arg / divisor)
8572
+ )
8573
+ else: # DayTimeIntervalType
8574
+ # For day-time intervals, check if result exceeds day limit
8575
+ result_type = DayTimeIntervalType()
8576
+ total_days = _calculate_total_days(interval_arg)
8577
+ max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
8578
+ overflow_check = (
8579
+ snowpark_fn.abs(total_days / divisor) > max_days
8580
+ )
8581
+ result_exp = (
8582
+ snowpark_fn.when(zero_check, snowpark_fn.lit(None))
8583
+ .when(overflow_check, snowpark_fn.lit(None))
8584
+ .otherwise(interval_arg / divisor)
8585
+ )
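
Division short-circuits a zero divisor to NULL (the `try_divide` contract) and then applies the same magnitude guard, degrading overflow to NULL instead of raising. A sketch over total months, assuming MAX_32BIT_SIGNED_INT is 2**31 - 1:

    MAX_32BIT_SIGNED_INT = 2**31 - 1  # assumed value of the module constant

    def try_divide_months(total_months: int, divisor: float):
        if divisor == 0:
            return None   # division by zero -> NULL, never an error
        if abs(total_months / divisor) > MAX_32BIT_SIGNED_INT:
            return None   # overflow -> NULL under try_ semantics
        return total_months / divisor

    print(try_divide_months(24, 2))   # 12.0 months -> INTERVAL '1-0' YEAR TO MONTH
    print(try_divide_months(24, 0))   # None
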
7686
8586
  case (NullType(), t) | (t, NullType()):
7687
8587
  result_exp = snowpark_fn.lit(None)
7688
8588
  result_type = FloatType()
@@ -7742,9 +8642,11 @@ def map_unresolved_function(
7742
8642
  ).otherwise(cleaned_left / cleaned_right)
7743
8643
  result_exp = _type_with_typer(result_exp)
7744
8644
  case (_, _):
7745
- raise AnalysisException(
8645
+ exception = AnalysisException(
7746
8646
  f"Incompatible types: {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}"
7747
8647
  )
8648
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8649
+ raise exception
7748
8650
 
7749
8651
  case "try_element_at":
7750
8652
  # For structured ArrayType and MapType columns, Snowflake raises an error when an index is out of bounds or a key does not exist.
@@ -7786,17 +8688,82 @@ def map_unresolved_function(
7786
8688
  case _:
7787
8689
  # Currently we do not handle VariantType columns as the first argument here.
7788
8690
  # Spark will not support VariantType until 4.0.0, revisit this when the support is added.
7789
- raise AnalysisException(
8691
+ exception = AnalysisException(
7790
8692
  f"Expected either (ArrayType, IntegralType) or (MapType, StringType), got {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}."
7791
8693
  )
8694
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8695
+ raise exception
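
Spark's `try_element_at` is 1-based for arrays, accepts negative indexes counted from the end, and returns NULL rather than erroring when the index or key is absent -- the behavior the structured-type handling above emulates on top of Snowflake's stricter lookups. A plain model (index 0 is shown as NULL here; Spark versions differ on whether it raises instead):

    def try_element_at(container, key):
        if isinstance(container, list):
            if key == 0:
                return None   # index 0 is never a valid position
            idx = key - 1 if key > 0 else key
            if -len(container) <= idx < len(container):
                return container[idx]
            return None       # out of bounds -> NULL, not an error
        return container.get(key)  # maps: missing key -> NULL

    assert try_element_at([10, 20, 30], 2) == 20
    assert try_element_at([10, 20, 30], -1) == 30
    assert try_element_at({"a": 1}, "b") is None
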
7792
8696
  case "try_multiply":
7793
- # Check for interval types and throw NotImplementedError
7794
- for arg in snowpark_typed_args:
7795
- if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
7796
- raise NotImplementedError(
7797
- "try_multiply with interval types is not supported"
7798
- )
7799
8697
  match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
8698
+ case (t1, t2) if isinstance(t1, _AnsiIntervalType) and isinstance(
8699
+ t2, (_NumericType, StringType)
8700
+ ):
8701
+ # Interval * numeric/string
8702
+ result_type = t1
8703
+ interval_arg = snowpark_args[0]
8704
+ multiplier = (
8705
+ snowpark_args[1]
8706
+ if isinstance(t2, _NumericType)
8707
+ else snowpark_fn.cast(snowpark_args[1], "double")
8708
+ )
8709
+
8710
+ if isinstance(result_type, YearMonthIntervalType):
8711
+ # For year-month intervals, check if result exceeds 32-bit signed integer limit
8712
+ result_type = YearMonthIntervalType()
8713
+ total_months = _calculate_total_months(interval_arg)
8714
+ max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
8715
+ overflow_check = (
8716
+ snowpark_fn.abs(total_months * multiplier) > max_months
8717
+ )
8718
+ result_exp = snowpark_fn.when(
8719
+ overflow_check, snowpark_fn.lit(None)
8720
+ ).otherwise(interval_arg * multiplier)
8721
+ else: # DayTimeIntervalType
8722
+ # For day-time intervals, check if result exceeds day limit
8723
+ result_type = DayTimeIntervalType()
8724
+ total_days = _calculate_total_days(interval_arg)
8725
+ max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
8726
+ overflow_check = (
8727
+ snowpark_fn.abs(total_days * multiplier) > max_days
8728
+ )
8729
+ result_exp = snowpark_fn.when(
8730
+ overflow_check, snowpark_fn.lit(None)
8731
+ ).otherwise(interval_arg * multiplier)
8732
+
8733
+ case (t1, t2) if isinstance(t2, _AnsiIntervalType) and isinstance(
8734
+ t1, (_NumericType, StringType)
8735
+ ):
8736
+ # numeric/string * Interval
8737
+ result_type = t2
8738
+ interval_arg = snowpark_args[1]
8739
+ multiplier = (
8740
+ snowpark_args[0]
8741
+ if isinstance(t1, _NumericType)
8742
+ else snowpark_fn.cast(snowpark_args[0], "double")
8743
+ )
8744
+
8745
+ if isinstance(result_type, YearMonthIntervalType):
8746
+ # For year-month intervals, check if result exceeds 32-bit signed integer limit
8747
+ result_type = YearMonthIntervalType()
8748
+ total_months = _calculate_total_months(interval_arg)
8749
+ max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
8750
+ overflow_check = (
8751
+ snowpark_fn.abs(total_months * multiplier) > max_months
8752
+ )
8753
+ result_exp = snowpark_fn.when(
8754
+ overflow_check, snowpark_fn.lit(None)
8755
+ ).otherwise(interval_arg * multiplier)
8756
+ else: # DayTimeIntervalType
8757
+ # For day-time intervals, check if result exceeds day limit
8758
+ result_type = DayTimeIntervalType()
8759
+ total_days = _calculate_total_days(interval_arg)
8760
+ max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
8761
+ overflow_check = (
8762
+ snowpark_fn.abs(total_days * multiplier) > max_days
8763
+ )
8764
+ result_exp = snowpark_fn.when(
8765
+ overflow_check, snowpark_fn.lit(None)
8766
+ ).otherwise(interval_arg * multiplier)
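
Multiplication handles the two operand orders symmetrically; only the scaled magnitude of the interval operand is checked. A sketch of the day-time case, taking MAX_DAY_TIME_DAYS = 106751991 from the comments above:

    MAX_DAY_TIME_DAYS = 106751991  # Snowflake's day cutoff, per the comment above

    def try_multiply_days(total_days: float, multiplier: float):
        if abs(total_days * multiplier) > MAX_DAY_TIME_DAYS:
            return None   # overflow -> NULL under try_ semantics
        return total_days * multiplier

    print(try_multiply_days(2.5, 4))         # 10.0 days
    print(try_multiply_days(106751991, 2))   # None
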
7800
8767
  case (NullType(), t) | (t, NullType()):
7801
8768
  result_exp = snowpark_fn.lit(None)
7802
8769
  match t:
@@ -7870,9 +8837,11 @@ def map_unresolved_function(
7870
8837
  result_exp = cleaned_left * cleaned_right
7871
8838
  result_exp = _type_with_typer(result_exp)
7872
8839
  case (_, _):
7873
- raise AnalysisException(
8840
+ exception = AnalysisException(
7874
8841
  f"Incompatible types: {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}"
7875
8842
  )
8843
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8844
+ raise exception
7876
8845
  case "try_sum":
7877
8846
  # Snowflake raises an error when a value that cannot be cast into a numeric is passed to SUM. Spark treats these as NULL values and
7878
8847
  # does not throw an error. Additionally, Spark returns NULL when this calculation results in an overflow, whereas Snowflake raises a "TypeError".
@@ -7894,14 +8863,112 @@ def map_unresolved_function(
7894
8863
  snowpark_typed_args[0].typ, snowpark_args[0]
7895
8864
  )
7896
8865
  case "try_subtract":
7897
- # Check for interval types and throw NotImplementedError
7898
- for arg in snowpark_typed_args:
7899
- if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
7900
- raise NotImplementedError(
7901
- "try_subtract with interval types is not supported"
8866
+ # Handle interval arithmetic with overflow detection
8867
+ match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
8868
+ case (DateType(), t) if isinstance(t, YearMonthIntervalType):
8869
+ result_type = DateType()
8870
+ result_exp = snowpark_args[0] - snowpark_args[1]
8871
+ case (DateType(), t) if isinstance(t, DayTimeIntervalType):
8872
+ result_type = TimestampType()
8873
+ result_exp = snowpark_args[0] - snowpark_args[1]
8874
+ case (TimestampType(), t) if isinstance(
8875
+ t, (DayTimeIntervalType, YearMonthIntervalType)
8876
+ ):
8877
+ result_type = snowpark_typed_args[0].typ
8878
+ result_exp = snowpark_args[0] - snowpark_args[1]
8879
+ case (t1, t2) if (
8880
+ isinstance(t1, YearMonthIntervalType)
8881
+ and isinstance(t2, (_NumericType, StringType))
8882
+ ) or (
8883
+ isinstance(t2, YearMonthIntervalType)
8884
+ and isinstance(t1, (_NumericType, StringType))
8885
+ ):
8886
+ # YearMonthInterval - numeric/string or numeric/string - YearMonthInterval should throw error
8887
+ exception = AnalysisException(
8888
+ f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "try_subtract({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
8889
+ )
8890
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
8891
+ raise exception
8892
+ case (t1, t2) if isinstance(t1, YearMonthIntervalType) and isinstance(
8893
+ t2, YearMonthIntervalType
8894
+ ):
8895
+ result_type = YearMonthIntervalType(
8896
+ min(t1.start_field, t2.start_field),
8897
+ max(t1.end_field, t2.end_field),
8898
+ )
8899
+ # Check for Snowflake's precision limits: 10+ digits for operands, 9+ digits for results
8900
+ total1 = _calculate_total_months(snowpark_args[0])
8901
+ total2 = _calculate_total_months(snowpark_args[1])
8902
+ ten_digit_limit = snowpark_fn.lit(MAX_10_DIGIT_LIMIT)
8903
+
8904
+ precision_violation = (
8905
+ # Check if either operand already reaches 10 digits (parsing limit)
8906
+ (snowpark_fn.abs(total1) >= ten_digit_limit)
8907
+ | (snowpark_fn.abs(total2) >= ten_digit_limit)
8908
+ | (
8909
+ (total1 > 0)
8910
+ & (total2 < 0)
8911
+ & (total1 >= ten_digit_limit + total2)
8912
+ )
8913
+ | (
8914
+ (total1 < 0)
8915
+ & (total2 > 0)
8916
+ & (total1 <= -ten_digit_limit + total2)
8917
+ )
7902
8918
  )
7903
- result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 1)
7904
- result_exp = _type_with_typer(result_exp)
8919
+
8920
+ raise_error = _raise_error_helper(result_type, ArithmeticException)
8921
+ result_exp = snowpark_fn.when(
8922
+ precision_violation,
8923
+ raise_error(
8924
+ snowpark_fn.lit(
8925
+ "Year-Month Interval result exceeds Snowflake interval precision limit"
8926
+ )
8927
+ ),
8928
+ ).otherwise(snowpark_args[0] - snowpark_args[1])
8929
+ case (t1, t2) if isinstance(t1, DayTimeIntervalType) and isinstance(
8930
+ t2, DayTimeIntervalType
8931
+ ):
8932
+ result_type = DayTimeIntervalType(
8933
+ min(t1.start_field, t2.start_field),
8934
+ max(t1.end_field, t2.end_field),
8935
+ )
8936
+ # Check for Snowflake's day limit (106751991 days is the cutoff)
8937
+ days1 = snowpark_fn.date_part("day", snowpark_args[0])
8938
+ days2 = snowpark_fn.date_part("day", snowpark_args[1])
8939
+ max_days = snowpark_fn.lit(
8940
+ MAX_DAY_TIME_DAYS
8941
+ ) # Snowflake's actual limit
8942
+ min_days = snowpark_fn.lit(-MAX_DAY_TIME_DAYS)
8943
+
8944
+ # Check if either operand exceeds the day limit - throw error like Spark does
8945
+ operand_limit_violation = (snowpark_fn.abs(days1) > max_days) | (
8946
+ snowpark_fn.abs(days2) > max_days
8947
+ )
8948
+
8949
+ # Check if result would exceed day limit (but operands are valid) - return NULL
8950
+ result_overflow = (
8951
+ (days1 > 0) & (days2 < 0) & (days1 > max_days + days2)
8952
+ ) | ((days1 < 0) & (days2 > 0) & (days1 < min_days + days2))
8953
+
8954
+ raise_error = _raise_error_helper(result_type, ArithmeticException)
8955
+ result_exp = (
8956
+ snowpark_fn.when(
8957
+ operand_limit_violation,
8958
+ raise_error(
8959
+ snowpark_fn.lit(
8960
+ "Day-Time Interval operand exceeds day limit"
8961
+ )
8962
+ ),
8963
+ )
8964
+ .when(result_overflow, snowpark_fn.lit(None))
8965
+ .otherwise(snowpark_args[0] - snowpark_args[1])
8966
+ )
8967
+ case _:
8968
+ result_exp = _try_arithmetic_helper(
8969
+ snowpark_typed_args, snowpark_args, 1
8970
+ )
8971
+ result_exp = _type_with_typer(result_exp)
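
Subtraction distinguishes two failure modes: an operand already past the day limit raises (as Spark does), while a result that only overflows after subtracting degrades to NULL. A condensed model of the day-time branch:

    MAX_DAY_TIME_DAYS = 106751991  # assumed module constant

    def try_subtract_days(days1: int, days2: int):
        if abs(days1) > MAX_DAY_TIME_DAYS or abs(days2) > MAX_DAY_TIME_DAYS:
            raise ArithmeticError("operand exceeds day limit")  # error, like Spark
        if (days1 > 0 and days2 < 0 and days1 > MAX_DAY_TIME_DAYS + days2) or (
            days1 < 0 and days2 > 0 and days1 < -MAX_DAY_TIME_DAYS + days2
        ):
            return None   # result-only overflow -> NULL
        return days1 - days2

    print(try_subtract_days(5, 3))   # 2
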
7905
8972
  case "try_to_number":
7906
8973
  try_to_number = snowpark_fn.function("try_to_number")
7907
8974
  precision, scale = resolve_to_number_precision_and_scale(exp)
@@ -7924,7 +8991,13 @@ def map_unresolved_function(
7924
8991
  ),
7925
8992
  )
7926
8993
  case _:
7927
- raise ValueError(f"Invalid number of arguments to {function_name}")
8994
+ exception = ValueError(
8995
+ f"Invalid number of arguments to {function_name}"
8996
+ )
8997
+ attach_custom_error_code(
8998
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
8999
+ )
9000
+ raise exception
7928
9001
  result_type = get_timestamp_type()
7929
9002
  result_exp = snowpark_fn.cast(result_exp, result_type)
7930
9003
  case "typeof":
@@ -8036,9 +9109,13 @@ def map_unresolved_function(
8036
9109
  snowpark_fn.lit("YYYY-MM-DD HH24:MI:SS"),
8037
9110
  )
8038
9111
  case _:
8039
- raise SnowparkConnectNotImplementedError(
9112
+ exception = SnowparkConnectNotImplementedError(
8040
9113
  "unix_timestamp expected 0, 1 or 2 arguments."
8041
9114
  )
9115
+ attach_custom_error_code(
9116
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
9117
+ )
9118
+ raise exception
8042
9119
  result_type = LongType()
8043
9120
  case "unwrap_udt":
8044
9121
  snowpark_col_name = snowpark_args[0].get_name()
@@ -8055,9 +9132,11 @@ def map_unresolved_function(
8055
9132
  )
8056
9133
 
8057
9134
  if "__udt_info__" not in metadata:
8058
- raise AnalysisException(
9135
+ exception = AnalysisException(
8059
9136
  f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve '{spark_function_name})' due to data type mismatch: Parameter 1 requires the 'USERDEFINEDTYPE' type"
8060
9137
  )
9138
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9139
+ raise exception
8061
9140
 
8062
9141
  result_type = map_json_schema_to_snowpark(
8063
9142
  metadata["__udt_info__"]["sqlType"]
@@ -8113,9 +9192,11 @@ def map_unresolved_function(
8113
9192
  snowpark_args[0], DoubleType()
8114
9193
  )
8115
9194
  else:
8116
- raise AnalysisException(
9195
+ exception = AnalysisException(
8117
9196
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
8118
9197
  )
9198
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9199
+ raise exception
8119
9200
  result_type = DoubleType()
8120
9201
  result_exp = _resolve_aggregate_exp(
8121
9202
  snowpark_fn.var_pop(var_pop_argument), result_type
@@ -8128,9 +9209,11 @@ def map_unresolved_function(
8128
9209
  snowpark_args[0], DoubleType()
8129
9210
  )
8130
9211
  else:
8131
- raise AnalysisException(
9212
+ exception = AnalysisException(
8132
9213
  f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
8133
9214
  )
9215
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9216
+ raise exception
8134
9217
  result_type = DoubleType()
8135
9218
  result_exp = _resolve_aggregate_exp(
8136
9219
  snowpark_fn.var_samp(var_samp_argument), result_type
@@ -8163,10 +9246,12 @@ def map_unresolved_function(
8163
9246
  if len(snowpark_typed_args) > 0:
8164
9247
  condition_type = snowpark_typed_args[0].typ
8165
9248
  if not isinstance(condition_type, BooleanType):
8166
- raise AnalysisException(
9249
+ exception = AnalysisException(
8167
9250
  f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
8168
9251
  f"Parameter 1 requires the 'BOOLEAN' type, however got '{condition_type}'"
8169
9252
  )
9253
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9254
+ raise exception
8170
9255
 
8171
9256
  name_components = ["CASE"]
8172
9257
  name_components.append("WHEN")
@@ -8189,10 +9274,12 @@ def map_unresolved_function(
8189
9274
  # Validate each WHEN condition
8190
9275
  condition_type = snowpark_typed_args[i].typ
8191
9276
  if not isinstance(condition_type, BooleanType):
8192
- raise AnalysisException(
9277
+ exception = AnalysisException(
8193
9278
  f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
8194
9279
  f"Parameter {i + 1} requires the 'BOOLEAN' type, however got '{condition_type}'"
8195
9280
  )
9281
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9282
+ raise exception
8196
9283
  result_exp = result_exp.when(snowpark_args[i], snowpark_args[i + 1])
8197
9284
  result_type_indexes.append(i + 1)
8198
9285
  name_components.append("END")
@@ -8429,9 +9516,11 @@ def map_unresolved_function(
8429
9516
  # TODO: Add more here as we come across them.
8430
9517
  # Unfortunately the scope of function names are not documented in
8431
9518
  # the proto file.
8432
- raise SnowparkConnectNotImplementedError(
9519
+ exception = SnowparkConnectNotImplementedError(
8433
9520
  f"Unsupported function name {other}"
8434
9521
  )
9522
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
9523
+ raise exception
8435
9524
 
8436
9525
  def _to_typed_column(
8437
9526
  res: Column | TypedColumn,
@@ -8446,9 +9535,11 @@ def map_unresolved_function(
8446
9535
  # 1. Static type: Assign directly to `result_type` when type is known at resolve time
8447
9536
  # 2. Dynamic type based on function arguments types: Use `snowpark_typed_args` to determine type
8448
9537
  # 3. Use _type_with_typer() as last resort - it calls GS to determine the type
8449
- raise SnowparkConnectNotImplementedError(
9538
+ exception = SnowparkConnectNotImplementedError(
8450
9539
  f"Result type of function {function_name} not implemented"
8451
9540
  )
9541
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
9542
+ raise exception
8452
9543
  elif type(res_type) is list:
8453
9544
  tc = TypedColumn(res, lambda: res_type)
8454
9545
  else:
@@ -8460,7 +9551,7 @@ def map_unresolved_function(
8460
9551
  spark_col_names if len(spark_col_names) > 0 else [spark_function_name]
8461
9552
  )
8462
9553
  typed_col = _to_typed_column(result_exp, result_type, function_name)
8463
- typed_col.set_qualifiers(qualifiers)
9554
+ typed_col.set_qualifiers({ColumnQualifier(tuple(qualifier_parts))})
8464
9555
  return spark_col_names, typed_col
8465
9556
 
8466
9557
 
@@ -8534,15 +9625,19 @@ def _extract_window_args(fn: expressions_proto.Expression) -> (str, str):
8534
9625
  args = fn.unresolved_function.arguments
8535
9626
  match args:
8536
9627
  case [_, _, _]:
8537
- raise SnowparkConnectNotImplementedError(
9628
+ exception = SnowparkConnectNotImplementedError(
8538
9629
  "the slide_duration parameter is not supported"
8539
9630
  )
9631
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
9632
+ raise exception
8540
9633
  case [_, window_duration, slide_duration, _] if unwrap_literal(
8541
9634
  window_duration
8542
9635
  ) != unwrap_literal(slide_duration):
8543
- raise SnowparkConnectNotImplementedError(
9636
+ exception = SnowparkConnectNotImplementedError(
8544
9637
  "the slide_duration parameter is not supported"
8545
9638
  )
9639
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
9640
+ raise exception
8546
9641
  case [_, window_duration, _, start_time]:
8547
9642
  return unwrap_literal(window_duration), unwrap_literal(start_time)
8548
9643
  case [_, window_duration]:
@@ -8610,7 +9705,9 @@ def _find_common_type(
8610
9705
  typ = _common(type1.element_type, type2.element_type)
8611
9706
  return ArrayType(typ)
8612
9707
  case (ArrayType(), _) | (_, ArrayType()) if func_name == "concat":
8613
- raise AnalysisException(exception_base_message)
9708
+ exception = AnalysisException(exception_base_message)
9709
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9710
+ raise exception
8614
9711
  case (NullType(), t) | (t, NullType()):
8615
9712
  return t
8616
9713
  case (BinaryType(), BinaryType()):
@@ -8639,7 +9736,9 @@ def _find_common_type(
8639
9736
  if [field.name for field in fields1] != [
8640
9737
  field.name for field in fields2
8641
9738
  ]:
8642
- raise AnalysisException(exception_base_message)
9739
+ exception = AnalysisException(exception_base_message)
9740
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9741
+ raise exception
8643
9742
  fields = []
8644
9743
  for idx, field in enumerate(fields1):
8645
9744
  typ = _common(field.datatype, fields2[idx].datatype)
@@ -8649,8 +9748,24 @@ def _find_common_type(
8649
9748
  key_type = _common(type1.key_type, type2.key_type)
8650
9749
  value_type = _common(type1.value_type, type2.value_type)
8651
9750
  return MapType(key_type, value_type)
9751
+ case (_, _) if isinstance(type1, YearMonthIntervalType) and isinstance(
9752
+ type2, YearMonthIntervalType
9753
+ ):
9754
+ return YearMonthIntervalType(
9755
+ min(type1.start_field, type2.start_field),
9756
+ max(type1.end_field, type2.end_field),
9757
+ )
9758
+ case (_, _) if isinstance(type1, DayTimeIntervalType) and isinstance(
9759
+ type2, DayTimeIntervalType
9760
+ ):
9761
+ return DayTimeIntervalType(
9762
+ min(type1.start_field, type2.start_field),
9763
+ max(type1.end_field, type2.end_field),
9764
+ )
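
Both new cases widen matching interval types by taking the narrowest start field and the widest end field, so YEAR combined with MONTH yields YEAR TO MONTH. A model assuming Spark's field ordinals for year-month intervals (YEAR = 0, MONTH = 1):

    YEAR, MONTH = 0, 1  # Spark's YearMonthIntervalType field ordinals (assumed)

    def common_interval_fields(a, b):
        # a and b are (start_field, end_field) pairs
        return (min(a[0], b[0]), max(a[1], b[1]))

    print(common_interval_fields((YEAR, YEAR), (MONTH, MONTH)))  # (0, 1) -> YEAR TO MONTH
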
8652
9765
  case _:
8653
- raise AnalysisException(exception_base_message)
9766
+ exception = AnalysisException(exception_base_message)
9767
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9768
+ raise exception
8654
9769
 
8655
9770
  types = list(filter(lambda tp: tp is not None, types))
8656
9771
  if not types:
@@ -8663,7 +9778,9 @@ def _find_common_type(
8663
9778
  func_name_message = f" to `{func_name}`" if func_name else ""
8664
9779
  types_message = " or ".join([f'"{type}"' for type in types])
8665
9780
  exception_message = f"{exception_base_message} Cannot resolve expression due to data type mismatch: Input{func_name_message} should all be the same type, but it's ({types_message})."
8666
- raise AnalysisException(exception_message)
9781
+ exception = AnalysisException(exception_message)
9782
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9783
+ raise exception
8667
9784
  else:
8668
9785
  raise
8669
9786
 
@@ -8821,7 +9938,9 @@ def _resolve_function_with_lambda(
8821
9938
  case ArrayType():
8822
9939
  return VariantType()
8823
9940
  case t:
8824
- raise ValueError(f"Expected array, got {t}")
9941
+ exception = ValueError(f"Expected array, got {t}")
9942
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9943
+ raise exception
8825
9944
 
8826
9945
  def _get_map_types(tc: TypedColumn):
8827
9946
  match tc.typ:
@@ -8830,9 +9949,11 @@ def _resolve_function_with_lambda(
8830
9949
  case MapType():
8831
9950
  return VariantType(), VariantType()
8832
9951
  case t:
8833
- raise AnalysisException(
9952
+ exception = AnalysisException(
8834
9953
  f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Parameter 1 requires the "MAP" type, however "id" has the type "{t}".'
8835
9954
  )
9955
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
9956
+ raise exception
8836
9957
 
8837
9958
  def _map_to_array(m: dict) -> Optional[list]:
8838
9959
  # confirm that m is a dict and not a sqlNullWrapper
@@ -8898,9 +10019,13 @@ def _resolve_function_with_lambda(
8898
10019
  result_type = arg4_tc.typ # it's type of 'finish' lambda body
8899
10020
  result_exp = snowpark_fn.get(result_exp, snowpark_fn.lit(0))
8900
10021
  case _:
8901
- raise SnowparkConnectNotImplementedError(
10022
+ exception = SnowparkConnectNotImplementedError(
8902
10023
  f"{function_name} function requires 3 or 4 arguments"
8903
10024
  )
10025
+ attach_custom_error_code(
10026
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
10027
+ )
10028
+ raise exception
8904
10029
 
8905
10030
  snowpark_arg_names = [
8906
10031
  arg1_name,
@@ -9124,9 +10249,13 @@ def _resolve_function_with_lambda(
9124
10249
  f"lambdafunction({lambda_body_name}, namedlambdavariable(), namedlambdavariable())",
9125
10250
  ]
9126
10251
  case _:
9127
- raise SnowparkConnectNotImplementedError(
10252
+ exception = SnowparkConnectNotImplementedError(
9128
10253
  f"{function_name} function requires lambda function with 1 or 2 arguments"
9129
10254
  )
10255
+ attach_custom_error_code(
10256
+ exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
10257
+ )
10258
+ raise exception
9130
10259
  case "transform_keys":
9131
10260
  _map_to_array_udf = cached_udf(
9132
10261
  _map_to_array,
@@ -9280,9 +10409,11 @@ def _resolve_function_with_lambda(
9280
10409
  result_exp = TypedColumn(result_exp, lambda: [ArrayType(fn_body.typ)])
9281
10410
  case other:
9282
10411
  # TODO: Add more here as we come across them.
9283
- raise SnowparkConnectNotImplementedError(
10412
+ exception = SnowparkConnectNotImplementedError(
9284
10413
  f"Unsupported function name {other}"
9285
10414
  )
10415
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
10416
+ raise exception
9286
10417
 
9287
10418
  spark_function_name = f"{function_name}({', '.join(snowpark_arg_names)})"
9288
10419
  if not isinstance(result_exp, TypedColumn):
@@ -9847,9 +10978,11 @@ def _try_arithmetic_helper(
9847
10978
  ) or (
9848
10979
  isinstance(arg2, DateType) and not isinstance(arg1, _IntegralType)
9849
10980
  ):
9850
- raise AnalysisException(
10981
+ exception = AnalysisException(
9851
10982
  '[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_add(dt, add)" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type'
9852
10983
  )
10984
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
10985
+ raise exception
9853
10986
  args = (
9854
10987
  snowpark_args[::-1]
9855
10988
  if isinstance(arg1, _IntegralType)
@@ -9872,9 +11005,11 @@ def _try_arithmetic_helper(
9872
11005
  elif isinstance(arg1, DateType) and isinstance(arg2, DateType):
9873
11006
  return snowpark_fn.daydiff(snowpark_args[0], snowpark_args[1])
9874
11007
  else:
9875
- raise AnalysisException(
11008
+ exception = AnalysisException(
9876
11009
  '[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_sub(dt, sub)" due to data type mismatch: Parameter 1 requires the "DATE" type and parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type'
9877
11010
  )
11011
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
11012
+ raise exception
9878
11013
  case (DecimalType(), _IntegralType()) | (_IntegralType(), DecimalType()) | (
9879
11014
  DecimalType(),
9880
11015
  DecimalType(),
@@ -9926,9 +11061,11 @@ def _try_arithmetic_helper(
9926
11061
  return updated_args[0] - updated_args[1]
9927
11062
 
9928
11063
  case (BooleanType(), _) | (_, BooleanType()):
9929
- raise AnalysisException(
11064
+ exception = AnalysisException(
9930
11065
  f"Incompatible types: {typed_args[0].typ}, {typed_args[1].typ}"
9931
11066
  )
11067
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
11068
+ raise exception
9932
11069
  case _:
9933
11070
  # Return NULL for incompatible types
9934
11071
  return snowpark_fn.lit(None)
@@ -9968,21 +11105,23 @@ def _get_add_sub_result_type(
9968
11105
  )
9969
11106
  case _:
9970
11107
  if global_config.spark_sql_ansi_enabled:
9971
- raise AnalysisException(
11108
+ exception = AnalysisException(
9972
11109
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".',
9973
11110
  )
11111
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
11112
+ raise exception
9974
11113
  else:
9975
11114
  result_type = DoubleType()
9976
11115
  case BooleanType():
9977
- raise AnalysisException(
11116
+ exception = AnalysisException(
9978
11117
  f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "BOOLEAN".',
9979
11118
  )
11119
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
11120
+ raise exception
9980
11121
  return result_type, overflow_possible
9981
11122
 
9982
11123
 
9983
- def _get_interval_type_name(
9984
- interval_type: Union[YearMonthIntervalType, DayTimeIntervalType]
9985
- ) -> str:
11124
+ def _get_interval_type_name(interval_type: _AnsiIntervalType) -> str:
9986
11125
  """Get the formatted interval type name for error messages."""
9987
11126
  if isinstance(interval_type, YearMonthIntervalType):
9988
11127
  if interval_type.start_field == 0 and interval_type.end_field == 0:
@@ -10009,21 +11148,15 @@ def _check_interval_string_comparison(
10009
11148
  ) -> None:
10010
11149
  """Check for invalid interval-string comparisons and raise AnalysisException if found."""
10011
11150
  if (
10012
- isinstance(
10013
- snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
10014
- )
11151
+ isinstance(snowpark_typed_args[0].typ, _AnsiIntervalType)
10015
11152
  and isinstance(snowpark_typed_args[1].typ, StringType)
10016
11153
  or isinstance(snowpark_typed_args[0].typ, StringType)
10017
- and isinstance(
10018
- snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
10019
- )
11154
+ and isinstance(snowpark_typed_args[1].typ, _AnsiIntervalType)
10020
11155
  ):
10021
11156
  # Format interval type name for error message
10022
11157
  interval_type = (
10023
11158
  snowpark_typed_args[0].typ
10024
- if isinstance(
10025
- snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
10026
- )
11159
+ if isinstance(snowpark_typed_args[0].typ, _AnsiIntervalType)
10027
11160
  else snowpark_typed_args[1].typ
10028
11161
  )
10029
11162
  interval_name = _get_interval_type_name(interval_type)
@@ -10039,9 +11172,11 @@ def _check_interval_string_comparison(
10039
11172
  else interval_name
10040
11173
  )
10041
11174
 
10042
- raise AnalysisException(
11175
+ exception = AnalysisException(
10043
11176
  f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} {operator} {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{left_type}" and "{right_type}").;'
10044
11177
  )
11178
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
11179
+ raise exception
10045
11180
 
10046
11181
 
10047
11182
  def _get_spark_function_name(
@@ -10088,12 +11223,18 @@ def _get_spark_function_name(
10088
11223
  case (DateType(), DayTimeIntervalType()) | (
10089
11224
  DateType(),
10090
11225
  YearMonthIntervalType(),
11226
+ ) | (TimestampType(), DayTimeIntervalType()) | (
11227
+ TimestampType(),
11228
+ YearMonthIntervalType(),
10091
11229
  ):
10092
11230
  date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
10093
11231
  return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
10094
11232
  case (DayTimeIntervalType(), DateType()) | (
10095
11233
  YearMonthIntervalType(),
10096
11234
  DateType(),
11235
+ ) | (DayTimeIntervalType(), TimestampType()) | (
11236
+ YearMonthIntervalType(),
11237
+ TimestampType(),
10097
11238
  ):
10098
11239
  date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
10099
11240
  if function_name == "+":
@@ -10177,12 +11318,18 @@ def _timestamp_format_sanity_check(ts_value: str, ts_format: str) -> None:
10177
11318
  This is a basic validation to ensure the format matches the string.
10178
11319
  """
10179
11320
  if "yyyyyyy" in ts_format:
10180
- raise DateTimeException(
11321
+ exception = DateTimeException(
10181
11322
  f"Fail to recognize '{ts_format}' pattern in the DateTimeFormatter."
10182
11323
  )
11324
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11325
+ raise exception
10183
11326
  if ts_format == "yy":
10184
11327
  if len(ts_value) != 2:
10185
- raise DateTimeException(f"Fail to parse '{ts_value}' in DateTimeFormatter.")
11328
+ exception = DateTimeException(
11329
+ f"Fail to parse '{ts_value}' in DateTimeFormatter."
11330
+ )
11331
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11332
+ raise exception
10186
11333
 
10187
11334
  # For parsing, the acceptable fraction length can be [1, the number of contiguous 'S']
10188
11335
  s_contiguous = 0
@@ -10199,7 +11346,11 @@ def _timestamp_format_sanity_check(ts_value: str, ts_format: str) -> None:
10199
11346
  char_count += 1
10200
11347
 
10201
11348
  if s_contiguous + sum(x.isalnum() for x in ts_value) < char_count:
10202
- raise DateTimeException(f"Fail to parse '{ts_value}' in DateTimeFormatter.")
11349
+ exception = DateTimeException(
11350
+ f"Fail to parse '{ts_value}' in DateTimeFormatter."
11351
+ )
11352
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11353
+ raise exception
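
The 'yy' rule pins a two-letter year pattern to exactly two digits; the fraction check below it accepts anywhere from one digit up to the longest contiguous run of 'S' letters. A tiny illustration of the year rule (a hypothetical helper mirroring the check above):

    def check_two_digit_year(value: str, fmt: str) -> None:
        if fmt == "yy" and len(value) != 2:
            raise ValueError(f"Fail to parse '{value}' in DateTimeFormatter.")

    check_two_digit_year("24", "yy")       # passes
    # check_two_digit_year("2024", "yy")   # would raise, as above
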
10203
11354
 
10204
11355
 
10205
11356
  def _bounded_long_floor_expr(expr):
@@ -10326,17 +11477,21 @@ def _validate_number_format_string(format_str: str) -> None:
10326
11477
  """
10327
11478
 
10328
11479
  def _unexpected_char(char):
10329
- raise AnalysisException(
11480
+ exception = AnalysisException(
10330
11481
  f"[INVALID_FORMAT.UNEXPECTED_TOKEN] The format is invalid: '{original_format}'. "
10331
11482
  f"Found the unexpected character '{char}' in the format string; "
10332
11483
  "the structure of the format string must match: "
10333
11484
  "`[MI|S]` `[$]` `[0|9|G|,]*` `[.|D]` `[0|9]*` `[$]` `[PR|MI|S]`."
10334
11485
  )
11486
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11487
+ raise exception
10335
11488
 
10336
11489
  if not format_str:
10337
- raise AnalysisException(
11490
+ exception = AnalysisException(
10338
11491
  "[INVALID_FORMAT.EMPTY] The format is invalid: ''. The number format string cannot be empty."
10339
11492
  )
11493
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11494
+ raise exception
10340
11495
 
10341
11496
  # Create a working copy of the format string
10342
11497
  remaining = format_str
@@ -10412,9 +11567,11 @@ def _validate_number_format_string(format_str: str) -> None:
10412
11567
  _unexpected_char(char)
10413
11568
 
10414
11569
  # If no invalid character found but no digits, it's still invalid
10415
- raise AnalysisException(
11570
+ exception = AnalysisException(
10416
11571
  f"[INVALID_FORMAT.WRONG_NUM_DIGIT] The format is invalid: '{format_str}'. The format string requires at least one number digit."
10417
11572
  )
11573
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
11574
+ raise exception
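
Per the grammar in the error message above, `[MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]`, a format needs at least one digit placeholder and may carry sign or currency markers only at the edges. A compact regex expressing roughly the same shape (an approximation for illustration, not the validator itself):

    import re

    NUMBER_FORMAT = re.compile(r"^(MI|S)?\$?[09G,]*([.D][09]*)?\$?(PR|MI|S)?$")

    for fmt in ("999,999.99", "$9,999MI", "9D9", ""):
        ok = bool(NUMBER_FORMAT.match(fmt)) and any(c in "09" for c in fmt)
        print(f"{fmt!r}: {'valid' if ok else 'invalid'}")
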
10418
11575
 
10419
11576
 
10420
11577
  def _trim_helper(value: Column, trim_value: Column, trim_type: Column) -> Column:
@@ -10465,3 +11622,30 @@ def _map_from_spark_tz(value: Column) -> Column:
10465
11622
  .when(value == "VST", snowpark_fn.lit("Asia/Ho_Chi_Minh"))
10466
11623
  .otherwise(value) # Return original timezone if no mapping found
10467
11624
  )
11625
+
11626
+
11627
+ def _calculate_total_months(interval_arg):
11628
+ """Calculate total months from a year-month interval."""
11629
+ years = snowpark_fn.date_part("year", interval_arg)
11630
+ months = snowpark_fn.date_part("month", interval_arg)
11631
+ return years * 12 + months
11632
+
11633
+
11634
+ def _calculate_total_days(interval_arg):
11635
+ """Calculate total days from a day-time interval."""
11636
+ days = snowpark_fn.date_part("day", interval_arg)
11637
+ hours = snowpark_fn.date_part("hour", interval_arg)
11638
+ minutes = snowpark_fn.date_part("minute", interval_arg)
11639
+ seconds = snowpark_fn.date_part("second", interval_arg)
11640
+ # Convert hours, minutes, seconds to fractional days
11641
+ fractional_days = (hours * 3600 + minutes * 60 + seconds) / 86400
11642
+ return days + fractional_days
11643
+
11644
+
11645
+ def _calculate_total_seconds(interval_arg):
11646
+ """Calculate total seconds from a day-time interval."""
11647
+ days = snowpark_fn.date_part("day", interval_arg)
11648
+ hours = snowpark_fn.date_part("hour", interval_arg)
11649
+ minutes = snowpark_fn.date_part("minute", interval_arg)
11650
+ seconds = snowpark_fn.date_part("second", interval_arg)
11651
+ return days * 86400 + hours * 3600 + minutes * 60 + seconds
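
These helpers flatten an interval into one comparable magnitude; the arithmetic is plain unit conversion. A quick standalone check of the same formulas:

    def total_days(days, hours, minutes, seconds):
        return days + (hours * 3600 + minutes * 60 + seconds) / 86400

    def total_seconds(days, hours, minutes, seconds):
        return days * 86400 + hours * 3600 + minutes * 60 + seconds

    assert total_days(2, 12, 0, 0) == 2.5        # 2 days 12 hours
    assert total_seconds(0, 1, 30, 15) == 5415   # 1:30:15
    assert 1 * 12 + 3 == 15                      # 1 year 3 months -> 15 total months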