snowpark-connect 0.28.1__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client.py +65 -0
- snowflake/snowpark_connect/column_name_handler.py +6 -0
- snowflake/snowpark_connect/config.py +33 -5
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +21 -19
- snowflake/snowpark_connect/expression/map_extension.py +277 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +107 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +425 -269
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/relation/io_utils.py +21 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -4
- snowflake/snowpark_connect/relation/map_extension.py +21 -4
- snowflake/snowpark_connect/relation/map_join.py +8 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
- snowflake/snowpark_connect/relation/map_relation.py +1 -3
- snowflake/snowpark_connect/relation/map_row_ops.py +116 -15
- snowflake/snowpark_connect/relation/map_show_string.py +14 -6
- snowflake/snowpark_connect/relation/map_sql.py +39 -5
- snowflake/snowpark_connect/relation/map_stats.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read.py +22 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +119 -29
- snowflake/snowpark_connect/relation/read/map_read_json.py +57 -36
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +159 -0
- snowflake/snowpark_connect/relation/stage_locator.py +85 -53
- snowflake/snowpark_connect/relation/write/map_write.py +67 -4
- snowflake/snowpark_connect/server.py +29 -16
- snowflake/snowpark_connect/type_mapping.py +75 -3
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/describe_query_cache.py +6 -3
- snowflake/snowpark_connect/utils/io_utils.py +36 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +30 -5
- snowflake/snowpark_connect/utils/udf_cache.py +37 -7
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/RECORD +47 -45
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_unresolved_function.py

@@ -15,11 +15,12 @@ import tempfile
 import time
 import uuid
 from collections import defaultdict
+from collections.abc import Callable
 from contextlib import suppress
 from decimal import ROUND_HALF_EVEN, ROUND_HALF_UP, Context, Decimal
 from functools import partial, reduce
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Union
 from urllib.parse import quote, unquote
 
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
@@ -49,6 +50,7 @@ from snowflake.snowpark.types import (
     ByteType,
     DataType,
     DateType,
+    DayTimeIntervalType,
     DecimalType,
     DoubleType,
     FloatType,
@@ -63,6 +65,7 @@ from snowflake.snowpark.types import (
     TimestampTimeZone,
     TimestampType,
     VariantType,
+    YearMonthIntervalType,
     _FractionalType,
     _IntegralType,
     _NumericType,
@@ -199,7 +202,7 @@ def _validate_numeric_args(
         case StringType():
             # Cast strings to doubles following Spark
             # https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala#L204
-            modified_args[i] =
+            modified_args[i] = snowpark_fn.try_cast(snowpark_args[i], DoubleType())
         case _:
             raise TypeError(
                 f"Data type mismatch: {function_name} requires numeric types, but got {typed_args[0].typ} and {typed_args[1].typ}."
@@ -519,7 +522,7 @@ def map_unresolved_function(
                     DecimalType() as t,
                 ):
                     p1, s1 = _get_type_precision(t)
-                    result_type = _get_decimal_multiplication_result_type(
+                    result_type, _ = _get_decimal_multiplication_result_type(
                         p1, s1, p1, s1
                     )
                     result_exp = snowpark_fn.lit(None)
@@ -528,11 +531,17 @@ def map_unresolved_function(
                 ):
                     p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
                     p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
-
-
-
-
-
+                    (
+                        result_type,
+                        overflow_possible,
+                    ) = _get_decimal_multiplication_result_type(p1, s1, p2, s2)
+                    result_exp = _arithmetic_operation(
+                        snowpark_typed_args[0],
+                        snowpark_typed_args[1],
+                        lambda x, y: x * y,
+                        overflow_possible,
+                        global_config.spark_sql_ansi_enabled,
+                        result_type,
                     )
                 case (NullType(), NullType()):
                     result_type = DoubleType()
@@ -617,7 +626,7 @@ def map_unresolved_function(
             )
             match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
                 case (NullType(), _) | (_, NullType()):
-                    result_type = _get_add_sub_result_type(
+                    result_type, _ = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
                         snowpark_typed_args[1].typ,
                         spark_function_name,
@@ -632,8 +641,17 @@ def map_unresolved_function(
                     if isinstance(t, (IntegerType, ShortType, ByteType)):
                         result_type = DateType()
                         result_exp = snowpark_args[0] + snowpark_args[1]
+                    elif isinstance(t, (DayTimeIntervalType, YearMonthIntervalType)):
+                        result_type = TimestampType()
+                        result_exp = (
+                            snowpark_args[date_param_index]
+                            + snowpark_args[t_param_index]
+                        )
                     elif (
-
+                        hasattr(
+                            snowpark_typed_args[t_param_index].col._expr1, "pretty_name"
+                        )
+                        and "INTERVAL"
                         == snowpark_typed_args[t_param_index].col._expr1.pretty_name
                     ):
                         result_type = TimestampType()
@@ -693,14 +711,21 @@ def map_unresolved_function(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
                 case _:
-                    result_type = _get_add_sub_result_type(
+                    result_type, overflow_possible = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
                         snowpark_typed_args[1].typ,
                         spark_function_name,
                     )
-
-
-
+
+                    result_exp = _arithmetic_operation(
+                        snowpark_typed_args[0],
+                        snowpark_typed_args[1],
+                        lambda x, y: x + y,
+                        overflow_possible,
+                        global_config.spark_sql_ansi_enabled,
+                        result_type,
+                    )
+
         case "-":
             spark_function_name = _get_spark_function_name(
                 snowpark_typed_args[0],
@@ -715,7 +740,7 @@ def map_unresolved_function(
                     result_type = LongType()
                     result_exp = snowpark_fn.lit(None).cast(result_type)
                 case (NullType(), _) | (_, NullType()):
-                    result_type = _get_add_sub_result_type(
+                    result_type, _ = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
                         snowpark_typed_args[1].typ,
                         spark_function_name,
@@ -726,6 +751,12 @@ def map_unresolved_function(
                     # TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
                     result_type = LongType()
                     result_exp = snowpark_args[0] - snowpark_args[1]
+                case (DateType(), DayTimeIntervalType()) | (
+                    DateType(),
+                    YearMonthIntervalType(),
+                ):
+                    result_type = TimestampType()
+                    result_exp = snowpark_args[0] - snowpark_args[1]
                 case (DateType(), StringType()):
                     if (
                         hasattr(snowpark_typed_args[1].col._expr1, "pretty_name")
@@ -806,14 +837,20 @@ def map_unresolved_function(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
                 case _:
-                    result_type = _get_add_sub_result_type(
+                    result_type, overflow_possible = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
                         snowpark_typed_args[1].typ,
                         spark_function_name,
                     )
-                    result_exp =
-
-
+                    result_exp = _arithmetic_operation(
+                        snowpark_typed_args[0],
+                        snowpark_typed_args[1],
+                        lambda x, y: x - y,
+                        overflow_possible,
+                        global_config.spark_sql_ansi_enabled,
+                        result_type,
+                    )
+
         case "/":
             match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
                 case (DecimalType() as t1, NullType()):
@@ -825,15 +862,17 @@ def map_unresolved_function(
                 ):
                     p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
                     p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
-                    result_type,
+                    result_type, overflow_possible = _get_decimal_division_result_type(
                         p1, s1, p2, s2
                     )
-
+
+                    result_exp = _arithmetic_operation(
+                        snowpark_typed_args[0],
+                        snowpark_typed_args[1],
+                        lambda x, y: _divnull(x, y),
+                        overflow_possible,
+                        global_config.spark_sql_ansi_enabled,
                         result_type,
-                        t,
-                        overflow_detected,
-                        snowpark_args,
-                        spark_function_name,
                     )
                 case (NullType(), NullType()):
                     result_type = DoubleType()
@@ -922,6 +961,10 @@ def map_unresolved_function(
                 raise AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} < {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+            # Check for interval-string comparisons
+            _check_interval_string_comparison(
+                "<", snowpark_typed_args, snowpark_arg_names
+            )
            left, right = _coerce_for_comparison(
                snowpark_typed_args[0], snowpark_typed_args[1]
            )
@@ -936,6 +979,10 @@ def map_unresolved_function(
                 raise AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} <= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+            # Check for interval-string comparisons
+            _check_interval_string_comparison(
+                "<=", snowpark_typed_args, snowpark_arg_names
+            )
            left, right = _coerce_for_comparison(
                snowpark_typed_args[0], snowpark_typed_args[1]
            )
@@ -954,6 +1001,10 @@ def map_unresolved_function(
            )
            result_exp = TypedColumn(left.eqNullSafe(right), lambda: [BooleanType()])
        case "==" | "=":
+           # Check for interval-string comparisons
+           _check_interval_string_comparison(
+               "=", snowpark_typed_args, snowpark_arg_names
+           )
            spark_function_name = f"({snowpark_arg_names[0]} = {snowpark_arg_names[1]})"
            left, right = _coerce_for_comparison(
                snowpark_typed_args[0], snowpark_typed_args[1]
@@ -969,6 +1020,10 @@ def map_unresolved_function(
                 raise AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} > {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+            # Check for interval-string comparisons
+            _check_interval_string_comparison(
+                ">", snowpark_typed_args, snowpark_arg_names
+            )
            left, right = _coerce_for_comparison(
                snowpark_typed_args[0], snowpark_typed_args[1]
            )
@@ -983,6 +1038,10 @@ def map_unresolved_function(
                 raise AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} >= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+            # Check for interval-string comparisons
+            _check_interval_string_comparison(
+                ">=", snowpark_typed_args, snowpark_arg_names
+            )
            left, right = _coerce_for_comparison(
                snowpark_typed_args[0], snowpark_typed_args[1]
            )
@@ -1113,9 +1172,7 @@ def map_unresolved_function(
            # SNOW-1955784: Support accuracy parameter
            # Use percentile_disc to return actual values from dataset (matches PySpark behavior)
 
-           def _pyspark_approx_percentile(
-               column: Column, percentage: float, original_type: DataType
-           ) -> Column:
+           def _pyspark_approx_percentile(column: Column, percentage: float) -> Column:
                """
                PySpark-compatible percentile that returns actual values from dataset.
                - PySpark's approx_percentile returns the "smallest value in the ordered col values
@@ -1132,7 +1189,7 @@ def map_unresolved_function(
                result = snowpark_fn.function("percentile_disc")(
                    snowpark_fn.lit(percentage)
                ).within_group(column)
-               return
+               return result
 
            column_type = snowpark_typed_args[0].typ
 
@@ -1143,26 +1200,18 @@ def map_unresolved_function(
                assert array_func.function_name == "array", array_func
 
                percentile_results = [
-                   _pyspark_approx_percentile(
-                       snowpark_args[0], unwrap_literal(arg), column_type
-                   )
+                   _pyspark_approx_percentile(snowpark_args[0], unwrap_literal(arg))
                    for arg in array_func.arguments
                ]
 
                result_type = ArrayType(element_type=column_type, contains_null=False)
-               result_exp = snowpark_fn.
-
-                   result_type,
-               )
+               result_exp = snowpark_fn.array_construct(*percentile_results)
+               result_exp = _resolve_aggregate_exp(result_exp, result_type)
            else:
                # Handle single percentile
                percentage = unwrap_literal(exp.unresolved_function.arguments[1])
-               result_exp =
-
-                   snowpark_args[0], percentage, column_type
-               ),
-               lambda: [column_type],
-               )
+               result_exp = _pyspark_approx_percentile(snowpark_args[0], percentage)
+               result_exp = _resolve_aggregate_exp(result_exp, column_type)
        case "array":
            if len(snowpark_args) == 0:
                result_exp = snowpark_fn.cast(
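The docstring above describes the discrete-percentile rule that PERCENTILE_DISC implements: return the smallest value in the ordered column whose position covers the requested fraction, so the result is always an actual element of the data. A minimal pure-Python illustration of that rule (not code from the package):

import math

def percentile_disc(values, percentage):
    """Return the smallest element whose ordered position covers `percentage`."""
    ordered = sorted(values)
    # Smallest index whose cumulative fraction is >= percentage.
    idx = max(0, math.ceil(percentage * len(ordered)) - 1)
    return ordered[idx]

print(percentile_disc([1, 2, 3, 4], 0.5))   # 2 -- an actual value from the data
print(percentile_disc([1, 2, 3, 4], 0.75))  # 3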
@@ -1336,27 +1385,55 @@ def map_unresolved_function(
            result_exp = snowpark_fn.cast(result_exp, array_type)
            result_exp = TypedColumn(result_exp, lambda: snowpark_typed_args[0].types)
        case "array_repeat":
+           elem, count = snowpark_args[0], snowpark_args[1]
+           elem_type = snowpark_typed_args[0].typ
+           result_type = ArrayType(elem_type)
 
-
-               input_types=[VariantType(), LongType()],
-               return_type=ArrayType(),
-           )
-           def _array_repeat(elem, n):
-               if n is None:
-                   return None
-               if n < 0:
-                   return []
-               return [elem] * n
+           fallback_to_udf = True
 
-
-
-
-
-
-
-
-
-
+           if isinstance(count._expression, Literal):
+               count_value = count._expression.value
+               fallback_to_udf = False
+
+               if count_value is None:
+                   result_exp = snowpark_fn.lit(None).cast(result_type)
+               elif count_value <= 0:
+                   result_exp = snowpark_fn.array_construct().cast(result_type)
+               elif count_value <= 16:
+                   # count_value is small enough to initialize the array directly in memory
+                   elem_variant = snowpark_fn.cast(elem, VariantType())
+                   result_exp = snowpark_fn.array_construct(
+                       *([elem_variant] * count_value)
+                   ).cast(result_type)
+               else:
+                   fallback_to_udf = True
+
+           if fallback_to_udf:
+
+               @cached_udf(
+                   input_types=[VariantType(), LongType()],
+                   return_type=ArrayType(),
+               )
+               def _array_repeat(elem, n):
+                   if n is None:
+                       return None
+                   if n < 0:
+                       return []
+                   return [elem] * n
+
+               elem_variant = snowpark_fn.cast(elem, VariantType())
+
+               result_exp = (
+                   snowpark_fn.when(
+                       count.is_null(), snowpark_fn.lit(None).cast(result_type)
+                   )
+                   .when(count <= 0, snowpark_fn.array_construct().cast(result_type))
+                   .otherwise(
+                       snowpark_fn.cast(
+                           _array_repeat(elem_variant, count), result_type
+                       )
+                   )
+               )
        case "array_size":
            array_type = snowpark_typed_args[0].typ
            if not isinstance(array_type, ArrayType):
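The hunk above replaces the unconditional UDF for array_repeat with a fast path for literal counts. A compact sketch of the dispatch rule it implements (plain Python, for illustration only):

def choose_array_repeat_strategy(count_is_literal: bool, count_value):
    if not count_is_literal:
        return "udf"                      # count only known at runtime
    if count_value is None:
        return "null_literal"             # NULL count -> NULL array
    if count_value <= 0:
        return "empty_array"
    if count_value <= 16:
        return "inline_array_construct"   # small enough to expand directly
    return "udf"                          # large literal counts still use the UDF

assert choose_array_repeat_strategy(True, 3) == "inline_array_construct"
assert choose_array_repeat_strategy(True, 100) == "udf"
assert choose_array_repeat_strategy(False, None) == "udf"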
@@ -1556,7 +1633,7 @@ def map_unresolved_function(
            result_exp = TypedColumn(result_exp, lambda: [LongType()])
        case "bit_get" | "getbit":
            snowflake_compat = get_boolean_session_config_param(
-               "enable_snowflake_extension_behavior"
+               "snowpark.connect.enable_snowflake_extension_behavior"
            )
            col, pos = snowpark_args
            if snowflake_compat:
@@ -1863,14 +1940,11 @@ def map_unresolved_function(
            qualifiers = snowpark_args[0].get_qualifiers()
        case "collect_list" | "array_agg":
            # TODO: SNOW-1967177 - Support structured types in array_agg
-           result_exp = snowpark_fn.
-
-               snowpark_typed_args[0].column(to_semi_structure=True)
-               ),
-               ArrayType(snowpark_typed_args[0].typ),
+           result_exp = snowpark_fn.array_agg(
+               snowpark_typed_args[0].column(to_semi_structure=True)
            )
-           result_exp =
-               result_exp,
+           result_exp = _resolve_aggregate_exp(
+               result_exp, ArrayType(snowpark_typed_args[0].typ)
            )
            spark_function_name = f"collect_list({snowpark_arg_names[0]})"
        case "collect_set":
@@ -2357,15 +2431,30 @@ def map_unresolved_function(
                # If format is NULL, return NULL for all rows
                result_exp = snowpark_fn.lit(None)
            else:
+               format_lit = snowpark_fn.lit(
+                   map_spark_timestamp_format_expression(
+                       exp.unresolved_function.arguments[1],
+                       snowpark_typed_args[0].typ,
+                   )
+               )
                result_exp = snowpark_fn.date_format(
                    snowpark_args[0],
-
-                   map_spark_timestamp_format_expression(
-                       exp.unresolved_function.arguments[1],
-                       snowpark_typed_args[0].typ,
-                   )
-               ),
+                   format_lit,
                )
+
+               if format_literal == "EEEE":
+                   # TODO: SNOW-2356874, for weekday, Snowflake only supports abbreviated name, e.g. "Fri". Patch spark "EEEE" until
+                   # snowflake supports full weekday name.
+                   result_exp = (
+                       snowpark_fn.when(result_exp == "Mon", "Monday")
+                       .when(result_exp == "Tue", "Tuesday")
+                       .when(result_exp == "Wed", "Wednesday")
+                       .when(result_exp == "Thu", "Thursday")
+                       .when(result_exp == "Fri", "Friday")
+                       .when(result_exp == "Sat", "Saturday")
+                       .when(result_exp == "Sun", "Sunday")
+                       .otherwise(result_exp)
+                   )
            result_exp = TypedColumn(result_exp, lambda: [StringType()])
        case "date_from_unix_date":
            result_exp = snowpark_fn.date_add(
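The "EEEE" patch above compensates for Snowflake emitting only abbreviated weekday names by mapping them to the full names Spark produces, passing anything else through unchanged. The same mapping in plain Python (illustrative only):

_FULL_WEEKDAY = {
    "Mon": "Monday", "Tue": "Tuesday", "Wed": "Wednesday", "Thu": "Thursday",
    "Fri": "Friday", "Sat": "Saturday", "Sun": "Sunday",
}

def patch_weekday(formatted: str) -> str:
    # Values that are not abbreviated weekday names pass through unchanged,
    # mirroring the .otherwise(result_exp) branch in the diff.
    return _FULL_WEEKDAY.get(formatted, formatted)

assert patch_weekday("Fri") == "Friday"
assert patch_weekday("2024-01-01") == "2024-01-01"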
@@ -2464,6 +2553,14 @@ def map_unresolved_function(
            )
            result_type = LongType()
        case "date_part" | "datepart" | "extract":
+           # Check for interval types and throw NotImplementedError
+           if isinstance(
+               snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
+           ):
+               raise NotImplementedError(
+                   f"{function_name} with interval types is not supported"
+               )
+
            field_lit: str | None = unwrap_literal(exp.unresolved_function.arguments[0])
 
            if field_lit is None:
@@ -3239,7 +3336,7 @@ def map_unresolved_function(
            # TODO: See the spark-compatibility-issues.md explanation, this is quite different from Spark.
            # MapType columns as input should raise an exception as they are not hashable.
            snowflake_compat = get_boolean_session_config_param(
-               "enable_snowflake_extension_behavior"
+               "snowpark.connect.enable_snowflake_extension_behavior"
            )
            # Snowflake's hash function does allow MAP types, but Spark does not. Therefore, if we have the expansion flag enabled
            # we want to let it pass through and hash MAP types.
@@ -3746,10 +3843,21 @@ def map_unresolved_function(
                snowpark_fn.lit(is_outer),
            )
        case "input_file_name":
-           #
-
-
+           # Return the filename metadata column for file-based DataFrames
+           # If METADATA$FILENAME doesn't exist (e.g., for DataFrames created from local data),
+           # return empty string to match Spark's behavior
+           from snowflake.snowpark_connect.relation.read.metadata_utils import (
+               METADATA_FILENAME_COLUMN,
            )
+
+           available_columns = column_mapping.get_snowpark_columns()
+           if METADATA_FILENAME_COLUMN in available_columns:
+               result_exp = snowpark_fn.col(METADATA_FILENAME_COLUMN)
+           else:
+               # Return empty when METADATA$FILENAME column doesn't exist, matching Spark behavior
+               result_exp = snowpark_fn.lit("").cast(StringType())
+           result_type = StringType()
+           spark_function_name = "input_file_name()"
        case "instr":
            result_exp = snowpark_fn.charindex(snowpark_args[1], snowpark_args[0])
            result_type = LongType()
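A small sketch of the fallback rule introduced above: input_file_name() resolves to the METADATA$FILENAME column when the reader attached it, and to an empty string otherwise, matching Spark for DataFrames not backed by files. The returned expression strings here are illustrative placeholders, not Snowpark API calls:

METADATA_FILENAME_COLUMN = "METADATA$FILENAME"

def resolve_input_file_name(available_columns: list) -> str:
    # File-backed DataFrames carry the metadata column; others get ''.
    if METADATA_FILENAME_COLUMN in available_columns:
        return f"col({METADATA_FILENAME_COLUMN})"
    return "lit('')"

assert resolve_input_file_name(["A", "METADATA$FILENAME"]) == "col(METADATA$FILENAME)"
assert resolve_input_file_name(["A"]) == "lit('')"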
@@ -4709,7 +4817,7 @@ def map_unresolved_function(
            )
        case "md5":
            snowflake_compat = get_boolean_session_config_param(
-               "enable_snowflake_extension_behavior"
+               "snowpark.connect.enable_snowflake_extension_behavior"
            )
 
            # MD5 in Spark only accepts BinaryType or types that can be implicitly cast to it (StringType)
@@ -5283,9 +5391,14 @@ def map_unresolved_function(
            result_exp = snowpark_fn.function(function_name)(
                _check_percentile_percentage(exp.unresolved_function.arguments[1])
            ).within_group(snowpark_args[0])
-           result_exp =
-
+           result_exp = (
+               TypedColumn(
+                   snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
+               )
+               if not is_window_enabled()
+               else TypedColumnWithDeferredCast(result_exp, lambda: [DoubleType()])
            )
+
            spark_function_name = f"{function_name}({unwrap_literal(exp.unresolved_function.arguments[1])}) WITHIN GROUP (ORDER BY {snowpark_arg_names[0]})"
        case "pi":
            spark_function_name = "PI()"
@@ -7504,6 +7617,12 @@ def map_unresolved_function(
            )
            result_type = DateType()
        case "try_add":
+           # Check for interval types and throw NotImplementedError
+           for arg in snowpark_typed_args:
+               if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
+                   raise NotImplementedError(
+                       "try_add with interval types is not supported"
+                   )
            result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 0)
            result_exp = _type_with_typer(result_exp)
        case "try_aes_decrypt":
@@ -7557,6 +7676,12 @@ def map_unresolved_function(
                DoubleType(), cleaned, calculating_avg=True
            )
        case "try_divide":
+           # Check for interval types and throw NotImplementedError
+           for arg in snowpark_typed_args:
+               if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
+                   raise NotImplementedError(
+                       "try_divide with interval types is not supported"
+                   )
            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
                case (NullType(), t) | (t, NullType()):
                    result_exp = snowpark_fn.lit(None)
@@ -7580,63 +7705,20 @@ def map_unresolved_function(
                    )
                    | (DecimalType(), DecimalType())
                ):
-
-
-
-
-
-                       snowpark_typed_args[0].typ.scale,
-                       snowpark_typed_args[0].typ.precision,
-                   )
-                   s2, p2 = (
-                       snowpark_typed_args[1].typ.scale,
-                       snowpark_typed_args[1].typ.precision,
-                   )
-                   # The scale and precision formula that Spark follows for DecimalType
-                   # arithmetic operations can be found in the following Spark source
-                   # code file:
-                   # https://github.com/apache/spark/blob/a584cc48ef63fefb2e035349c8684250f8b936c4/docs/sql-ref-ansi-compliance.md
-                   new_scale = max(6, s1 + p2 + 1)
-                   new_precision = p1 - s1 + s2 + new_scale
-
-                   elif isinstance(snowpark_typed_args[0].typ, DecimalType):
-                       s1, p1 = (
-                           snowpark_typed_args[0].typ.scale,
-                           snowpark_typed_args[0].typ.precision,
-                       )
-                       # INT is treated as Decimal(10, 0)
-                       new_scale = max(6, s1 + 11)
-                       new_precision = p1 - s1 + new_scale
-
-                   else:  # right is DecimalType
-                       s2, p2 = (
-                           snowpark_typed_args[1].typ.scale,
-                           snowpark_typed_args[1].typ.precision,
-                       )
-                       # INT is treated as Decimal(10, 0)
-                       new_scale = max(6, 11 + p2)
-                       new_precision = (
-                           10 - 0 + s2 + new_scale
-                       )  # INT has precision 10, scale 0
-
-                   # apply precision cap
-                   if new_precision > 38:
-                       new_scale -= new_precision - 38
-                       new_precision = 38
-                       new_scale = max(new_scale, 6)
-
-                   left_double = snowpark_fn.cast(snowpark_args[0], DoubleType())
-                   right_double = snowpark_fn.cast(snowpark_args[1], DoubleType())
-
-                   quotient = snowpark_fn.when(
-                       snowpark_args[1] == 0, snowpark_fn.lit(None)
-                   ).otherwise(left_double / right_double)
-                   quotient = snowpark_fn.cast(quotient, StringType())
+                   p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
+                   p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
+                   result_type, overflow_possible = _get_decimal_division_result_type(
+                       p1, s1, p2, s2
+                   )
 
-                   result_exp =
-
+                   result_exp = _arithmetic_operation(
+                       snowpark_typed_args[0],
+                       snowpark_typed_args[1],
+                       lambda x, y: _divnull(x, y),
+                       overflow_possible,
+                       False,
+                       result_type,
                    )
-                   result_type = DecimalType(new_precision, new_scale)
                case (_NumericType(), _NumericType()):
                    result_exp = snowpark_fn.when(
                        snowpark_args[1] == 0, snowpark_fn.lit(None)
@@ -7708,6 +7790,12 @@ def map_unresolved_function(
                f"Expected either (ArrayType, IntegralType) or (MapType, StringType), got {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}."
            )
        case "try_multiply":
+           # Check for interval types and throw NotImplementedError
+           for arg in snowpark_typed_args:
+               if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
+                   raise NotImplementedError(
+                       "try_multiply with interval types is not supported"
+                   )
            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
                case (NullType(), t) | (t, NullType()):
                    result_exp = snowpark_fn.lit(None)
@@ -7749,42 +7837,21 @@ def map_unresolved_function(
                    )
                    | (DecimalType(), DecimalType())
                ):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                   new_precision = snowpark_typed_args[0].typ.precision + 11
-                   new_scale = snowpark_typed_args[0].typ.scale
-                   else:
-                       new_precision = snowpark_typed_args[1].typ.precision + 11
-                       new_scale = snowpark_typed_args[1].typ.scale
-
-                   # truncating down appropriately
-                   if new_precision > 38:
-                       new_precision = 38
-                   if new_scale > new_precision:
-                       new_scale = new_precision
-
-                   left_double = snowpark_fn.cast(snowpark_args[0], DoubleType())
-                   right_double = snowpark_fn.cast(snowpark_args[1], DoubleType())
-
-                   product = left_double * right_double
-
-                   product = snowpark_fn.cast(product, StringType())
-                   result_exp = _try_cast_helper(
-                       product, DecimalType(new_precision, new_scale)
+                   p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
+                   p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
+                   (
+                       result_type,
+                       overflow_possible,
+                   ) = _get_decimal_multiplication_result_type(p1, s1, p2, s2)
+
+                   result_exp = _arithmetic_operation(
+                       snowpark_typed_args[0],
+                       snowpark_typed_args[1],
+                       lambda x, y: x * y,
+                       overflow_possible,
+                       False,
+                       result_type,
                    )
-                   result_type = DecimalType(new_precision, new_scale)
                case (_NumericType(), _NumericType()):
                    result_exp = snowpark_args[0] * snowpark_args[1]
                    result_exp = _type_with_typer(result_exp)
@@ -7827,6 +7894,12 @@ def map_unresolved_function(
                snowpark_typed_args[0].typ, snowpark_args[0]
            )
        case "try_subtract":
+           # Check for interval types and throw NotImplementedError
+           for arg in snowpark_typed_args:
+               if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
+                   raise NotImplementedError(
+                       "try_subtract with interval types is not supported"
+                   )
            result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 1)
            result_exp = _type_with_typer(result_exp)
        case "try_to_number":
@@ -8391,20 +8464,10 @@ def map_unresolved_function(
    return spark_col_names, typed_col
 
 
-def _cast_helper(column: Column, to: DataType) -> Column:
-    if global_config.spark_sql_ansi_enabled:
-        column_mediator = (
-            snowpark_fn.cast(column, StringType())
-            if isinstance(to, DecimalType)
-            else column
-        )
-        return snowpark_fn.cast(column_mediator, to)
-    else:
-        return _try_cast_helper(column, to)
-
-
 def _try_cast_helper(column: Column, to: DataType) -> Column:
     """
+    DEPRECATED because of performance issues
+
     Attempts to cast a given column to a specified data type using the same behaviour as Spark.
 
     Args:
@@ -9600,71 +9663,109 @@ def _decimal_add_sub_result_type_helper(p1, s1, p2, s2):
    return result_precision, min_scale, return_type_precision, return_type_scale
 
 
-def
-    result_type: DecimalType | DataType,
-    other_type: DataType,
-    snowpark_args: list[Column],
-) -> Column:
-    if global_config.spark_sql_ansi_enabled:
-        result_exp = snowpark_args[0] * snowpark_args[1]
-    else:
-        if isinstance(other_type, _IntegralType):
-            result_exp = snowpark_args[0].cast(result_type) * snowpark_args[1].cast(
-                result_type
-            )
-        else:
-            result_exp = snowpark_args[0].cast(DoubleType()) * snowpark_args[1].cast(
-                DoubleType()
-            )
-        result_exp = _try_cast_helper(result_exp, result_type)
-    return result_exp
-
-
-def _get_decimal_multiplication_result_type(p1, s1, p2, s2) -> DecimalType:
+def _get_decimal_multiplication_result_type(p1, s1, p2, s2) -> tuple[DecimalType, bool]:
     result_precision = p1 + p2 + 1
     result_scale = s1 + s2
+    overflow_possible = False
     if result_precision > 38:
+        overflow_possible = True
         if result_scale > 6:
            overflow = result_precision - 38
            result_scale = max(6, result_scale - overflow)
        result_precision = 38
-    return DecimalType(result_precision, result_scale)
+    return DecimalType(result_precision, result_scale), overflow_possible
 
 
-def
-
-
-
-
-
+def _arithmetic_operation(
+    arg1: TypedColumn,
+    arg2: TypedColumn,
+    op: Callable[[Column, Column], Column],
+    overflow_possible: bool,
+    should_raise_on_overflow: bool,
+    target_type: DecimalType,
 ) -> Column:
-
-
-
-
-
-
-
+    def _cast_arg(tc: TypedColumn) -> Column:
+        _, s = _get_type_precision(tc.typ)
+        typ = (
+            DoubleType()
+            if s > 0
+            or (
+                isinstance(tc.typ, _FractionalType)
+                and not isinstance(tc.typ, DecimalType)
+            )
+            else LongType()
+        )
+        return tc.col.cast(typ)
+
+    op_for_overflow_check = op(arg1.col.cast(DoubleType()), arg2.col.cast(DoubleType()))
+    safe_op = op(_cast_arg(arg1), _cast_arg(arg2))
+
+    if overflow_possible:
+        return _cast_arithmetic_operation_result(
+            op_for_overflow_check, safe_op, target_type, should_raise_on_overflow
        )
    else:
-
-
-
-
-
+        return op(arg1.col, arg2.col).cast(target_type)
+
+
+def _cast_arithmetic_operation_result(
+    overflow_check_expr: Column,
+    result_expr: Column,
+    target_type: DecimalType,
+    should_raise_on_overflow: bool,
+) -> Column:
+    """
+    Casts an arithmetic operation result to the target decimal type with overflow detection.
+    This function uses a dual-expression approach for robust overflow handling:
+    Args:
+        overflow_check_expr: Arithmetic expression using DoubleType operands for overflow detection.
+                             This expression is used ONLY for boundary checking against the target
+                             decimal's min/max values. DoubleType preserves the magnitude of large
+                             intermediate results that might overflow in decimal arithmetic.
+        result_expr: Arithmetic expression using safer operand types (LongType for integers,
+                     DoubleType for fractionals) for the actual result computation.
+        target_type: Target DecimalType to cast the result to.
+        should_raise_on_overflow: If True raises ArithmeticException on overflow, if False, returns NULL on overflow.
+    """
+
+    def create_overflow_handler(min_val, max_val, type_name: str):
+        if should_raise_on_overflow:
+            raise_error = _raise_error_helper(target_type, ArithmeticException)
+            return snowpark_fn.when(
+                (overflow_check_expr < snowpark_fn.lit(min_val))
+                | (overflow_check_expr > snowpark_fn.lit(max_val)),
+                raise_error(
+                    snowpark_fn.lit(
+                        f'[NUMERIC_VALUE_OUT_OF_RANGE] Value cannot be represented as {type_name}. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
+                    )
+                ),
+            ).otherwise(result_expr.cast(target_type))
+        else:
+            return snowpark_fn.when(
+                (overflow_check_expr < snowpark_fn.lit(min_val))
+                | (overflow_check_expr > snowpark_fn.lit(max_val)),
+                snowpark_fn.lit(None),
+            ).otherwise(result_expr.cast(target_type))
+
+    precision = target_type.precision
+    scale = target_type.scale
+
+    max_val = (10**precision - 1) / (10**scale)
+    min_val = -max_val
+
+    return create_overflow_handler(min_val, max_val, f"DECIMAL({precision},{scale})")
 
 
 def _get_decimal_division_result_type(p1, s1, p2, s2) -> tuple[DecimalType, bool]:
-
+    overflow_possible = False
     result_scale = max(6, s1 + p2 + 1)
     result_precision = p1 - s1 + s2 + result_scale
     if result_precision > 38:
-
-        overflow_detected = True
+        overflow_possible = True
        overflow = result_precision - 38
        result_scale = max(6, result_scale - overflow)
        result_precision = 38
-    return DecimalType(result_precision, result_scale),
+    return DecimalType(result_precision, result_scale), overflow_possible
 
 
 def _try_arithmetic_helper(
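The helpers above encode Spark's decimal result-type rules: multiplication yields precision p1 + p2 + 1 and scale s1 + s2; division yields scale max(6, s1 + p2 + 1) and precision p1 - s1 + s2 + scale; both cap precision at 38 and flag possible overflow when the cap bites. A worked pure-Python version of those formulas (illustrative, mirrors the diff but is not the package's code):

def decimal_multiplication_result(p1, s1, p2, s2):
    precision, scale = p1 + p2 + 1, s1 + s2
    overflow_possible = precision > 38
    if overflow_possible and scale > 6:
        scale = max(6, scale - (precision - 38))
    precision = min(precision, 38)
    return (precision, scale), overflow_possible

def decimal_division_result(p1, s1, p2, s2):
    scale = max(6, s1 + p2 + 1)
    precision = p1 - s1 + s2 + scale
    overflow_possible = precision > 38
    if overflow_possible:
        scale = max(6, scale - (precision - 38))
        precision = 38
    return (precision, scale), overflow_possible

# DECIMAL(38,10) * DECIMAL(10,2): raw precision 49 is capped to 38 and flagged.
assert decimal_multiplication_result(38, 10, 10, 2) == ((38, 6), True)
# DECIMAL(10,2) / DECIMAL(10,2): scale max(6, 2+10+1) = 13, precision 10-2+2+13 = 23.
assert decimal_division_result(10, 2, 10, 2) == ((23, 13), False)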
@@ -9778,46 +9879,20 @@ def _try_arithmetic_helper(
        DecimalType(),
        DecimalType(),
    ):
-
-
-
-
-
-
-
-
-
-
-
-
-
-        else:
-            # Both decimal types
-            if operation_type == 1 and s1 == s2:  # subtraction with matching scales
-                new_scale = s1
-                max_integral_digits = max(p1 - s1, p2 - s2)
-                new_precision = max_integral_digits + new_scale
-            else:
-                new_scale = max(s1, s2)
-                max_integral_digits = max(p1 - s1, p2 - s2)
-                new_precision = max_integral_digits + new_scale + 1
-
-        # Overflow check
-        if new_precision > 38:
-            if global_config.spark_sql_ansi_enabled:
-                raise ArithmeticException(
-                    f'[NUMERIC_VALUE_OUT_OF_RANGE] Precision {new_precision} exceeds maximum allowed precision of 38. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
-                )
-            return snowpark_fn.lit(None)
-
-        left_operand, right_operand = snowpark_args[0], snowpark_args[1]
-
-        result = (
-            left_operand + right_operand
-            if operation_type == 0
-            else left_operand - right_operand
+        result_type, overflow_possible = _get_add_sub_result_type(
+            typed_args[0].typ,
+            typed_args[1].typ,
+            "try_add" if operation_type == 0 else "try_subtract",
+        )
+
+        return _arithmetic_operation(
+            typed_args[0],
+            typed_args[1],
+            lambda x, y: x + y if operation_type == 0 else x - y,
+            overflow_possible,
+            False,
+            result_type,
        )
-        return snowpark_fn.cast(result, DecimalType(new_precision, new_scale))
 
    # If either of the inputs is floating point, we can just let it go through to Snowflake, where overflow
    # matches Spark and goes to inf.
@@ -9863,7 +9938,8 @@ def _get_add_sub_result_type(
    type1: DataType,
    type2: DataType,
    spark_function_name: str,
-) -> DataType:
+) -> tuple[DataType, bool]:
+    overflow_possible = False
    result_type = _find_common_type([type1, type2])
    match result_type:
        case DecimalType():
@@ -9872,6 +9948,7 @@ def _get_add_sub_result_type(
            result_scale = max(s1, s2)
            result_precision = max(p1 - s1, p2 - s2) + result_scale + 1
            if result_precision > 38:
+                overflow_possible = True
                if result_scale > 6:
                    overflow = result_precision - 38
                    result_scale = max(6, result_scale - overflow)
@@ -9900,7 +9977,71 @@ def _get_add_sub_result_type(
            raise AnalysisException(
                f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "BOOLEAN".',
            )
-    return result_type
+    return result_type, overflow_possible
+
+
+def _get_interval_type_name(
+    interval_type: Union[YearMonthIntervalType, DayTimeIntervalType]
+) -> str:
+    """Get the formatted interval type name for error messages."""
+    if isinstance(interval_type, YearMonthIntervalType):
+        if interval_type.start_field == 0 and interval_type.end_field == 0:
+            return "INTERVAL YEAR"
+        elif interval_type.start_field == 1 and interval_type.end_field == 1:
+            return "INTERVAL MONTH"
+        else:
+            return "INTERVAL YEAR TO MONTH"
+    else:  # DayTimeIntervalType
+        if interval_type.start_field == 0 and interval_type.end_field == 0:
+            return "INTERVAL DAY"
+        elif interval_type.start_field == 1 and interval_type.end_field == 1:
+            return "INTERVAL HOUR"
+        elif interval_type.start_field == 2 and interval_type.end_field == 2:
+            return "INTERVAL MINUTE"
+        elif interval_type.start_field == 3 and interval_type.end_field == 3:
+            return "INTERVAL SECOND"
+        else:
+            return "INTERVAL DAY TO SECOND"
+
+
+def _check_interval_string_comparison(
+    operator: str, snowpark_typed_args: List[TypedColumn], snowpark_arg_names: List[str]
+) -> None:
+    """Check for invalid interval-string comparisons and raise AnalysisException if found."""
+    if (
+        isinstance(
+            snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
+        )
+        and isinstance(snowpark_typed_args[1].typ, StringType)
+        or isinstance(snowpark_typed_args[0].typ, StringType)
+        and isinstance(
+            snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
+        )
+    ):
+        # Format interval type name for error message
+        interval_type = (
+            snowpark_typed_args[0].typ
+            if isinstance(
+                snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
+            )
+            else snowpark_typed_args[1].typ
+        )
+        interval_name = _get_interval_type_name(interval_type)
+
+        left_type = (
+            "STRING"
+            if isinstance(snowpark_typed_args[0].typ, StringType)
+            else interval_name
+        )
+        right_type = (
+            "STRING"
+            if isinstance(snowpark_typed_args[1].typ, StringType)
+            else interval_name
+        )
+
+        raise AnalysisException(
+            f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} {operator} {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{left_type}" and "{right_type}").;'
+        )
 
 
 def _get_spark_function_name(
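_get_interval_type_name above maps start/end field indices to SQL interval names (0 = YEAR or DAY, 1 = MONTH or HOUR, and so on), collapsing to a single-unit name when the two fields coincide. A trimmed illustration of the year-month case (plain Python, for reference only):

def year_month_interval_name(start_field: int, end_field: int) -> str:
    if start_field == end_field == 0:
        return "INTERVAL YEAR"
    if start_field == end_field == 1:
        return "INTERVAL MONTH"
    return "INTERVAL YEAR TO MONTH"

assert year_month_interval_name(0, 0) == "INTERVAL YEAR"
assert year_month_interval_name(0, 1) == "INTERVAL YEAR TO MONTH"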
@@ -9944,6 +10085,21 @@ def _get_spark_function_name(
                return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
            else:
                return f"{operation_func}(cast({date_param_name1} as date), cast({snowpark_arg_names[1]} as double))"
+        case (DateType(), DayTimeIntervalType()) | (
+            DateType(),
+            YearMonthIntervalType(),
+        ):
+            date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
+            return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
+        case (DayTimeIntervalType(), DateType()) | (
+            YearMonthIntervalType(),
+            DateType(),
+        ):
+            date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
+            if function_name == "+":
+                return f"{date_param_name2} {operation_op} {snowpark_arg_names[0]}"
+            else:
+                return default_spark_function_name
        case (DateType() as dt, _) | (_, DateType() as dt):
            date_param_index = 0 if dt == col1.typ else 1
            date_param_name = _get_literal_param_name(