snowpark-connect 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- snowflake/snowpark_connect/client/server.py +37 -0
- snowflake/snowpark_connect/config.py +72 -3
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/map_cast.py +108 -17
- snowflake/snowpark_connect/expression/map_udf.py +1 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +229 -96
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +43 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +73 -4
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_socket.py +4 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -1
- snowflake/snowpark_connect/relation/read/reader_config.py +6 -0
- snowflake/snowpark_connect/resources_initializer.py +90 -29
- snowflake/snowpark_connect/server.py +6 -41
- snowflake/snowpark_connect/server_common/__init__.py +4 -1
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/utils/context.py +8 -0
- snowflake/snowpark_connect/utils/java_stored_procedure.py +53 -27
- snowflake/snowpark_connect/utils/java_udaf_utils.py +46 -28
- snowflake/snowpark_connect/utils/java_udtf_utils.py +1 -1
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +48 -15
- snowflake/snowpark_connect/utils/scala_udf_utils.py +98 -22
- snowflake/snowpark_connect/utils/telemetry.py +33 -22
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +12 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +4 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +46 -37
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_cast.py

@@ -6,6 +6,7 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
 from pyspark.errors.exceptions.base import (
     AnalysisException,
+    ArithmeticException,
     IllegalArgumentException,
     NumberFormatException,
     SparkRuntimeException,
@@ -18,7 +19,9 @@ from snowflake.snowpark.types import (
     BooleanType,
     DataType,
     DateType,
+    DecimalType,
     DoubleType,
+    FloatType,
     IntegerType,
     LongType,
     MapType,
@@ -36,6 +39,13 @@ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.expression.error_utils import raise_error_helper
+from snowflake.snowpark_connect.expression.integral_types_support import (
+    apply_fractional_to_integral_cast,
+    apply_fractional_to_integral_cast_with_ansi_check,
+    apply_integral_overflow_with_ansi_check,
+    get_integral_type_bounds,
+)
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import (
     map_type_string_to_snowpark_type,
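The new helper module integral_types_support.py (+219 lines) is not shown inline in this diff. Judging from the call sites below, get_integral_type_bounds returns the inclusive min/max for a target integral type; a minimal, hypothetical sketch for orientation only:

    from snowflake.snowpark.types import ByteType, IntegerType, LongType, ShortType

    # Illustrative reimplementation; the real helper lives in the new
    # integral_types_support.py, whose body this diff omits.
    _INTEGRAL_BOUNDS = {
        ByteType: (-(2**7), 2**7 - 1),     # TINYINT: -128 .. 127
        ShortType: (-(2**15), 2**15 - 1),  # SMALLINT: -32768 .. 32767
        IntegerType: (-(2**31), 2**31 - 1),
        LongType: (-(2**63), 2**63 - 1),
    }

    def get_integral_type_bounds(to_type):
        # Inclusive (min, max) for the target integral type.
        return _INTEGRAL_BOUNDS[type(to_type)]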
@@ -54,7 +64,7 @@ SYMBOL_FUNCTIONS = {"<", ">", "<=", ">=", "!=", "+", "-", "*", "/", "%", "div"}
 CAST_FUNCTIONS = {
     "boolean": types_proto.DataType(boolean=types_proto.DataType.Boolean()),
     "int": types_proto.DataType(integer=types_proto.DataType.Integer()),
-    "smallint": types_proto.DataType(
+    "smallint": types_proto.DataType(short=types_proto.DataType.Short()),
     "bigint": types_proto.DataType(long=types_proto.DataType.Long()),
     "tinyint": types_proto.DataType(byte=types_proto.DataType.Byte()),
     "float": types_proto.DataType(float=types_proto.DataType.Float()),
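A quick sanity check of what the corrected "smallint" entry produces (illustration, not code from the package):

    import pyspark.sql.connect.proto.types_pb2 as types_proto

    # The corrected entry populates the proto's `short` variant, so a
    # SQL-style cast to SMALLINT now resolves to the 16-bit Short type.
    t = types_proto.DataType(short=types_proto.DataType.Short())
    print(t.WhichOneof("kind"))  # -> "short"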
@@ -237,6 +247,11 @@ def map_cast(
         case (_, BooleanType()) if isinstance(from_type, _NumericType):
             result_exp = col.cast(LongType()).cast(to_type)
 
+        case (_IntegralType(), _IntegralType()):
+            result_exp = apply_integral_overflow_with_ansi_check(
+                col, to_type, spark_sql_ansi_enabled
+            )
+
         # binary
         case (StringType(), BinaryType()):
             result_exp = snowpark_fn.to_binary(col, "UTF-8")
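The new integral-to-integral case targets Spark's cast semantics: an error under ANSI mode, Java-style two's-complement wraparound otherwise. A pure-Python model of those semantics (the actual apply_integral_overflow_with_ansi_check lives in integral_types_support.py, which this diff does not show, and raises Spark's ArithmeticException rather than Python's ArithmeticError):

    def spark_integral_cast(value: int, bits: int, ansi: bool) -> int:
        # Model of Spark's integral -> integral cast for a two's-complement
        # target of the given bit width.
        lo, hi = -(2 ** (bits - 1)), 2 ** (bits - 1) - 1
        if lo <= value <= hi:
            return value
        if ansi:
            # ANSI mode: overflow is a [CAST_OVERFLOW] error.
            raise ArithmeticError("CAST_OVERFLOW")
        # Non-ANSI mode: Java-style wraparound, e.g. CAST(300 AS TINYINT) -> 44.
        span = 2 ** bits
        return ((value - lo) % span) + lo

    assert spark_integral_cast(300, 8, ansi=False) == 44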
@@ -272,6 +287,44 @@ def map_cast(
             result_exp = snowpark_fn.to_varchar(col, "UTF-8")
 
         # numeric
+        case (_, _) if isinstance(from_type, (FloatType, DoubleType)) and isinstance(
+            to_type, _IntegralType
+        ):
+            truncated = (
+                snowpark_fn.when(
+                    col == snowpark_fn.lit(float("nan")), snowpark_fn.lit(0)
+                )
+                .when(col < 0, snowpark_fn.ceil(col))
+                .otherwise(snowpark_fn.floor(col))
+            )
+
+            if spark_sql_ansi_enabled:
+                result_exp = apply_fractional_to_integral_cast_with_ansi_check(
+                    truncated, to_type, True
+                )
+            else:
+                target_min, target_max = get_integral_type_bounds(to_type)
+                result_exp = (
+                    snowpark_fn.when(
+                        truncated > snowpark_fn.lit(target_max),
+                        snowpark_fn.lit(target_max),
+                    )
+                    .when(
+                        truncated < snowpark_fn.lit(target_min),
+                        snowpark_fn.lit(target_min),
+                    )
+                    .otherwise(truncated.cast(to_type))
+                )
+        case (_, _) if isinstance(from_type, DecimalType) and isinstance(
+            to_type, _IntegralType
+        ):
+            result_exp = snowpark_fn.when(col < 0, snowpark_fn.ceil(col)).otherwise(
+                snowpark_fn.floor(col)
+            )
+            result_exp = result_exp.cast(to_type)
+            result_exp = apply_integral_overflow_with_ansi_check(
+                result_exp, to_type, spark_sql_ansi_enabled
+            )
         case (_, _) if isinstance(from_type, _FractionalType) and isinstance(
             to_type, _IntegralType
         ):
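The non-ANSI float/double branch above encodes three rules worth calling out: NaN maps to 0, values truncate toward zero (ceil for negatives, floor otherwise), and out-of-range results saturate at the target bounds rather than wrapping. The same logic in plain Python, for reference:

    import math

    def nonansi_fractional_to_integral(value: float, lo: int, hi: int) -> int:
        # Mirrors the when/otherwise chain above: NaN -> 0, truncate toward
        # zero, then clamp to the target type's [lo, hi] range.
        if math.isnan(value):
            return 0
        truncated = math.ceil(value) if value < 0 else math.floor(value)
        return max(lo, min(hi, truncated))

    assert nonansi_fractional_to_integral(float("nan"), -128, 127) == 0
    assert nonansi_fractional_to_integral(-3.7, -128, 127) == -3  # toward zero
    assert nonansi_fractional_to_integral(1e9, -128, 127) == 127  # saturates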
@@ -282,16 +335,49 @@ def map_cast(
                 .when(col < 0, snowpark_fn.ceil(col))
                 .otherwise(snowpark_fn.floor(col))
             )
-            result_exp = result_exp
+            result_exp = apply_fractional_to_integral_cast(result_exp, to_type)
         case (StringType(), _) if (isinstance(to_type, _IntegralType)):
             if spark_sql_ansi_enabled:
-
+                double_val = snowpark_fn.cast(col, DoubleType())
+
+                target_min, target_max = get_integral_type_bounds(to_type)
+                raise_error = raise_error_helper(to_type, NumberFormatException)
+                to_type_name = to_type.__class__.__name__.upper().replace("TYPE", "")
+
+                truncated = snowpark_fn.when(
+                    double_val < 0, snowpark_fn.ceil(double_val)
+                ).otherwise(snowpark_fn.floor(double_val))
+
+                result_exp = snowpark_fn.when(
+                    (truncated < snowpark_fn.lit(target_min))
+                    | (truncated > snowpark_fn.lit(target_max)),
+                    raise_error(
+                        snowpark_fn.lit("[CAST_INVALID_INPUT] The value '"),
+                        col,
+                        snowpark_fn.lit(
+                            f'\' of the type "STRING" cannot be cast to "{to_type_name}" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+                        ),
+                    ),
+                ).otherwise(truncated.cast(to_type))
             else:
-
-
-
-
-
+                double_val = snowpark_fn.try_cast(col, DoubleType())
+
+                truncated = snowpark_fn.when(
+                    double_val < 0, snowpark_fn.ceil(double_val)
+                ).otherwise(snowpark_fn.floor(double_val))
+
+                target_min, target_max = get_integral_type_bounds(to_type)
+                result_exp = (
+                    snowpark_fn.when(
+                        double_val.isNull(), snowpark_fn.lit(None).cast(to_type)
+                    )
+                    .when(
+                        (truncated < snowpark_fn.lit(target_min))
+                        | (truncated > snowpark_fn.lit(target_max)),
+                        snowpark_fn.lit(None).cast(to_type),
+                    )
+                    .otherwise(truncated.cast(to_type))
+                )
         # https://docs.snowflake.com/en/sql-reference/functions/try_cast Only works on certain types (mostly non-structured ones)
         case (StringType(), _) if isinstance(to_type, _NumericType) or isinstance(
             to_type, StringType
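The two branches implement the behavioral split PySpark users see when casting strings to integral types. Illustratively (session setup elided; the results shown are assumptions based on the expressions above, not output from package tests):

    # Non-ANSI: malformed or out-of-range strings become NULL.
    spark.conf.set("spark.sql.ansi.enabled", "false")
    spark.sql("SELECT CAST('99999999999' AS INT) AS v").collect()
    # -> [Row(v=None)]

    # ANSI: the same cast raises NumberFormatException carrying the
    # [CAST_INVALID_INPUT] message text embedded in the expression above.
    spark.conf.set("spark.sql.ansi.enabled", "true")
    spark.sql("SELECT CAST('99999999999' AS INT) AS v").collect()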
@@ -368,10 +454,19 @@ def sanity_check(
         except Exception:
             raise_cast_failure_exception = True
         if raise_cast_failure_exception:
-
-
-
-
+            if not isinstance(from_type, StringType) and isinstance(to_type, _IntegralType):
+                from_type_name = from_type.__class__.__name__.upper().replace("TYPE", "")
+                to_type_name = to_type.__class__.__name__.upper().replace("TYPE", "")
+                value_suffix = "L" if isinstance(from_type, LongType) else ""
+                exception = ArithmeticException(
+                    f"""[CAST_OVERFLOW] The value {value}{value_suffix} of the type "{from_type_name}" cannot be cast to "{to_type_name}" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error."""
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            else:
+                exception = NumberFormatException(
+                    """[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error."""
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
             raise exception
 
 
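A worked example of the name and suffix mechanics in the new overflow branch (the types here are chosen for illustration):

    from snowflake.snowpark.types import IntegerType, LongType

    from_type, to_type = LongType(), IntegerType()
    from_type_name = from_type.__class__.__name__.upper().replace("TYPE", "")  # "LONG"
    to_type_name = to_type.__class__.__name__.upper().replace("TYPE", "")      # "INTEGER"
    value_suffix = "L" if isinstance(from_type, LongType) else ""              # "L"
    # So a long value that overflows INT is reported as, e.g.:
    # [CAST_OVERFLOW] The value 9223372036854775807L of the type "LONG" ...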
@@ -386,15 +481,11 @@ def _cast_string_to_year_month_interval(col: Column, to_type: YearMonthIntervalT
     5. 'INTERVAL [+|-]'[+|-]y' YEAR' format - extract the y part
     6. 'INTERVAL [+|-]'[+|-]m' MONTH' format - extract the m part
     """
-    from snowflake.snowpark_connect.expression.map_unresolved_function import (
-        _raise_error_helper,
-    )
-
     # Extract values from different formats
     value = snowpark_fn.regexp_extract(col, "'([^']+)'", 1)
     years = snowpark_fn.regexp_extract(col, "^[+-]?\\d+", 0)
     months = snowpark_fn.regexp_extract(col, "-(\\d+)$", 1)
-    raise_error = _raise_error_helper(to_type, IllegalArgumentException)
+    raise_error = raise_error_helper(to_type, IllegalArgumentException)
 
     # For MONTH-only intervals, treat the input as months
     if (
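For reference, the three extraction patterns above, exercised with Python's re module on two of the documented input shapes (Snowflake's REGEXP_EXTRACT semantics differ in minor ways, but the patterns themselves are unchanged):

    import re

    plain = "1-6"                                 # documented 'y-m' shape
    re.match(r"^[+-]?\d+", plain).group(0)        # -> '1'  (years part)
    re.search(r"-(\d+)$", plain).group(1)         # -> '6'  (months part)

    quoted = "INTERVAL '1-6' YEAR TO MONTH"       # documented INTERVAL shape
    re.search(r"'([^']+)'", quoted).group(1)      # -> '1-6' (quoted value)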
snowflake/snowpark_connect/expression/map_udf.py

@@ -244,6 +244,7 @@ def map_common_inline_user_defined_udf(
         # All Scala UDFs return Variant, so we always need to cast back to the original type
         result_expr = snowpark_fn.cast(udf_call_expr, original_return_type)
         result_type = original_return_type
+
     elif isinstance(original_return_type, (MapType, StructType)) and isinstance(
         processed_return_type, VariantType
     ):