snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/error/error_utils.py
@@ -12,6 +12,7 @@ https://github.com/apache/spark/blob/master/common/utils/src/main/resources/erro
  import json
  import pathlib
  import re
+ import threading
  import traceback

  import jpype
@@ -35,9 +36,12 @@ from snowflake.core.exceptions import NotFoundError

  from snowflake.connector.errors import ProgrammingError
  from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
- from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
  from snowflake.snowpark_connect.error.error_mapping import ERROR_MAPPINGS_JSON

+ # Thread-local storage for custom error codes when we can't attach them directly to exceptions
+ _thread_local = threading.local()
+
  # The JSON string in error_mapping.py is a copy of https://github.com/apache/spark/blob/master/common/utils/src/main/resources/error/error-conditions.json.
  # The file doesn't have to be synced with spark latest main. Just update it when required.
  current_dir = pathlib.Path(__file__).parent.resolve()
@@ -81,6 +85,21 @@ invalid_bit_pattern = re.compile(
  )


+ def attach_custom_error_code(exception: Exception, custom_error_code: int) -> Exception:
+     """
+     Attach a custom error code to any exception instance.
+     This allows us to add custom error codes to existing PySpark exceptions.
+     """
+     if not hasattr(exception, "custom_error_code"):
+         try:
+             exception.custom_error_code = custom_error_code
+         except (AttributeError, TypeError):
+             # Some exception types (like Java exceptions) don't allow setting custom attributes
+             # Store the error code in thread-local storage for later retrieval
+             _thread_local.pending_error_code = custom_error_code
+     return exception
+
+
  def contains_udtf_select(sql_string):
      # This function tries to detect if the SQL string contains a UDTF (User Defined Table Function) call.
      # Looks for select FROM TABLE(...) or FROM ( TABLE(...) )
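
A minimal usage sketch of the new helper (not part of the diff; the try/except framing is illustrative, the helper and the ErrorCodes constant are taken from the changes above). Ordinary Python exceptions get the attribute directly; exception types that reject new attributes, such as wrapped Java exceptions, fall back to the thread-local slot that build_grpc_error_response later drains:

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code

    try:
        raise ValueError("unsupported literal type")
    except ValueError as ex:
        # Ordinary Python exceptions accept the new attribute in place.
        attach_custom_error_code(ex, ErrorCodes.UNSUPPORTED_TYPE)
        assert ex.custom_error_code == ErrorCodes.UNSUPPORTED_TYPE
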
@@ -100,20 +119,29 @@ def _get_converted_known_sql_or_custom_exception(

      # custom exception
      if "[snowpark_connect::invalid_array_index]" in msg:
-         return ArrayIndexOutOfBoundsException(
+         exception = ArrayIndexOutOfBoundsException(
              message='The index <indexValue> is out of bounds. The array has <arraySize> elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.ARRAY_INDEX_OUT_OF_BOUNDS)
+         return exception
      if "[snowpark_connect::invalid_index_of_zero]" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message="[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+         return exception
      if "[snowpark_connect::invalid_index_of_zero_in_slice]" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message="Unexpected value for start in function slice: SQL array indices start at 1."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+         return exception
+
      invalid_bit = invalid_bit_pattern.search(msg)
      if invalid_bit:
-         return IllegalArgumentException(message=invalid_bit.group(0))
+         exception = IllegalArgumentException(message=invalid_bit.group(0))
+         attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+         return exception
      match = snowpark_connect_exception_pattern.search(
          ex.message if hasattr(ex, "message") else str(ex)
      )
@@ -125,71 +153,136 @@ def _get_converted_known_sql_or_custom_exception(
              if class_name
              else SparkConnectGrpcException
          )
-         return exception_class(message=message)
+         exception = exception_class(message=message)
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         return exception

      if "select with no columns" in msg and contains_udtf_select(query):
          # We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
-         return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")
+         exception = PythonException(
+             message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}"
+         )
+         attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+         return exception

      # known sql exception
      if ex.sql_error_code not in (100038, 100037, 100035, 100357):
          return None

      if "(22018): numeric value" in msg:
-         return NumberFormatException(
+         exception = NumberFormatException(
              message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         return exception
      if "(22018): boolean value" in msg:
-         return SparkRuntimeException(
+         exception = SparkRuntimeException(
              message='[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error.'
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         return exception
      if "(22007): timestamp" in msg:
-         return AnalysisException(
+         exception = AnalysisException(
              "[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Data type mismatch"
          )
+         attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+         return exception

      if getattr(ex, "sql_error_code", None) == 100357:
          if re.search(init_multi_args_exception_pattern, msg):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the init method {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception
          if re.search(terminate_multi_args_exception_pattern, msg):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

      if "failed to split string, provided pattern:" in msg:
-         return IllegalArgumentException(
+         exception = IllegalArgumentException(
              message=f"Failed to split string using provided pattern. {ex.message}"
          )
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         return exception

      if "100357" in msg and "wrong tuple size for returned value" in msg:
-         return PythonException(
+         exception = PythonException(
              message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
          )
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         return exception

      if "100357 (p0000): python interpreter error:" in msg:
          if "in eval" in msg:
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'eval' method: error. {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

          if "in terminate" in msg:
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the 'terminate' method: terminate error. {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

          if "object is not iterable" in msg and contains_udtf_select(query):
-             return PythonException(
+             exception = PythonException(
                  message=f"[UDTF_RETURN_NOT_ITERABLE] {ex.message}"
              )
+             attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+             return exception

-         return PythonException(message=f"{ex.message}")
+         exception = PythonException(message=f"{ex.message}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         return exception

      return None


+ def _sanitize_custom_error_message(msg):
+     if "[snowpark_connect::unsupported_operation]" in msg:
+         return (
+             msg.replace("[snowpark_connect::unsupported_operation] ", ""),
+             ErrorCodes.UNSUPPORTED_OPERATION,
+         )
+     if "[snowpark_connect::internal_error]" in msg:
+         return (
+             msg.replace("[snowpark_connect::internal_error] ", ""),
+             ErrorCodes.INTERNAL_ERROR,
+         )
+     if "[snowpark_connect::invalid_operation]" in msg:
+         return (
+             msg.replace("[snowpark_connect::invalid_operation] ", ""),
+             ErrorCodes.INVALID_OPERATION,
+         )
+     if "[snowpark_connect::type_mismatch]" in msg:
+         return (
+             msg.replace("[snowpark_connect::type_mismatch] ", ""),
+             ErrorCodes.TYPE_MISMATCH,
+         )
+     if "[snowpark_connect::invalid_input]" in msg:
+         return (
+             msg.replace("[snowpark_connect::invalid_input] ", ""),
+             ErrorCodes.INVALID_INPUT,
+         )
+     if "[snowpark_connect::unsupported_type]" in msg:
+         return (
+             msg.replace("[snowpark_connect::unsupported_type] ", ""),
+             ErrorCodes.UNSUPPORTED_TYPE,
+         )
+     return msg, None
+
+
  def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
+     # Lazy import to avoid circular dependency
+     from snowflake.snowpark_connect.config import global_config
+
      include_stack_trace = (
          global_config.get("spark.sql.pyspark.jvmStacktrace.enabled")
          if hasattr(global_config, "spark.sql.pyspark.jvmStacktrace.enabled")
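
A hedged illustration of what the new _sanitize_custom_error_message helper does (the sample message text is made up; the tag and constant come from the code above): it strips the internal [snowpark_connect::...] marker from the message and reports the matching ErrorCodes constant, or returns the message unchanged with None when no marker is present.

    clean, code = _sanitize_custom_error_message(
        "[snowpark_connect::unsupported_operation] This operation is not supported"
    )
    # clean == "This operation is not supported"
    # code == ErrorCodes.UNSUPPORTED_OPERATION
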
@@ -211,6 +304,7 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
              error_class="DIVIDE_BY_ZERO",
              message_parameters={"config": '"spark.sql.ansi.enabled"'},
          )
+         attach_custom_error_code(ex, ErrorCodes.DIVISION_BY_ZERO)
      elif ex.sql_error_code in (100096, 100040):
          # Spark seems to want the Java base class instead of org.apache.spark.sql.SparkDateTimeException
          # which is what should really be thrown
@@ -299,14 +393,40 @@ def build_grpc_error_response(ex: Exception) -> status_pb2.Status:
          domain="snowflake.sas",
      )

-     detail = any_pb2.Any()
-     detail.Pack(error_info)
-
      if message is None:
          message = str(ex)

+     custom_error_code = None
+
+     # attach error code via the exception message
+     message, custom_error_code_from_msg = _sanitize_custom_error_message(message)
+
+     # Check if exception already has a custom error code, if not add INTERNAL_ERROR as default
+     if not hasattr(ex, "custom_error_code") or ex.custom_error_code is None:
+         attach_custom_error_code(
+             ex,
+             ErrorCodes.INTERNAL_ERROR
+             if custom_error_code_from_msg is None
+             else custom_error_code_from_msg,
+         )
+
+     # Get the custom error code from the exception or thread-local storage
+     custom_error_code = getattr(ex, "custom_error_code", None) or getattr(
+         _thread_local, "pending_error_code", None
+     )
+
+     # Clear thread-local storage after retrieving the error code
+     if hasattr(_thread_local, "pending_error_code"):
+         delattr(_thread_local, "pending_error_code")
+
+     separator = "==========================================="
+     error_code_added_message = f"\n{separator}\nSNOWPARK CONNECT ERROR CODE: {custom_error_code}\n{separator}\n{message}"
+
+     detail = any_pb2.Any()
+     detail.Pack(error_info)
+
      rich_status = status_pb2.Status(
-         code=code_pb2.INTERNAL, message=message, details=[detail]
+         code=code_pb2.INTERNAL, message=error_code_added_message, details=[detail]
      )
      return rich_status
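
With these changes every gRPC error surfaced to the client leads with a SNOWPARK CONNECT ERROR CODE banner, followed by the (sanitized) original message. Roughly, the message field of the returned status looks like this (the code value is shown as a placeholder, and the sample message is illustrative):

    ===========================================
    SNOWPARK CONNECT ERROR CODE: <code from ErrorCodes>
    ===========================================
    [CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. ...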

snowflake/snowpark_connect/error/exceptions.py
@@ -2,27 +2,36 @@
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #

+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+

  class SnowparkConnectException(Exception):
      """Parent class to all SnowparkConnect related exceptions."""

-     def __init__(self, *args, **kwargs) -> None:
+     def __init__(self, *args, custom_error_code=None, **kwargs) -> None:
          super().__init__(*args, **kwargs)
+         self.custom_error_code = custom_error_code


  class MissingDatabase(SnowparkConnectException):
-     def __init__(self) -> None:
+     def __init__(self, custom_error_code=None) -> None:
          super().__init__(
              "No default database found in session",
+             custom_error_code=custom_error_code or ErrorCodes.MISSING_DATABASE,
          )


  class MissingSchema(SnowparkConnectException):
-     def __init__(self) -> None:
+     def __init__(self, custom_error_code=None) -> None:
          super().__init__(
              "No default schema found in session",
+             custom_error_code=custom_error_code or ErrorCodes.MISSING_SCHEMA,
          )


  class MaxRetryExceeded(SnowparkConnectException):
-     ...
+     def __init__(
+         self,
+         message="Maximum retry attempts exceeded",
+     ) -> None:
+         super().__init__(message)
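
A quick sketch of how the reworked hierarchy carries the code (class and constant names are from the diff; the try/except framing is illustrative):

    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.exceptions import MissingDatabase

    try:
        raise MissingDatabase()
    except MissingDatabase as ex:
        # custom_error_code defaults to MISSING_DATABASE unless the caller passes one.
        assert ex.custom_error_code == ErrorCodes.MISSING_DATABASE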

snowflake/snowpark_connect/execute_plan/map_execution_command.py
@@ -11,6 +11,8 @@ from snowflake.snowpark_connect.column_name_handler import ColumnNames
  from snowflake.snowpark_connect.config import global_config, sessions_config
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
  from snowflake.snowpark_connect.expression import map_udf
  from snowflake.snowpark_connect.relation import map_udtf
@@ -52,9 +54,11 @@ def _create_column_rename_map(
              new_column_name = (
                  f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
              )
-             renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+             renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, set()))
          else:
-             not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+             not_renamed_cols.append(
+                 ColumnNames(new_column_name, col.snowpark_name, set())
+             )

      if len(renamed_cols) == 0:
          return {
@@ -207,6 +211,8 @@ def map_execution_command(
              map_udtf.register_udtf(request.plan.command.register_table_function)

          case other:
-             raise SnowparkConnectNotImplementedError(
+             exception = SnowparkConnectNotImplementedError(
                  f"Command type {other} not implemented"
              )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception

snowflake/snowpark_connect/execute_plan/map_execution_root.py
@@ -21,6 +21,8 @@ from snowflake.snowpark._internal.utils import (
  )
  from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.execute_plan.utils import (
      arrow_table_to_arrow_bytes,
      pandas_to_arrow_batches_bytes,
@@ -56,7 +58,9 @@ def sproc_connector_fetch_arrow_batches_fix(self) -> Iterator[Table]:
      if self._prefetch_hook is not None:
          self._prefetch_hook()
      if self._query_result_format != "arrow":
-         raise NotSupportedError
+         exception = NotSupportedError()
+         attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+         raise exception
      return self._result_set._fetch_arrow_batches()

snowflake/snowpark_connect/execute_plan/utils.py
@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
  from pyspark.sql.pandas.types import _dedup_names

  from snowflake.snowpark import types as sf_types
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.type_mapping import map_snowpark_types_to_pyarrow_types
  from snowflake.snowpark_connect.utils.telemetry import (
      SnowparkConnectNotImplementedError,
@@ -88,9 +90,11 @@ def is_streaming(rel: relation_proto.Relation) -> bool:
          case "html_string":
              return is_streaming(rel.html_string.input)
          case "cached_remote_relation":
-             raise SnowparkConnectNotImplementedError(
+             exception = SnowparkConnectNotImplementedError(
                  "Cached remote relation not implemented"
              )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception
          case "common_inline_user_defined_table_function":
              return is_streaming(rel.common_inline_user_defined_table_function.input)
          case "fill_na":

snowflake/snowpark_connect/expression/function_defaults.py
@@ -7,6 +7,9 @@ from typing import Any
  import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
  import pyspark.sql.connect.proto.types_pb2 as types_pb2

+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+

  @dataclass(frozen=True)
  class DefaultParameter:
@@ -154,7 +157,9 @@ def _create_literal_expression(value: Any) -> expressions_pb2.Expression:
          null_type.null.SetInParent()
          expr.literal.null.CopyFrom(null_type)
      else:
-         raise ValueError(f"Unsupported literal type: {value}")
+         exception = ValueError(f"Unsupported literal type: {value}")
+         attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+         raise exception

      return expr

@@ -189,11 +194,13 @@ def inject_function_defaults(

      # Check if any required params are missing.
      if missing_arg_count > len(defaults):
-         raise ValueError(
+         exception = ValueError(
              f"Function '{function_name}' is missing required arguments. "
              f"Expected {total_args} args, got {current_arg_count}, "
              f"but only {len(defaults)} defaults are defined."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+         raise exception

      defaults_to_append = defaults[-missing_arg_count:]
      injected = False

snowflake/snowpark_connect/expression/literal.py
@@ -10,6 +10,8 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
  from tzlocal import get_localzone

  from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
  from snowflake.snowpark_connect.utils.telemetry import (
      SnowparkConnectNotImplementedError,
@@ -100,4 +102,8 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
          case "null" | None:
              return None, "NULL"
          case other:
-             raise SnowparkConnectNotImplementedError(f"Other Literal Type {other}")
+             exception = SnowparkConnectNotImplementedError(
+                 f"Other Literal Type {other}"
+             )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception

snowflake/snowpark_connect/expression/map_cast.py
@@ -31,6 +31,8 @@ from snowflake.snowpark.types import (
  )
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
  from snowflake.snowpark_connect.config import global_config
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.typer import ExpressionTyper
  from snowflake.snowpark_connect.type_mapping import (
      map_type_string_to_snowpark_type,
@@ -87,7 +89,9 @@ def map_cast(
              to_type = map_type_string_to_snowpark_type(exp.cast.type_str)
              to_type_str = exp.cast.type_str.upper()
          case _:
-             raise ValueError("No type to cast to")
+             exception = ValueError("No type to cast to")
+             attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+             raise exception

      from_exp = exp.cast.expr
      new_name, typed_column = map_single_column_expression(
@@ -300,9 +304,11 @@ def map_cast(
              else:
                  result_exp = snowpark_fn.try_cast(col, to_type)
          case (StringType(), _):
-             raise AnalysisException(
+             exception = AnalysisException(
                  f"""[DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION] Cannot resolve "{col_name}" due to data type mismatch: cannot cast "{snowpark_to_proto_type(from_type, column_mapping)}" to "{exp.cast.type_str.upper()}".;"""
              )
+             attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+             raise exception
          case _:
              result_exp = snowpark_fn.cast(col, to_type)

@@ -317,9 +323,11 @@ def sanity_check(
      """

      if isinstance(from_type, LongType) and isinstance(to_type, BinaryType):
-         raise NumberFormatException(
+         exception = NumberFormatException(
              f"""[DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION] Cannot resolve "CAST({value} AS BINARY)" due to data type mismatch: cannot cast "BIGINT" to "BINARY" with ANSI mode on."""
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         raise exception

      if (
          from_type_cast
@@ -329,9 +337,11 @@ def sanity_check(
          if value is not None:
              value = value.strip().lower()
          if value not in {"t", "true", "f", "false", "y", "yes", "n", "no", "0", "1"}:
-             raise SparkRuntimeException(
+             exception = SparkRuntimeException(
                  f"""[CAST_INVALID_INPUT] The value '{value}' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error."""
              )
+             attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+             raise exception

      raise_cast_failure_exception = False
      if isinstance(to_type, _IntegralType):
@@ -351,6 +361,8 @@ def sanity_check(
          except Exception:
              raise_cast_failure_exception = True
      if raise_cast_failure_exception:
-         raise NumberFormatException(
+         exception = NumberFormatException(
              """[CAST_INVALID_INPUT] Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary setting "spark.sql.ansi.enabled" to "false" may bypass this error."""
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+         raise exception

snowflake/snowpark_connect/expression/map_expression.py
@@ -14,6 +14,8 @@ from snowflake.snowpark import Session
  from snowflake.snowpark._internal.analyzer.expression import UnresolvedAttribute
  from snowflake.snowpark.types import TimestampTimeZone, TimestampType
  from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression import (
      map_extension,
      map_udf,
@@ -62,9 +64,11 @@ def map_alias(
          # Multi-column case: handle like explode("map").alias("key", "value")
          col_names, col = map_expression(alias.expr, column_mapping, typer)
          if len(col_names) != len(list(alias.name)):
-             raise ValueError(
+             exception = ValueError(
                  f"Found the unresolved operator: 'Project [{col_names} AS ({', '.join(list(alias.name))})]. Number of aliases ({len(list(alias.name))}) does not match number of columns ({len(col_names)})"
              )
+             attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+             raise exception
          return list(alias.name), col

      name, col = map_single_column_expression(alias.expr, column_mapping, typer)
@@ -226,22 +230,27 @@ def map_expression(
                      | exp.sort_order.SORT_DIRECTION_ASCENDING
                  ):
                      if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_LAST:
-                         return [child_name], snowpark_fn.asc_nulls_last(child_column)
+                         col = snowpark_fn.asc_nulls_last(child_column.col)
                      else:
                          # If nulls are not specified or null_ordering is FIRST in the sort order, Spark defaults to nulls
                          # first in the case of ascending sort order.
-                         return [child_name], snowpark_fn.asc_nulls_first(child_column)
+                         col = snowpark_fn.asc_nulls_first(child_column.col)
                  case exp.sort_order.SORT_DIRECTION_DESCENDING:
                      if exp.sort_order.null_ordering == exp.sort_order.SORT_NULLS_FIRST:
-                         return [child_name], snowpark_fn.desc_nulls_first(child_column)
+                         col = snowpark_fn.desc_nulls_first(child_column.col)
                      else:
                          # If nulls are not specified or null_ordering is LAST in the sort order, Spark defaults to nulls
                          # last in the case of descending sort order.
-                         return [child_name], snowpark_fn.desc_nulls_last(child_column)
+                         col = snowpark_fn.desc_nulls_last(child_column.col)
                  case _:
-                     raise ValueError(
+                     exception = ValueError(
                          f"Invalid sort direction {exp.sort_order.direction}"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
+             return [child_name], TypedColumn(col, lambda: typer.type(col))
          case "unresolved_attribute":
              col_name, col = map_att.map_unresolved_attribute(exp, column_mapping, typer)
              # Check if this is a multi-column regex expansion
@@ -275,6 +284,36 @@ def map_expression(
              )
              return [col_name], col
          case "unresolved_function":
+             from snowflake.snowpark_connect.utils.context import (
+                 get_is_processing_order_by,
+             )
+
+             is_order_by = get_is_processing_order_by()
+             if is_order_by:
+                 # For expressions in an order by clause check if we can reuse already-computed column.
+                 if exp.unresolved_function.function_name:
+                     func_name = exp.unresolved_function.function_name
+                     available_columns = column_mapping.get_spark_columns()
+
+                     for col_name in available_columns:
+                         if (
+                             func_name.lower() in col_name.lower()
+                             and "(" in col_name
+                             and ")" in col_name
+                         ):
+                             # This looks like it might be an expression
+                             snowpark_col_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+                                 col_name
+                             )
+                             if snowpark_col_name:
+                                 # Optimization applied - reusing already computed column
+                                 return [col_name], TypedColumn(
+                                     snowpark_fn.col(snowpark_col_name),
+                                     lambda col_name=snowpark_col_name: typer.type(
+                                         col_name
+                                     ),
+                                 )
+
              return map_func.map_unresolved_function(exp, column_mapping, typer)
          case "unresolved_named_lambda_variable":
              # Validate that this lambda variable is in scope
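
In PySpark terms, the new order-by branch lets a sort key that repeats an already-projected expression resolve to the existing output column instead of being recomputed. A hedged illustration (df is assumed to be an existing DataFrame; the column names are made up):

    from pyspark.sql import functions as F

    # The orderBy expression F.sum("amount") matches the projected "sum(amount)"
    # column from the aggregation, so the mapper can reuse that column.
    df.groupBy("region").agg(F.sum("amount")).orderBy(F.sum("amount"))
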
@@ -293,13 +332,17 @@ def map_expression(
                          col, lambda: typer.type(col)
                      )
                  else:
-                     raise AnalysisException(
+                     exception = AnalysisException(
                          f"Cannot resolve variable '{var_name}' within lambda function. "
                          f"Lambda functions can access their own parameters and parent dataframe columns. "
                          f"Current lambda parameters: {current_params}. "
                          f"If '{var_name}' is an outer scope lambda variable from a nested lambda, "
                          f"that is an unsupported feature in Snowflake SQL."
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.UNSUPPORTED_OPERATION
+                     )
+                     raise exception

              col = snowpark_fn.Column(
                  UnresolvedAttribute(exp.unresolved_named_lambda_variable.name_parts[0])
@@ -334,6 +377,8 @@ def map_expression(
          case "update_fields":
              return map_update_fields.map_update_fields(exp, column_mapping, typer)
          case _:
-             raise SnowparkConnectNotImplementedError(
+             exception = SnowparkConnectNotImplementedError(
                  f"Unsupported expression type {expr_type}"
              )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception