snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
```diff
@@ -13,6 +13,8 @@ from snowflake.snowpark.types import (
     YearMonthIntervalType,
 )
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -78,9 +80,11 @@ def map_extension(
             elif value.HasField("unresolved_attribute"):
                 name = "__" + key + "__" + exp_name[0]
             else:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Named argument not supported yet for this input."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             return [name], typed_col

         case "interval_literal":
@@ -152,9 +156,11 @@ def map_extension(

             queries = df.queries["queries"]
             if len(queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             query = f"({queries[0]})"

             match extension.subquery_expression.subquery_type:
@@ -168,7 +174,13 @@ def map_extension(
                     result_type = BooleanType()
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG:
                     # TODO: Currently, map_sql.py handles this, so we never end up here.
-                    raise SnowparkConnectNotImplementedError("Unexpected table arg")
+                    exception = SnowparkConnectNotImplementedError(
+                        "Unexpected table arg"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 case snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN:
                     cols = [
                         map_expression(e, column_mapping, typer)
@@ -184,14 +196,22 @@ def map_extension(
                     )
                     result_type = BooleanType()
                 case other:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         f"Unexpected subquery type: {other}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception

             return [name], TypedColumn(result_exp, lambda: [result_type])

         case other:
-            raise SnowparkConnectNotImplementedError(f"Unexpected extension {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def _format_year_month_interval(
```
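The pattern repeated throughout this release: instead of raising directly, unsupported paths now construct the exception, attach a machine-readable error code, then raise. Below is a minimal runnable sketch of that flow; the simplified `attach_custom_error_code` body is an assumption for illustration (the real helper lives in `error/error_utils.py` and its implementation is not shown in this diff).

```python
# A minimal sketch of the error-code pattern introduced above. The helper
# names mirror the diff; this attach_custom_error_code body is assumed,
# not the package's actual implementation.
from enum import Enum


class ErrorCodes(Enum):
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"


def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> None:
    # Hypothetical: stash the code on the exception instance so handlers
    # and telemetry can read it without changing the type or message.
    exc.custom_error_code = code


def handle_named_argument(supported: bool) -> None:
    if not supported:
        exception = NotImplementedError(
            "Named argument not supported yet for this input."
        )
        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
        raise exception
```

Attaching the code as an attribute presumably keeps existing `except` clauses and error messages intact while letting downstream consumers classify failures.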
```diff
@@ -257,6 +277,9 @@ def _format_day_time_interval(
     if is_negative:
         days = -days

+    # Calculate days string representation (handle -0 case)
+    days_str = "-0" if (is_negative and days == 0) else str(days)
+
     # Format based on the specific start/end field context
     if (
         start_field == DayTimeIntervalType.DAY and end_field == DayTimeIntervalType.DAY
@@ -324,7 +347,10 @@ def _format_day_time_interval(
         start_field == DayTimeIntervalType.HOUR
         and end_field == DayTimeIntervalType.MINUTE
     ):  # HOUR TO MINUTE
-        str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+        if is_negative:
+            str_value = f"INTERVAL '-{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
+        else:
+            str_value = f"INTERVAL '{_TWO_DIGIT_FORMAT.format(hours)}:{_TWO_DIGIT_FORMAT.format(minutes)}' HOUR TO MINUTE"
     elif (
         start_field == DayTimeIntervalType.HOUR
         and end_field == DayTimeIntervalType.SECOND
@@ -348,21 +374,21 @@ def _format_day_time_interval(
         and end_field == DayTimeIntervalType.SECOND
     ):  # DAY TO SECOND
         if seconds == int(seconds):
-            str_value = f"INTERVAL '{
+            str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
         else:
             seconds_str = _format_seconds_precise(seconds)
-            str_value = f"INTERVAL '{
+            str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
     else:
         # Fallback - use smart formatting like the original literal.py logic
-        if days
+        if days >= 0:
            if hours == 0 and minutes == 0 and seconds == 0:
                str_value = f"INTERVAL '{int(days)}' DAY"
            else:
                if seconds == int(seconds):
-                    str_value = f"INTERVAL '{
+                    str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{_format_time_component(int(seconds))}' DAY TO SECOND"
                else:
                    seconds_str = _format_seconds_precise(seconds)
-                    str_value = f"INTERVAL '{
+                    str_value = f"INTERVAL '{days_str} {_format_time_component(hours)}:{_format_time_component(minutes)}:{seconds_str}' DAY TO SECOND"
        elif hours > 0:
            if minutes == 0 and seconds == 0:
                str_value = f"INTERVAL '{_format_time_component(hours)}' HOUR"
```

(Several removed lines above were truncated by the diff viewer and are reproduced as captured.)
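The new `days_str` exists because Python integers cannot represent negative zero, so a negative interval whose day component is zero would otherwise print without its sign. A toy illustration (not the package's function):

```python
# Python ints collapse -0 to 0, so str() drops the sign when the day
# component of a negative interval is zero.
def days_repr(days: int, is_negative: bool) -> str:
    return "-0" if (is_negative and days == 0) else str(days)


assert str(-0) == "0"                           # the problem being worked around
assert days_repr(0, is_negative=True) == "-0"   # e.g. INTERVAL '-0 01:00:00'
assert days_repr(3, is_negative=False) == "3"
```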
```diff
@@ -18,8 +18,11 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quot
 from snowflake.snowpark.types import DayTimeIntervalType, YearMonthIntervalType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
     get_sql_named_arg,
     get_sql_plan,
     get_sql_pos_arg,
@@ -73,17 +76,20 @@ def sql_parser():

 @cache
 def _get_sql_parser():
-    return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()


 @cache
 def _get_sql_conf():
-    return jpype.JClass("org.apache.spark.sql.internal.SQLConf")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.internal.SQLConf")


 @cache
 def _as_java_list():
-    return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").seqAsJavaList


 def as_java_list(obj):
@@ -92,7 +98,8 @@ def as_java_map(obj):

 @cache
 def _as_java_map():
-    return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap
+    with get_jpype_jclass_lock():
+        return jpype.JClass("scala.collection.JavaConverters").mapAsJavaMap


 def as_java_map(obj):
```
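All of the cached jpype `JClass` lookups above now run under a shared lock. A sketch of the pattern, assuming `get_jpype_jclass_lock()` returns a process-wide `threading.Lock` (the real helper is imported from `utils/context.py`; its implementation is not shown in this diff):

```python
# Sketch of the locking pattern around cached JClass resolution.
import threading
from functools import cache

_JCLASS_LOCK = threading.Lock()


def get_jpype_jclass_lock() -> threading.Lock:
    # Assumed shape: one lock shared by every JClass-resolving helper.
    return _JCLASS_LOCK


@cache
def _get_sql_parser():
    # functools.cache is not atomic: two threads can both miss the cache
    # and enter the body, so the JClass resolution itself is serialized.
    with get_jpype_jclass_lock():
        import jpype  # assumed available with a started JVM
        return jpype.JClass("org.apache.spark.sql.execution.SparkSqlParser")()
```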
```diff
@@ -253,12 +260,47 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
     class_name = str(exp.getClass().getSimpleName())
     match class_name:
         case "AggregateExpression":
-            func_name = as_java_list(exp.children())[0].nodeName()
+            aggregate_func = as_java_list(exp.children())[0]
+            func_name = aggregate_func.nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in
+                for e in list(as_java_list(aggregate_func.children()))
             ]
-            proto = apply_filter_clause(func_name, args, exp)
+
+            # Special handling for percentile_cont and percentile_disc
+            # These functions have a 'reverse' property that indicates sort order
+            # Pass it as a 3rd argument (sort_order expression) without modifying children
+            if func_name.lower() in ("percentile_cont", "percentiledisc"):
+                # percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+                if len(args) != 2:
+                    exception = AssertionError(
+                        f"{func_name} expected 2 args but got {len(args)}"
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
+
+                reverse = bool(aggregate_func.reverse())
+
+                direction = (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+                    if reverse
+                    else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+                )
+
+                sort_order_expr = expressions_proto.Expression(
+                    sort_order=expressions_proto.Expression.SortOrder(
+                        child=args[0],
+                        direction=direction,
+                    )
+                )
+                args.append(sort_order_expr)
+                proto = apply_filter_clause(func_name, [args[0]], exp)
+                # second arg is a literal value and it doesn't make sense to apply filter on it.
+                # also skips filtering on sort_order.
+                proto.unresolved_function.arguments.append(args[1])
+                proto.unresolved_function.arguments.append(sort_order_expr)
+            else:
+                proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
             proto = expressions_proto.Expression(
                 alias=expressions_proto.Expression.Alias(
```
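Catalyst's `percentile_cont`/`percentile_disc` aggregates carry their sort direction as a boolean `reverse` property rather than as a child expression; the hunk above translates it into an explicit `SortOrder` argument. A reduced sketch with stand-in dataclasses in place of the generated Spark Connect protos (these are assumed shapes, not the real classes):

```python
# Stand-in types for illustration; the real code builds
# expressions_proto.Expression.SortOrder messages.
from dataclasses import dataclass
from enum import Enum


class SortDirection(Enum):
    ASCENDING = 1
    DESCENDING = 2


@dataclass
class SortOrder:
    child: object
    direction: SortDirection


def percentile_sort_order(column_arg: object, reverse: bool) -> SortOrder:
    # Catalyst exposes descending percentiles via the boolean `reverse`
    # property; map it onto an explicit SortOrder argument.
    direction = SortDirection.DESCENDING if reverse else SortDirection.ASCENDING
    return SortOrder(child=column_arg, direction=direction)


assert percentile_sort_order("col", reverse=True).direction is SortDirection.DESCENDING
```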
```diff
@@ -275,7 +317,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                         function_name="when",
                         arguments=[
                             map_logical_plan_expression(e)
-                            for e in as_java_list(exp.children())
+                            for e in list(as_java_list(exp.children()))
                         ],
                     )
                 )
@@ -289,7 +331,8 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "Coalesce":
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]

             proto = expressions_proto.Expression(
@@ -357,7 +400,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     subquery_type=snowflake_proto.SubqueryExpression.SUBQUERY_TYPE_IN,
                     in_subquery_values=[
                         map_logical_plan_expression(value)
-                        for value in as_java_list(exp.values())
+                        for value in list(as_java_list(exp.values()))
                     ],
                 )
             )
@@ -366,7 +409,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
         case "LambdaFunction":
             arguments = [
                 map_logical_plan_expression(arg).unresolved_named_lambda_variable
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]
             proto = expressions_proto.Expression(
                 lambda_function=expressions_proto.Expression.LambdaFunction(
@@ -380,14 +423,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=class_name.lower(),
                     arguments=[
                         map_logical_plan_expression(e)
-                        for e in as_java_list(exp.children())
+                        for e in list(as_java_list(exp.children()))
                     ],
                 )
             )
         case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
-            patterns = as_java_list(exp.patterns())
+            patterns = list(as_java_list(exp.patterns()))
             arguments = [
-                map_logical_plan_expression(e)
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
             ]
             arguments += [map_logical_plan_expression(e) for e in patterns]
             proto = expressions_proto.Expression(
@@ -421,19 +465,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 end_field = _YEAR_MONTH_FIELD_MAP.get(end_field_name)

                 if start_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval start field: '{start_field_name}'. Expected 'year' or 'month'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 if end_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval end field: '{end_field_name}'. Expected 'year' or 'month'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Validate field ordering (start_field should be <= end_field)
                 if start_field > end_field:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid year-month interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Use extension for year-month intervals to preserve start/end field info
                 literal = expressions_proto.Expression.Literal(
@@ -466,19 +516,25 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 end_field = _DAY_TIME_FIELD_MAP.get(end_field_name)

                 if start_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval start field: '{start_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 if end_field is None:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval end field: '{end_field_name}'. Expected 'day', 'hour', 'minute', or 'second'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Validate field ordering (start_field should be <= end_field)
                 if start_field > end_field:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Invalid day-time interval: start field '{start_field_name}' must come before or equal to end field '{end_field_name}'."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception

                 # Use extension for day-time intervals to preserve start/end field info
                 literal = expressions_proto.Expression.Literal(
@@ -534,19 +590,27 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             name = str(exp.name())
             value = get_sql_named_arg(name)
             if not value.HasField("literal_type"):
-                raise AnalysisException(f"Found an unbound parameter {name!r}")
+                exception = AnalysisException(f"Found an unbound parameter {name!r}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "NamePlaceholder$":
             # This is a placeholder for an expression name to be resolved later.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "NamePlaceholder is not supported in SQL expressions."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "PosParameter":
             pos = exp.pos()
             try:
                 value = get_sql_pos_arg(pos)
             except KeyError:
-                raise AnalysisException(f"Found an unbound parameter at position {pos}")
+                exception = AnalysisException(
+                    f"Found an unbound parameter at position {pos}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
             proto = expressions_proto.Expression(literal=value)
         case "ScalarSubquery":
             rel_proto = map_logical_plan_relation(exp.plan())
@@ -616,7 +680,7 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                         ]
                         + [
                             map_logical_plan_expression(e)
-                            for e in as_java_list(exp.children())
+                            for e in list(as_java_list(exp.children()))
                         ],
                     )
                 )
@@ -659,18 +723,20 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         case "UnresolvedFunction":
             func_name = ".".join(
-                str(part) for part in as_java_list(exp.nameParts())
+                str(part) for part in list(as_java_list(exp.nameParts()))
             ).lower()
             args = [
                 map_logical_plan_expression(arg)
-                for arg in as_java_list(exp.arguments())
+                for arg in list(as_java_list(exp.arguments()))
             ]

             proto = apply_filter_clause(func_name, args, exp, exp.isDistinct())
         case "UnresolvedNamedLambdaVariable":
             proto = expressions_proto.Expression(
                 unresolved_named_lambda_variable=expressions_proto.Expression.UnresolvedNamedLambdaVariable(
-                    name_parts=[
+                    name_parts=[
+                        str(part) for part in list(as_java_list(exp.nameParts()))
+                    ],
                 )
             )
         case "UnresolvedStar":
@@ -691,9 +757,11 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 # Build Window expression
                 proto = get_window_expression_proto(window_spec, exp.child())
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Window specification not found {window_spec_reference!r}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                raise exception
         case "UTF8String":
             proto = expressions_proto.Expression(
                 literal=expressions_proto.Expression.Literal(
@@ -723,13 +791,15 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                     function_name=proto_func,
                     arguments=[
                         map_logical_plan_expression(arg)
-                        for arg in as_java_list(exp.children())
+                        for arg in list(as_java_list(exp.children()))
                     ],
                 )
            )

        case other:
-            raise SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            exception = SnowparkConnectNotImplementedError(f"Not implemented: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     return proto

@@ -752,11 +822,11 @@ def get_window_expression_proto(
         window_function=map_logical_plan_expression(window_function),
         partition_spec=[
             map_logical_plan_expression(e)
-            for e in as_java_list(window_spec.partitionSpec())
+            for e in list(as_java_list(window_spec.partitionSpec()))
         ],
         order_spec=[
             map_logical_plan_expression(e).sort_order
-            for e in as_java_list(window_spec.orderSpec())
+            for e in list(as_java_list(window_spec.orderSpec()))
         ],
         frame_spec=frame_spec_proto,
     )
```
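The recurring change in these hunks: generator expressions over Java-backed sequences now copy eagerly via `list(...)`. A stand-in sketch; the motivation is not stated in the diff, so the hazard described in the comments is an assumption (lazy iteration over a Java proxy that may no longer be valid by the time the generator is consumed):

```python
# Snapshot Java-backed sequences into a Python list before mapping,
# rather than iterating them lazily. java_children is any iterable proxy.
def map_children(map_fn, java_children):
    materialized = list(java_children)  # copy while the proxy is alive
    return [map_fn(e) for e in materialized]
```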
```diff
@@ -10,6 +10,8 @@ from snowflake import snowpark
 from snowflake.snowpark.types import MapType, StructType, VariantType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -54,7 +56,11 @@ def cache_external_udf_wrapper(from_register_udf: bool):
             case "python_udf":
                 pass
             case _:
-                raise ValueError(f"Unsupported UDF type: {function_type}")
+                exception = ValueError(f"Unsupported UDF type: {function_type}")
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

         return cached_udf

@@ -97,9 +103,11 @@ def register_udf(
         case "scalar_scala_udf":
             output_type = udf_proto.scalar_scala_udf.outputType
         case _:
-            raise ValueError(
+            exception = ValueError(
                 f"Unsupported UDF type: {udf_proto.WhichOneof('function')}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
     processed_return_type, original_return_type = process_udf_return_type(output_type)
     session = get_or_create_snowpark_session()
     kwargs = {
```
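`register_udf` picks the output type based on which oneof field is set on the UDF proto, and unsupported variants now raise `ValueError` with `UNSUPPORTED_OPERATION` attached. A reduced sketch of that dispatch; a plain dict stands in for the protobuf message, and the `python_udf` field name is an assumption (only `scalar_scala_udf.outputType` appears in the diff above):

```python
# Oneof-style dispatch with a dict standing in for the proto message.
def resolve_output_type(udf_proto: dict):
    function_type = next(iter(udf_proto), None)  # stand-in for WhichOneof("function")
    match function_type:
        case "python_udf":
            return udf_proto["python_udf"]["output_type"]  # assumed field name
        case "scalar_scala_udf":
            return udf_proto["scalar_scala_udf"]["outputType"]
        case _:
            raise ValueError(f"Unsupported UDF type: {function_type}")


assert resolve_output_type({"scalar_scala_udf": {"outputType": "LongType"}}) == "LongType"
```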
```diff
@@ -15,6 +15,8 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import ArrayType, LongType, MapType, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
@@ -85,9 +87,11 @@ def map_unresolved_attribute(
         grouping_spark_columns = get_current_grouping_columns()
         if not grouping_spark_columns:
             # grouping__id can only be used with GROUP BY CUBE/ROLLUP/GROUPING SETS
-            raise AnalysisException(
+            exception = AnalysisException(
                 "[MISSING_GROUP_BY] grouping__id can only be used with GROUP BY (CUBE | ROLLUP | GROUPING SETS)"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+            raise exception
         # Convert to GROUPING_ID() function call with the grouping columns
         # Map Spark column names to Snowpark column names
         snowpark_cols = []
@@ -99,9 +103,11 @@ def map_unresolved_attribute(
                 )
             )
             if not snowpark_name:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INTERNAL_ERROR] Cannot find Snowpark column mapping for grouping column '{spark_col_name}'"
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             snowpark_cols.append(snowpark_fn.col(snowpark_name))

         # Call GROUPING_ID with all grouping columns using Snowpark names
@@ -155,10 +161,12 @@ def map_unresolved_attribute(

         if is_catalog:
             # This looks like a catalog.database.column.field pattern
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{original_attr_name}` cannot be resolved. "
                 f"Cross-catalog column references are not supported in DataFrame API."
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

         attr_name = ".".join(name_parts)

@@ -205,18 +213,24 @@ def map_unresolved_attribute(
                 if compiled_regex.fullmatch(col_name):
                     matched_columns.append(col_name)
             except re.error as e:
-                raise AnalysisException(f"Invalid regex pattern '{regex_pattern}': {e}")
+                exception = AnalysisException(
+                    f"Invalid regex pattern '{regex_pattern}': {e}"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+                raise exception

             if not matched_columns:
                 # Keep the improved error message for SQL regex patterns
                 # This is only hit for SQL queries like SELECT `(e|f)` FROM table
                 # when spark.sql.parser.quotedRegexColumnNames is enabled
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"No columns match the regex pattern '{regex_pattern}'. "
                     f"Snowflake SQL does not support SELECT statements with no columns. "
                     f"Please ensure your regex pattern matches at least one column. "
                     f"Available columns: {', '.join(available_columns[:10])}{'...' if len(available_columns) > 10 else ''}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # When multiple columns match, we need to signal that this should expand to multiple columns
             # Since map_unresolved_attribute can only return one column, we'll use a special marker
```
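When `spark.sql.parser.quotedRegexColumnNames` is enabled, a quoted identifier like `` `(e|f)` `` is treated as a regex that must full-match a column name. A simplified sketch of the selection logic above, with plain `ValueError` standing in for `AnalysisException`:

```python
# Regex column selection: compile the quoted pattern, full-match it
# against the available column names, and fail on empty results.
import re


def match_columns(regex_pattern: str, available_columns: list[str]) -> list[str]:
    try:
        compiled = re.compile(regex_pattern)
    except re.error as e:
        raise ValueError(f"Invalid regex pattern '{regex_pattern}': {e}")
    matched = [c for c in available_columns if compiled.fullmatch(c)]
    if not matched:
        raise ValueError(f"No columns match the regex pattern '{regex_pattern}'.")
    return matched


assert match_columns("(e|f)", ["e", "f", "g"]) == ["e", "f"]
```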
```diff
@@ -233,7 +247,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             # Store matched columns info for later use
@@ -248,7 +262,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             return (matched_columns[0], typed_col)
@@ -266,7 +280,7 @@ def map_unresolved_attribute(
         )
         if snowpark_name is not None:
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_attr_name)
         else:
             # this means it has to be a struct column with a field name
             snowpark_name: str | None = None
@@ -324,7 +338,7 @@ def map_unresolved_attribute(
                 )
                 if snowpark_name is not None:
                     col = get_col(snowpark_name)
-                    qualifiers = column_mapping.
+                    qualifiers = column_mapping.get_qualifiers_for_spark_column(
                         unqualified_name
                     )
                     typed_col = TypedColumn(col, lambda: typer.type(col))
@@ -346,16 +360,22 @@ def map_unresolved_attribute(
             )
             if outer_col_name:
                 # This is an outer scope column being referenced inside a lambda
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Reference to non-lambda variable '{attr_name}' within lambda function. "
                     f"Lambda functions can only access their own parameters. "
                     f"If '{attr_name}' is a table column, it must be passed as an explicit parameter to the enclosing function."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

             if has_plan_id:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[RESOLVED_REFERENCE_COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             else:
                 # Column does not exist. Pass in dummy column name for lazy error throwing as it could be a built-in function
                 snowpark_name = attr_name
@@ -365,9 +385,11 @@ def map_unresolved_attribute(
                 col_type = typer.type(col)[0]
             except SnowparkSQLException as e:
                 if e.raw_message is not None and "invalid identifier" in e.raw_message:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[COLUMN_NOT_FOUND] The column "{attr_name}" does not exist in the target dataframe.'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                    raise exception
                 else:
                     raise
             is_struct = isinstance(col_type, StructType)
@@ -383,7 +405,7 @@ def map_unresolved_attribute(
             for field_name in path:
                 col = col.getItem(field_name)

-            qualifiers =
+            qualifiers = set()

             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
@@ -416,7 +438,9 @@ def _match_path_to_struct(path: list[str], col_type: StructType) -> list[str]:
             typ = typ.value_type if isinstance(typ, MapType) else typ.element_type
         else:
             # If the type is not a struct, map, or array, we cannot access the field.
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INVALID_EXTRACT_BASE_FIELD_TYPE] Can't extract a value from \"{'.'.join(path[:i])}\". Need a complex type [STRUCT, ARRAY, MAP] but got \"{typ}\"."
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
     return adjusted_path
```

(Some removed lines in these hunks were truncated by the diff viewer, e.g. `qualifiers = column_mapping.`, and are reproduced as captured.)
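`_match_path_to_struct` walks a dotted field path through nested struct/map/array types and rejects extraction from anything else. A reduced walk in the same spirit, with toy type classes standing in for Snowpark's `StructType`/`MapType`/`ArrayType` (the names and shapes here are illustrative, not the real API):

```python
# Toy nested types; each step of the path must land on a complex type.
from dataclasses import dataclass, field


@dataclass
class Struct:
    fields: dict = field(default_factory=dict)


@dataclass
class Map:
    value_type: object = None


@dataclass
class Array:
    element_type: object = None


def check_path(path: list[str], typ) -> None:
    for i, name in enumerate(path):
        if isinstance(typ, Struct):
            typ = typ.fields[name]
        elif isinstance(typ, (Map, Array)):
            # Maps descend into the value type, arrays into the element type.
            typ = typ.value_type if isinstance(typ, Map) else typ.element_type
        else:
            raise TypeError(
                f"Can't extract a value from \"{'.'.join(path[:i])}\"; "
                f"need a complex type [STRUCT, ARRAY, MAP] but got {typ!r}."
            )


check_path(["a", "b"], Struct(fields={"a": Struct(fields={"b": "int"})}))
```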