snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +54 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +66 -4
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +25 -6
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +24 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/utils.py +19 -2
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +146 -63
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -2
- snowflake/snowpark_connect/utils/telemetry.py +81 -18
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -33,7 +33,11 @@ from snowflake.snowpark_connect.column_name_handler import (
 )
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_alias,
     map_expression,
@@ -369,56 +373,64 @@ def map_sort(
         for col in input_container.column_map.get_spark_columns()
     ]

-    [old lines 372-381 not captured in this view]
+    # Process ORDER BY expressions with a context flag to enable column reuse optimization
+    from snowflake.snowpark_connect.utils.context import push_processing_order_by_scope
+
+    with push_processing_order_by_scope():
+        for so in sort_order:
+            if so.child.HasField("literal"):
+                column_index = unwrap_literal(so.child)
+                try:
+                    if column_index <= 0:
+                        exception = IndexError()
+                        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                        raise exception
+                    col = input_df[column_index - 1]
+                except IndexError:
+                    exception = AnalysisException(
+                        f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+            else:
+                _, typed_column = map_single_column_expression(
+                    so.child, input_container.column_map, typer
                 )
-
-        _, typed_column = map_single_column_expression(
-            so.child, input_container.column_map, typer
-        )
-        col = typed_column.col
+                col = typed_column.col

-    [old lines 389-409 not captured in this view]
+            match (so.direction, so.null_ordering):
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.asc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.asc_nulls_last()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.desc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.desc_nulls_last()

-
+            cols.append(col)

-    [old lines 413-421 not captured in this view]
+            ascending.append(
+                so.direction
+                == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+            )
+            if (
+                so.direction
+                != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
+            ):
+                order_specified = True

     # TODO: sort.isglobal.
     if not order_specified:
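The map_sort hunk above adds two behaviors: an ORDER BY key whose child is a literal is now treated as a 1-based select-list position (with an out-of-range check), and each key gets an explicit sort direction plus null ordering. A minimal PySpark-side sketch of calls that exercise this path; the Connect endpoint and data are hypothetical, not part of this diff:

    from pyspark.sql import SparkSession

    # Hypothetical Spark Connect endpoint served by snowpark-connect.
    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
    df = spark.createDataFrame([(3, "c"), (1, "a"), (None, "b")], "id INT, tag STRING")
    df.createOrReplaceTempView("t")

    # Positional ORDER BY: the literal 2 resolves to the second select-list column.
    spark.sql("SELECT id, tag FROM t ORDER BY 2").show()

    # Direction plus null ordering maps onto asc_nulls_first / desc_nulls_last and friends.
    df.orderBy(df.id.desc_nulls_last()).show()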
@@ -446,9 +458,11 @@ def map_to_df(
     new_column_names = list(rel.to_df.column_names)
     if len(new_column_names) != len(input_container.column_map.columns):
         # TODO: Check error type here
-
+        exception = ValueError(
             "Number of column names must match number of columns in DataFrame"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     snowpark_new_column_names = make_column_names_snowpark_compatible(
         new_column_names, rel.common.plan_id
     )

@@ -507,9 +521,11 @@ def map_to_schema(
     for field in rel.to_schema.schema.struct.fields:
         if field.name in already_existing_columns:
             if count_case_insensitive_column_names[field.name.lower()] > 1:
-
+                exception = AnalysisException(
                     f"[AMBIGUOUS_COLUMN_OR_FIELD] Column or field `{field.name}` is ambiguous and has {len(input_container.column_map.spark_to_col[field.name])} matches."
                 )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
             snowpark_name = None
             for name in input_container.column_map.spark_to_col:
                 if name.lower() == field.name.lower():

@@ -526,17 +542,23 @@ def map_to_schema(
                 and snowpark_field.nullable
                 and not isinstance(snowpark_field.datatype, StructType)
             ):
-
+                exception = AnalysisException(
                     f"[NULLABLE_COLUMN_OR_FIELD] Column or field `{field.name}` is nullable while it's required to be non-nullable."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             # Check type casting validation
             if not _can_cast_column_in_schema(
                 snowpark_field.datatype, proto_to_snowpark_type(field.data_type)
             ):
-
+                exception = AnalysisException(
                     f"""[INVALID_COLUMN_OR_FIELD_DATA_TYPE] Column or field `{field.name}` is of type "{map_snowpark_to_pyspark_types(proto_to_snowpark_type(field.data_type))}" while it's required to be "{map_snowpark_to_pyspark_types(snowpark_field.datatype)}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
     if len(already_existing_columns) == len(new_column_names):
         # All columns already exist, we're doing a simple update.
         snowpark_new_column_names = []

@@ -761,9 +783,11 @@ def map_with_columns(
         name = names_list[0]
         name_normalized = input_container.column_map._normalized_spark_name(name)
         if name_normalized in seen_columns:
-
+            exception = ValueError(
                 f"[COLUMN_ALREADY_EXISTS] The column `{name}` already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(name_normalized)
         # If the column name is already in the DataFrame, we replace it, so we use the
         # mapping to get the correct column name.

@@ -772,7 +796,9 @@ def map_with_columns(
             [name]
         )
         if len(all_instances_of_spark_column_name) == 0:
-
+            exception = KeyError(f"Spark column name {name} does not exist")
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
         with_columns_names.extend(all_instances_of_spark_column_name)
         with_columns_exprs.extend(
             [expr.col] * len(all_instances_of_spark_column_name)

@@ -852,7 +878,9 @@ def map_unpivot(
     # Spark API: df.unpivot([id_columns], [unpivot_columns], var_column, val_column)
     # Snowpark API: df.unpivot(val_column, var_column, [unpivot_columns])
     if rel.unpivot.HasField("values") and len(rel.unpivot.values.values) == 0:
-
+        exception = SparkException.unpivot_requires_value_columns()
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     input_container = map_relation(rel.unpivot.input)
     input_df = input_container.dataframe

@@ -893,7 +921,7 @@ def map_unpivot(
         )
     if not get_lease_common_ancestor_classes(type_list):
         # TODO: match exactly how spark shows mismatched columns
-
+        exception = SparkException.unpivot_value_data_type_mismatch(
            ", ".join(
                [
                    f"{dtype} {column_name}"

@@ -901,6 +929,8 @@ def map_unpivot(
                ]
            )
        )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
    return not is_same_type and contains_numeric_type

def get_column_names(

@@ -1097,7 +1127,9 @@ def map_group_map(
        snowpark_grouping_expressions.append(snowpark_column.col)
        group_name_list.append(new_name)
    if rel.group_map.func.python_udf is None:
-
+        exception = ValueError("group_map relation without python udf is not supported")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

    python_major, python_minor = rel.group_map.func.python_udf.python_ver.split(".")
    is_compatible_python = sys.version_info.major == int(
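Nearly every hunk in this release follows the same three-step change: construct the exception, tag it with attach_custom_error_code and an ErrorCodes member, then raise it. The sketch below shows that pattern in isolation; the ErrorCodes enum and the helper are only stubbed here (their real definitions live in error/error_codes.py and error/error_utils.py, which this view does not show), so treat the stub bodies as assumptions:

    from enum import Enum


    class ErrorCodes(Enum):
        # Assumed members; the real enum defines the full set used across the diff.
        INVALID_INPUT = "INVALID_INPUT"
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"


    def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> None:
        # Assumption: the helper annotates the exception so telemetry / error
        # reporting can surface a stable code alongside the original message.
        setattr(exception, "custom_error_code", code)


    def check_fraction(frac: float) -> None:
        # Same shape as the map_sample change below: build, tag, raise.
        if frac < 0 or frac > 1:
            exception = ValueError("Sample fraction must be between 0 and 1")
            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
            raise exception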
@@ -17,6 +17,8 @@ from snowflake.snowpark_connect.column_name_handler import (
 )
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import map_expression
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation

@@ -84,11 +86,13 @@ def map_extension(
            input_df = result.dataframe
            snowpark_col_names = result.column_map.get_snowpark_columns()
            if len(subquery_aliases.aliases) != len(snowpark_col_names):
-
+                exception = AnalysisException(
                    "Number of column aliases does not match number of columns. "
                    f"Number of column aliases: {len(subquery_aliases.aliases)}; "
                    f"number of columns: {len(snowpark_col_names)}."
                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
            return DataFrameContainer.create_with_column_mapping(
                dataframe=input_df,
                spark_column_names=subquery_aliases.aliases,

@@ -108,18 +112,22 @@ def map_extension(

            left_queries = left_df.queries["queries"]
            if len(left_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                    f"Unexpected number of queries: {len(left_queries)}"
                )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
            left_query = left_queries[0]
            with push_outer_dataframe(left_result):
                right_result = map_relation(lateral_join.right)
            right_df = right_result.dataframe
            right_queries = right_df.queries["queries"]
            if len(right_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                    f"Unexpected number of queries: {len(right_queries)}"
                )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
            right_query = right_queries[0]
            input_df_sql = f"WITH __left AS ({left_query}) SELECT * FROM __left INNER JOIN LATERAL ({right_query})"
            session = snowpark.Session.get_active_session()

@@ -139,7 +147,11 @@ def map_extension(
        case "aggregate":
            return map_aggregate(extension.aggregate, rel.common.plan_id)
        case other:
-
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def get_udtf_project(relation: relation_proto.Relation) -> bool:

@@ -174,7 +186,9 @@ def handle_udtf_with_table_arguments(
    session = snowpark.Session.get_active_session()
    udtf_name_lower = udtf_info.function_name.lower()
    if udtf_name_lower not in session._udtfs:
-
+        exception = ValueError(f"UDTF '{udtf_info.function_name}' not found.")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
    _udtf_obj, udtf_spark_output_names = session._udtfs[udtf_name_lower]

    table_containers = []

@@ -188,10 +202,12 @@ def handle_udtf_with_table_arguments(
            if not get_boolean_session_config_param(
                "spark.sql.tvf.allowMultipleTableArguments.enabled"
            ):
-
+                exception = AnalysisException(
                    "[TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS] Multiple table arguments are not enabled. "
                    "Please set `spark.sql.tvf.allowMultipleTableArguments.enabled` to `true`"
                )
+                attach_custom_error_code(exception, ErrorCodes.CONFIG_NOT_ENABLED)
+                raise exception

    base_df = table_containers[0][0].dataframe
    first_table_col_count = len(base_df.columns)

@@ -339,9 +355,11 @@ def map_aggregate(
            exp, input_container.column_map, typer
        )
        if len(new_names) != 1:
-
+            exception = SnowparkConnectNotImplementedError(
                "Multi-column aggregate expressions are not supported"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        return new_names[0], snowpark_column

    raw_groupings: list[tuple[str, TypedColumn]] = []

@@ -474,9 +492,11 @@ def map_aggregate(
                snowpark.GroupingSets(*sets_mapped)
            )
        case other:
-
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported GROUP BY type: {other}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

    result = result.agg(*aggregations, exclude_grouping_columns=True)
@@ -5,6 +5,7 @@
 from functools import reduce

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
+from pyspark.errors import AnalysisException

 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark

@@ -12,7 +13,11 @@ from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )

@@ -62,7 +67,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
    match rel.join.join_type:
        case relation_proto.Join.JOIN_TYPE_UNSPECIFIED:
            # TODO: Understand what UNSPECIFIED Join type is
-
+            exception = SnowparkConnectNotImplementedError("Unspecified Join Type")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        case relation_proto.Join.JOIN_TYPE_INNER:
            join_type = "inner"
        case relation_proto.Join.JOIN_TYPE_FULL_OUTER:

@@ -78,7 +85,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
        case relation_proto.Join.JOIN_TYPE_CROSS:
            join_type = "cross"
        case other:
-
+            exception = SnowparkConnectNotImplementedError(f"Other Join Type: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

    # This handles case sensitivity for using_columns
    case_corrected_right_columns: list[str] = []

@@ -124,9 +133,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
        is None
        for c in using_columns
    ):
-
-
-        raise pyspark.errors.AnalysisException(
+        exception = AnalysisException(
            USING_COLUMN_NOT_FOUND_ERROR.format(
                next(
                    c

@@ -140,6 +147,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                left_container.column_map.get_spark_columns(),
            )
        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
    if any(
        right_container.column_map.get_snowpark_column_name_from_spark_column_name(
            c, allow_non_exists=True, return_first=True

@@ -147,9 +156,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
        is None
        for c in using_columns
    ):
-
-
-        raise pyspark.errors.AnalysisException(
+        exception = AnalysisException(
            USING_COLUMN_NOT_FOUND_ERROR.format(
                next(
                    c

@@ -163,6 +170,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                right_container.column_map.get_spark_columns(),
            )
        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception

    # Round trip the using columns through the column map to get the correct names
    # in order to support case sensitivity.

@@ -227,7 +236,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
        result = joined_df.drop(*(right for _, right in snowpark_using_columns))
    else:
        if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
-
+            exception = SparkException.implicit_cartesian_product("inner")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        result: snowpark.DataFrame = left_input.join(
            right=right_input,
            how=join_type,
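In the map_join hunks above, a join without a usable join condition is now rejected with SparkException.implicit_cartesian_product when spark.sql.crossJoin.enabled is false, instead of an untagged error. A short, hypothetical client-side illustration (session, endpoint, and data are assumptions, not part of this diff):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # hypothetical endpoint
    left = spark.createDataFrame([(1, "a")], "id INT, x STRING")
    right = spark.createDataFrame([(2, "b")], "id INT, y STRING")

    # If spark.sql.crossJoin.enabled is false, the server raises the
    # implicit-cartesian-product error shown in the hunk above.
    spark.conf.set("spark.sql.crossJoin.enabled", "true")
    left.join(right).show()       # explicit opt-in: behaves as a cartesian product
    left.crossJoin(right).show()  # crossJoin() does not depend on the flag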
@@ -19,6 +19,8 @@ from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import (
     get_python_sql_utils_class,
     map_json_schema_to_snowpark,

@@ -327,9 +329,11 @@ def map_local_relation(
            column_metadata=column_metadata,
        )
    else:
-
+        exception = SnowparkConnectNotImplementedError(
            "LocalRelation without data & schema is not supported"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception


def map_range(
@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.cache import (
     df_cache_map_get,
     df_cache_map_put_if_absent,

@@ -103,7 +105,9 @@ def map_relation(
    else:
        # This happens when the relation is empty, usually because the incoming message
        # type was incorrectly routed here.
-
+        exception = SnowparkConnectNotImplementedError("No Relation Type")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

    result: DataFrameContainer | pandas.DataFrame
    operation = rel.WhichOneof("rel_type")

@@ -121,11 +125,19 @@ def map_relation(
                case relation_proto.Aggregate.GroupType.GROUP_TYPE_PIVOT:
                    result = map_aggregate.map_pivot_aggregate(rel)
                case other:
-
+                    exception = SnowparkConnectNotImplementedError(
+                        f"AGGREGATE {other}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
        case "approx_quantile":
            result = map_stats.map_approx_quantile(rel)
        case "as_of_join":
-
+            exception = SnowparkConnectNotImplementedError("AS_OF_JOIN")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        case "catalog":  # TODO: order these alphabetically
            result = map_catalog.map_catalog(rel.catalog)
        case "collect_metrics":

@@ -179,9 +191,11 @@ def map_relation(
                (get_session_id(), rel.cached_local_relation.hash)
            )
            if cached_df is None:
-
+                exception = ValueError(
                    f"Local relation with hash {rel.cached_local_relation.hash} not found in cache."
                )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
            return cached_df
        case "map_partitions":
            result = map_map_partitions.map_map_partitions(rel)

@@ -235,7 +249,13 @@ def map_relation(
                case relation_proto.SetOperation.SetOpType.SET_OP_TYPE_EXCEPT:
                    result = map_row_ops.map_except(rel)
                case other:
-
+                    exception = SnowparkConnectNotImplementedError(
+                        f"SET_OP {other}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
        case "show_string":
            result = map_show_string.map_show_string(rel)
        case "sort":

@@ -261,11 +281,17 @@ def map_relation(
        case "with_columns_renamed":
            result = map_column_ops.map_with_columns_renamed(rel)
        case "with_relations":
-
+            exception = SnowparkConnectNotImplementedError("WITH_RELATIONS")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        case "group_map":
            result = map_column_ops.map_group_map(rel)
        case other:
-
+            exception = SnowparkConnectNotImplementedError(
+                f"Other Relation {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

    # Store container in plan cache
    if isinstance(result, DataFrameContainer):
@@ -29,12 +29,17 @@ from snowflake.snowpark_connect.column_name_handler import (
 )
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.utils.identifiers import (
+    split_fully_qualified_spark_name,
+)
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )

@@ -55,9 +60,11 @@ def map_deduplicate(
        rel.deduplicate.HasField("within_watermark")
        and rel.deduplicate.within_watermark
    ):
-
+        exception = AnalysisException(
            "dropDuplicatesWithinWatermark is not supported with batch DataFrames/DataSets"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

    if (
        rel.deduplicate.HasField("all_columns_as_keys")

@@ -131,11 +138,19 @@ def map_fillna(
    input_df = input_container.dataframe

    if len(rel.fill_na.cols) > 0:
+        if rel.fill_na.cols == ["*"]:
+            # Expand "*" to all columns
+            spark_col_names = input_container.column_map.get_spark_columns()
+        else:
+            spark_col_names = list(rel.fill_na.cols)
+
+        # We don't validate the fully qualified spark name here as fillNa is no-op for structured type colums.
+        # It only works for scalar type columns like float, int, string or bool.
        columns: list[str] = [
            input_container.column_map.get_snowpark_column_name_from_spark_column_name(
-                c
+                split_fully_qualified_spark_name(c)[0]
            )
-            for c in
+            for c in spark_col_names
        ]
        values = [get_literal_field_and_name(v)[0] for v in rel.fill_na.values]
        if len(values) == 1:

@@ -212,7 +227,9 @@ def map_union(
    spark_sql_ansi_enabled = global_config.spark_sql_ansi_enabled
    if left_dtypes != right_dtypes and not rel.set_op.by_name:
        if len(left_dtypes) != len(right_dtypes):
-
+            exception = AnalysisException("UNION: the number of columns must match")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
        target_left_dtypes, target_right_dtypes = [], []
        for left_type, right_type in zip(left_dtypes, right_dtypes):
            match (left_type, right_type):

@@ -248,9 +265,11 @@ def map_union(
                        not spark_sql_ansi_enabled
                        or snowpark.types.StringType() not in [left_type, right_type]
                    ):  # In ansi mode , string type union boolean type is acceptable
-
+                        exception = AnalysisException(
                            f"""[INCOMPATIBLE_COLUMN_TYPE] UNION can only be performed on tables with compatible column types. "{str(left_type)}" type which is not compatible with "{str(right_type)}". """
                        )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
                    target_left_dtypes.append(left_type)
                    target_right_dtypes.append(right_type)
                case _:

@@ -776,7 +795,9 @@ def map_sample(

    frac = rel.sample.upper_bound - rel.sample.lower_bound
    if frac < 0 or frac > 1:
-
+        exception = IllegalArgumentException("Sample fraction must be between 0 and 1")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
    # The seed argument is not supported here. There are a number of reasons that implementing
    # this will be complicated in Snowflake. Here is a list of complications:
    #

@@ -791,9 +812,11 @@ def map_sample(
    # these issues.
    if rel.sample.with_replacement:
        # TODO: Use a random number generator with ROW_NUMBER and SELECT.
-
+        exception = SnowparkConnectNotImplementedError(
            "Sample with replacement is not supported"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
    else:
        result: snowpark.DataFrame = input_df.sample(frac=frac)
    return DataFrameContainer(

@@ -901,9 +924,13 @@ def _union_by_name_optimized(
        set_schema_getter(result, lambda: StructType(result_fields))
        return result
    else:
-
-
+        exception = (
+            SnowparkClientExceptionMessages.DF_CANNOT_RESOLVE_COLUMN_NAME_AMONG(
+                missing_left, missing_right
+            )
        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception

        result = left_df.unionAllByName(
            right_df, allow_missing_columns=allow_missing_columns