snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto

 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -148,4 +150,6 @@ def map_catalog(
             return get_current_catalog().uncacheTable(rel.uncache_table.table_name)
         case other:
             # TODO: list_function implementation is blocked on SNOW-1787268
-
+            exception = SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

@@ -29,11 +29,16 @@ from snowflake.snowpark.column import Column
 from snowflake.snowpark.table_function import _ExplodeFunctionCall
 from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
 from snowflake.snowpark_connect.column_name_handler import (
+    ColumnQualifier,
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_alias,
     map_expression,
@@ -369,56 +374,64 @@ def map_sort(
         for col in input_container.column_map.get_spark_columns()
     ]

-
-
-
-
-
-
-
-
-
-
+    # Process ORDER BY expressions with a context flag to enable column reuse optimization
+    from snowflake.snowpark_connect.utils.context import push_processing_order_by_scope
+
+    with push_processing_order_by_scope():
+        for so in sort_order:
+            if so.child.HasField("literal"):
+                column_index = unwrap_literal(so.child)
+                try:
+                    if column_index <= 0:
+                        exception = IndexError()
+                        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                        raise exception
+                    col = input_df[column_index - 1]
+                except IndexError:
+                    exception = AnalysisException(
+                        f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+            else:
+                _, typed_column = map_single_column_expression(
+                    so.child, input_container.column_map, typer
                 )
-
-        _, typed_column = map_single_column_expression(
-            so.child, input_container.column_map, typer
-        )
-        col = typed_column.col
+            col = typed_column.col

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            match (so.direction, so.null_ordering):
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.asc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.asc_nulls_last()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.desc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.desc_nulls_last()

-
+            cols.append(col)

-
-
-
-
-
-
-
-
-
+            ascending.append(
+                so.direction
+                == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+            )
+            if (
+                so.direction
+                != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
+            ):
+                order_specified = True

     # TODO: sort.isglobal.
     if not order_specified:
@@ -446,9 +459,11 @@ def map_to_df(
     new_column_names = list(rel.to_df.column_names)
     if len(new_column_names) != len(input_container.column_map.columns):
         # TODO: Check error type here
-
+        exception = ValueError(
             "Number of column names must match number of columns in DataFrame"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     snowpark_new_column_names = make_column_names_snowpark_compatible(
         new_column_names, rel.common.plan_id
     )
@@ -507,9 +522,11 @@ def map_to_schema(
     for field in rel.to_schema.schema.struct.fields:
         if field.name in already_existing_columns:
             if count_case_insensitive_column_names[field.name.lower()] > 1:
-
+                exception = AnalysisException(
                     f"[AMBIGUOUS_COLUMN_OR_FIELD] Column or field `{field.name}` is ambiguous and has {len(input_container.column_map.spark_to_col[field.name])} matches."
                 )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
             snowpark_name = None
             for name in input_container.column_map.spark_to_col:
                 if name.lower() == field.name.lower():
@@ -526,17 +543,23 @@ def map_to_schema(
                 and snowpark_field.nullable
                 and not isinstance(snowpark_field.datatype, StructType)
             ):
-
+                exception = AnalysisException(
                     f"[NULLABLE_COLUMN_OR_FIELD] Column or field `{field.name}` is nullable while it's required to be non-nullable."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             # Check type casting validation
             if not _can_cast_column_in_schema(
                 snowpark_field.datatype, proto_to_snowpark_type(field.data_type)
             ):
-
+                exception = AnalysisException(
                     f"""[INVALID_COLUMN_OR_FIELD_DATA_TYPE] Column or field `{field.name}` is of type "{map_snowpark_to_pyspark_types(proto_to_snowpark_type(field.data_type))}" while it's required to be "{map_snowpark_to_pyspark_types(snowpark_field.datatype)}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
     if len(already_existing_columns) == len(new_column_names):
         # All columns already exist, we're doing a simple update.
         snowpark_new_column_names = []
@@ -761,9 +784,11 @@ def map_with_columns(
         name = names_list[0]
         name_normalized = input_container.column_map._normalized_spark_name(name)
         if name_normalized in seen_columns:
-
+            exception = ValueError(
                 f"[COLUMN_ALREADY_EXISTS] The column `{name}` already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(name_normalized)
         # If the column name is already in the DataFrame, we replace it, so we use the
         # mapping to get the correct column name.
@@ -772,7 +797,9 @@ def map_with_columns(
                 [name]
             )
             if len(all_instances_of_spark_column_name) == 0:
-
+                exception = KeyError(f"Spark column name {name} does not exist")
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             with_columns_names.extend(all_instances_of_spark_column_name)
             with_columns_exprs.extend(
                 [expr.col] * len(all_instances_of_spark_column_name)
@@ -852,7 +879,9 @@ def map_unpivot(
     # Spark API: df.unpivot([id_columns], [unpivot_columns], var_column, val_column)
     # Snowpark API: df.unpivot(val_column, var_column, [unpivot_columns])
     if rel.unpivot.HasField("values") and len(rel.unpivot.values.values) == 0:
-
+        exception = SparkException.unpivot_requires_value_columns()
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     input_container = map_relation(rel.unpivot.input)
     input_df = input_container.dataframe
@@ -893,7 +922,7 @@ def map_unpivot(
         )
         if not get_lease_common_ancestor_classes(type_list):
             # TODO: match exactly how spark shows mismatched columns
-
+            exception = SparkException.unpivot_value_data_type_mismatch(
                 ", ".join(
                     [
                         f"{dtype} {column_name}"
@@ -901,6 +930,8 @@ def map_unpivot(
                     ]
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
         return not is_same_type and contains_numeric_type

     def get_column_names(
@@ -984,7 +1015,7 @@ def map_unpivot(
     column_project = []
     column_reverse_project = []
     snowpark_columns = []
-    qualifiers = []
+    qualifiers: list[set[ColumnQualifier]] = []
     for c in input_container.column_map.get_snowpark_columns():
         c_name = snowpark_functions_col(c, input_container.column_map).get_name()
         if c_name in unpivot_col_names:
@@ -1012,7 +1043,7 @@
             )
             snowpark_columns.append(c)
             qualifiers.append(
-                input_container.column_map.
+                input_container.column_map.get_qualifiers_for_spark_column(c)
             )

     # Without the case when postprocessing, the result Spark dataframe is:
@@ -1057,7 +1088,7 @@
         snowpark_functions_col(snowpark_value_column_name, input_container.column_map)
     )
     snowpark_columns.append(snowpark_value_column_name)
-    qualifiers.extend([
+    qualifiers.extend([set() for _ in range(2)])

     result = (
         input_df.select(*column_project)
@@ -1097,7 +1128,9 @@ def map_group_map(
         snowpark_grouping_expressions.append(snowpark_column.col)
         group_name_list.append(new_name)
     if rel.group_map.func.python_udf is None:
-
+        exception = ValueError("group_map relation without python udf is not supported")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     python_major, python_minor = rel.group_map.func.python_udf.python_ver.split(".")
     is_compatible_python = sys.version_info.major == int(

@@ -15,8 +15,11 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import map_expression
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
@@ -84,11 +87,13 @@ def map_extension(
             input_df = result.dataframe
             snowpark_col_names = result.column_map.get_snowpark_columns()
             if len(subquery_aliases.aliases) != len(snowpark_col_names):
-
+                exception = AnalysisException(
                     "Number of column aliases does not match number of columns. "
                     f"Number of column aliases: {len(subquery_aliases.aliases)}; "
                     f"number of columns: {len(snowpark_col_names)}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             return DataFrameContainer.create_with_column_mapping(
                 dataframe=input_df,
                 spark_column_names=subquery_aliases.aliases,
@@ -108,18 +113,22 @@ def map_extension(

             left_queries = left_df.queries["queries"]
             if len(left_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(left_queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             left_query = left_queries[0]
             with push_outer_dataframe(left_result):
                 right_result = map_relation(lateral_join.right)
             right_df = right_result.dataframe
             right_queries = right_df.queries["queries"]
             if len(right_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(right_queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             right_query = right_queries[0]
             input_df_sql = f"WITH __left AS ({left_query}) SELECT * FROM __left INNER JOIN LATERAL ({right_query})"
             session = snowpark.Session.get_active_session()
@@ -139,7 +148,11 @@ def map_extension(
         case "aggregate":
             return map_aggregate(extension.aggregate, rel.common.plan_id)
         case other:
-
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def get_udtf_project(relation: relation_proto.Relation) -> bool:
@@ -166,7 +179,7 @@ def get_udtf_project(relation: relation_proto.Relation) -> bool:

 def handle_udtf_with_table_arguments(
     udtf_info: snowflake_proto.UDTFWithTableArguments,
-) ->
+) -> DataFrameContainer:
     """
     Handle UDTF with one or more table arguments using Snowpark's join_table_function.
     For multiple table arguments, this creates a Cartesian product of all input tables.
@@ -174,7 +187,9 @@ def handle_udtf_with_table_arguments(
     session = snowpark.Session.get_active_session()
     udtf_name_lower = udtf_info.function_name.lower()
     if udtf_name_lower not in session._udtfs:
-
+        exception = ValueError(f"UDTF '{udtf_info.function_name}' not found.")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     _udtf_obj, udtf_spark_output_names = session._udtfs[udtf_name_lower]

     table_containers = []
@@ -188,10 +203,12 @@
         if not get_boolean_session_config_param(
             "spark.sql.tvf.allowMultipleTableArguments.enabled"
         ):
-
+            exception = AnalysisException(
                 "[TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS] Multiple table arguments are not enabled. "
                 "Please set `spark.sql.tvf.allowMultipleTableArguments.enabled` to `true`"
             )
+            attach_custom_error_code(exception, ErrorCodes.CONFIG_NOT_ENABLED)
+            raise exception

     base_df = table_containers[0][0].dataframe
     first_table_col_count = len(base_df.columns)
@@ -270,7 +287,7 @@ def handle_lateral_join_with_udtf(
     left_result: DataFrameContainer,
     udtf_relation: relation_proto.Relation,
     udtf_info: tuple[snowpark.udtf.UserDefinedTableFunction, list],
-) ->
+) -> DataFrameContainer:
     """
     Handle lateral join with UDTF on the right side using join_table_function.
     """
@@ -303,7 +320,7 @@

 def map_aggregate(
     aggregate: snowflake_proto.Aggregate, plan_id: int
-) ->
+) -> DataFrameContainer:
     input_container = map_relation(aggregate.input)
     input_df: snowpark.DataFrame = input_container.dataframe

@@ -339,13 +356,15 @@ def map_aggregate(
             exp, input_container.column_map, typer
         )
         if len(new_names) != 1:
-
+            exception = SnowparkConnectNotImplementedError(
                 "Multi-column aggregate expressions are not supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         return new_names[0], snowpark_column

     raw_groupings: list[tuple[str, TypedColumn]] = []
-    raw_aggregations: list[tuple[str, TypedColumn,
+    raw_aggregations: list[tuple[str, TypedColumn, set[ColumnQualifier]]] = []

     if not is_group_by_all:
         raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
@@ -383,11 +402,11 @@ def map_aggregate(
         col = _map_column(exp)
         if exp.WhichOneof("expr_type") == "unresolved_attribute":
             spark_name = col[0]
-            qualifiers
-
-            )
+            qualifiers: set[
+                ColumnQualifier
+            ] = input_container.column_map.get_qualifiers_for_spark_column(spark_name)
         else:
-            qualifiers =
+            qualifiers = set()

         raw_aggregations.append((col[0], col[1], qualifiers))

@@ -420,7 +439,7 @@ def map_aggregate(
     spark_columns: list[str] = []
     snowpark_columns: list[str] = []
     snowpark_column_types: list[snowpark_types.DataType] = []
-    all_qualifiers: list[
+    all_qualifiers: list[set[ColumnQualifier]] = []

     # Use grouping columns directly without aliases
     groupings = [col.col for _, col in raw_groupings]
@@ -474,9 +493,11 @@
                 snowpark.GroupingSets(*sets_mapped)
             )
         case other:
-
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported GROUP BY type: {other}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     result = result.agg(*aggregations, exclude_grouping_columns=True)


@@ -5,14 +5,20 @@
 from functools import reduce

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
+from pyspark.errors import AnalysisException

 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -62,7 +68,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
     match rel.join.join_type:
         case relation_proto.Join.JOIN_TYPE_UNSPECIFIED:
             # TODO: Understand what UNSPECIFIED Join type is
-
+            exception = SnowparkConnectNotImplementedError("Unspecified Join Type")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case relation_proto.Join.JOIN_TYPE_INNER:
             join_type = "inner"
         case relation_proto.Join.JOIN_TYPE_FULL_OUTER:
@@ -78,7 +86,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         case relation_proto.Join.JOIN_TYPE_CROSS:
             join_type = "cross"
         case other:
-
+            exception = SnowparkConnectNotImplementedError(f"Other Join Type: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     # This handles case sensitivity for using_columns
     case_corrected_right_columns: list[str] = []
@@ -124,9 +134,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -140,6 +148,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     left_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
         if any(
             right_container.column_map.get_snowpark_column_name_from_spark_column_name(
                 c, allow_non_exists=True, return_first=True
@@ -147,9 +157,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -163,6 +171,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     right_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

     # Round trip the using columns through the column map to get the correct names
     # in order to support case sensitivity.
@@ -227,7 +237,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         result = joined_df.drop(*(right for _, right in snowpark_using_columns))
     else:
         if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
-
+            exception = SparkException.implicit_cartesian_product("inner")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         result: snowpark.DataFrame = left_input.join(
             right=right_input,
             how=join_type,
@@ -256,8 +268,10 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         ]  # this is to make sure we only remove the column once
     ]

-    qualifiers = list(
-
+    qualifiers: list[set[ColumnQualifier]] = list(
+        left_container.column_map.get_qualifiers()
+    ) + [
+        {right_container.column_map.get_qualifier_for_spark_column(spark_col)}
         for i, spark_col in enumerate(
             right_container.column_map.get_spark_columns()
         )

@@ -19,6 +19,8 @@ from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import (
     get_python_sql_utils_class,
     map_json_schema_to_snowpark,
@@ -327,9 +329,11 @@ def map_local_relation(
             column_metadata=column_metadata,
         )
     else:
-
+        exception = SnowparkConnectNotImplementedError(
            "LocalRelation without data & schema is not supported"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception


 def map_range(
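The recurring change across these hunks is mechanical: instead of raising an exception directly, the new code builds the exception, tags it with a custom error code via `attach_custom_error_code` from `error/error_utils.py` and the new `ErrorCodes` enum from `error/error_codes.py`, and only then raises it. A minimal sketch of that pattern follows; the `ErrorCodes` members shown appear in the diff, but the body of `attach_custom_error_code` and the surrounding handler function are illustrative assumptions, not the package's actual implementation.

```python
from enum import Enum


class ErrorCodes(Enum):
    # Two of the codes referenced in the diff; the real enum defines more.
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INVALID_INPUT = "INVALID_INPUT"


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> None:
    # Stand-in for snowflake.snowpark_connect.error.error_utils.attach_custom_error_code:
    # tag the exception so downstream error handling can classify it.
    exception.custom_error_code = code  # hypothetical attribute name


def map_unsupported_feature(feature: str):
    # Pre-0.32.0 style: raise ValueError(f"Unsupported feature: {feature}")
    # 0.32.0 style: build, tag, then raise.
    exception = ValueError(f"Unsupported feature: {feature}")
    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
    raise exception
```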
|