snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of snowpark-connect has been flagged as potentially problematic.
Files changed (87)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +200 -102
  3. snowflake/snowpark_connect/column_qualifier.py +47 -0
  4. snowflake/snowpark_connect/config.py +51 -16
  5. snowflake/snowpark_connect/dataframe_container.py +3 -2
  6. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  7. snowflake/snowpark_connect/error/error_codes.py +50 -0
  8. snowflake/snowpark_connect/error/error_utils.py +142 -22
  9. snowflake/snowpark_connect/error/exceptions.py +13 -4
  10. snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
  11. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  12. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  13. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  14. snowflake/snowpark_connect/expression/literal.py +7 -1
  15. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  16. snowflake/snowpark_connect/expression/map_expression.py +53 -8
  17. snowflake/snowpark_connect/expression/map_extension.py +37 -11
  18. snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
  19. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  20. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
  21. snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
  22. snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
  23. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  24. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  25. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  26. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
  27. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  28. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  29. snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
  30. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  31. snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
  32. snowflake/snowpark_connect/relation/map_extension.py +38 -17
  33. snowflake/snowpark_connect/relation/map_join.py +26 -12
  34. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  35. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  36. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  37. snowflake/snowpark_connect/relation/map_sql.py +124 -25
  38. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  39. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  40. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  41. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  50. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  51. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  53. snowflake/snowpark_connect/relation/write/map_write.py +160 -48
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  55. snowflake/snowpark_connect/resources_initializer.py +5 -1
  56. snowflake/snowpark_connect/server.py +73 -21
  57. snowflake/snowpark_connect/type_mapping.py +90 -20
  58. snowflake/snowpark_connect/typed_column.py +8 -6
  59. snowflake/snowpark_connect/utils/context.py +42 -1
  60. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  61. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  62. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  63. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  64. snowflake/snowpark_connect/utils/profiling.py +25 -8
  65. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  66. snowflake/snowpark_connect/utils/session.py +24 -4
  67. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  68. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  69. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  70. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  71. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  72. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  73. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  74. snowflake/snowpark_connect/version.py +1 -1
  75. snowflake/snowpark_decoder/dp_session.py +1 -1
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
  78. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  79. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  80. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
  85. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
  86. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
  87. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ import pandas
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.utils.cache import (
  df_cache_map_get,
  df_cache_map_put_if_absent,
@@ -103,7 +105,9 @@ def map_relation(
  else:
  # This happens when the relation is empty, usually because the incoming message
  # type was incorrectly routed here.
- raise SnowparkConnectNotImplementedError("No Relation Type")
+ exception = SnowparkConnectNotImplementedError("No Relation Type")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  result: DataFrameContainer | pandas.DataFrame
  operation = rel.WhichOneof("rel_type")
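Note: the dominant pattern in the hunks below is the same three-line rewrite -- build the exception, attach a structured error code, then raise it. The helpers come from error/error_codes.py (added in this release, +50 lines) and the expanded error/error_utils.py, whose internals are not shown in this diff. A minimal sketch of how such a helper could behave, assuming it simply tags the exception object for downstream handling and telemetry:

    # Hypothetical sketch only; the real error_utils.py is not part of this diff.
    from enum import Enum

    class ErrorCodes(Enum):  # assumed shape of error/error_codes.py
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
        INTERNAL_ERROR = "INTERNAL_ERROR"

    def attach_custom_error_code(exception: BaseException, code: ErrorCodes) -> BaseException:
        # Tag the exception so upstream error handling can report a stable code.
        exception.custom_error_code = code
        return exception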
@@ -121,11 +125,19 @@ def map_relation(
  case relation_proto.Aggregate.GroupType.GROUP_TYPE_PIVOT:
  result = map_aggregate.map_pivot_aggregate(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"AGGREGATE {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"AGGREGATE {other}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "approx_quantile":
  result = map_stats.map_approx_quantile(rel)
  case "as_of_join":
- raise SnowparkConnectNotImplementedError("AS_OF_JOIN")
+ exception = SnowparkConnectNotImplementedError("AS_OF_JOIN")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "catalog": # TODO: order these alphabetically
  result = map_catalog.map_catalog(rel.catalog)
  case "collect_metrics":
@@ -179,9 +191,11 @@ def map_relation(
  (get_session_id(), rel.cached_local_relation.hash)
  )
  if cached_df is None:
- raise ValueError(
+ exception = ValueError(
  f"Local relation with hash {rel.cached_local_relation.hash} not found in cache."
  )
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  return cached_df
  case "map_partitions":
  result = map_map_partitions.map_map_partitions(rel)
@@ -235,7 +249,13 @@ def map_relation(
  case relation_proto.SetOperation.SetOpType.SET_OP_TYPE_EXCEPT:
  result = map_row_ops.map_except(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"SET_OP {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"SET_OP {other}"
+ )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "show_string":
  result = map_show_string.map_show_string(rel)
  case "sort":
@@ -261,11 +281,17 @@ def map_relation(
  case "with_columns_renamed":
  result = map_column_ops.map_with_columns_renamed(rel)
  case "with_relations":
- raise SnowparkConnectNotImplementedError("WITH_RELATIONS")
+ exception = SnowparkConnectNotImplementedError("WITH_RELATIONS")
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "group_map":
  result = map_column_ops.map_group_map(rel)
  case other:
- raise SnowparkConnectNotImplementedError(f"Other Relation {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Other Relation {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  # Store container in plan cache
  if isinstance(result, DataFrameContainer):
@@ -29,6 +29,8 @@ from snowflake.snowpark_connect.column_name_handler import (
  )
  from snowflake.snowpark_connect.config import global_config
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
  from snowflake.snowpark_connect.expression.map_expression import (
  map_single_column_expression,
@@ -58,9 +60,11 @@ def map_deduplicate(
  rel.deduplicate.HasField("within_watermark")
  and rel.deduplicate.within_watermark
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  "dropDuplicatesWithinWatermark is not supported with batch DataFrames/DataSets"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  if (
  rel.deduplicate.HasField("all_columns_as_keys")
@@ -223,7 +227,9 @@ def map_union(
  spark_sql_ansi_enabled = global_config.spark_sql_ansi_enabled
  if left_dtypes != right_dtypes and not rel.set_op.by_name:
  if len(left_dtypes) != len(right_dtypes):
- raise AnalysisException("UNION: the number of columns must match")
+ exception = AnalysisException("UNION: the number of columns must match")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  target_left_dtypes, target_right_dtypes = [], []
  for left_type, right_type in zip(left_dtypes, right_dtypes):
  match (left_type, right_type):
@@ -259,9 +265,11 @@ def map_union(
  not spark_sql_ansi_enabled
  or snowpark.types.StringType() not in [left_type, right_type]
  ): # In ansi mode , string type union boolean type is acceptable
- raise AnalysisException(
+ exception = AnalysisException(
  f"""[INCOMPATIBLE_COLUMN_TYPE] UNION can only be performed on tables with compatible column types. "{str(left_type)}" type which is not compatible with "{str(right_type)}". """
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  target_left_dtypes.append(left_type)
  target_right_dtypes.append(right_type)
  case _:
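These union checks surface to the Spark client as ordinary AnalysisExceptions. A rough illustration of what they enforce, assuming `spark` is a Spark Connect session backed by this server (the error appears once the plan reaches the server, e.g. on collect):

    df1 = spark.createDataFrame([(1, "a")], ["id", "name"])
    df2 = spark.createDataFrame([(2,)], ["id"])
    df1.union(df2).collect()   # AnalysisException: UNION: the number of columns must match

    df3 = spark.createDataFrame([(True,)], ["flag"])
    df4 = spark.createDataFrame([("x",)], ["flag"])
    df3.union(df4).collect()   # [INCOMPATIBLE_COLUMN_TYPE] unless ANSI mode permits the string/boolean pairing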
@@ -787,7 +795,9 @@ def map_sample(

  frac = rel.sample.upper_bound - rel.sample.lower_bound
  if frac < 0 or frac > 1:
- raise IllegalArgumentException("Sample fraction must be between 0 and 1")
+ exception = IllegalArgumentException("Sample fraction must be between 0 and 1")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  # The seed argument is not supported here. There are a number of reasons that implementing
  # this will be complicated in Snowflake. Here is a list of complications:
  #
@@ -802,9 +812,11 @@ def map_sample(
  # these issues.
  if rel.sample.with_replacement:
  # TODO: Use a random number generator with ROW_NUMBER and SELECT.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Sample with replacement is not supported"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  else:
  result: snowpark.DataFrame = input_df.sample(frac=frac)
  return DataFrameContainer(
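For context, map_sample validates the fraction and rejects sampling with replacement (and, per the surrounding comments, does not honor a seed). On the client this corresponds roughly to the following, assuming a connected `spark` session:

    df = spark.range(100)
    df.sample(fraction=0.1).count()                          # supported: plain fraction sampling
    df.sample(withReplacement=True, fraction=0.1).count()    # raises: "Sample with replacement is not supported"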
@@ -912,9 +924,13 @@ def _union_by_name_optimized(
  set_schema_getter(result, lambda: StructType(result_fields))
  return result
  else:
- raise SnowparkClientExceptionMessages.DF_CANNOT_RESOLVE_COLUMN_NAME_AMONG(
- missing_left, missing_right
+ exception = (
+ SnowparkClientExceptionMessages.DF_CANNOT_RESOLVE_COLUMN_NAME_AMONG(
+ missing_left, missing_right
+ )
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  result = left_df.unionAllByName(
  right_df, allow_missing_columns=allow_missing_columns
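This branch fires when unionByName cannot reconcile the two schemas and missing columns are not allowed. The client-side behavior it maps to is standard PySpark (illustrative, assuming a connected `spark` session):

    df1 = spark.createDataFrame([(1, 2)], ["a", "b"])
    df2 = spark.createDataFrame([(3, 4)], ["a", "c"])
    df1.unionByName(df2).collect()                              # error: columns "b"/"c" cannot be resolved
    df1.unionByName(df2, allowMissingColumns=True).collect()    # succeeds; absent columns are filled with NULL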
@@ -46,6 +46,8 @@ from snowflake.snowpark_connect.config import (
  unset_config_param,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  ColumnNameMap,
  map_single_column_expression,
@@ -257,7 +259,8 @@ def _create_table_as_select(logical_plan, mode: str) -> None:


  def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
- # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase",
+ # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+ # if present, or to "spark.sql.caseSensitive".
  # and struct fields will be left as is. This should allow users to use the same names
  # in spark and Snowflake in most cases.
  if is_column:
@@ -377,14 +380,18 @@ def _get_assignments_from_action(
  or action.getClass().getSimpleName() == "UpdateStarAction"
  ):
  if len(column_mapping_source.columns) != len(column_mapping_target.columns):
- raise ValueError(
+ exception = ValueError(
  "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  for i, col in enumerate(column_mapping_target.columns):
  if assignments.get(col.snowpark_name) is not None:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  assignments[col.snowpark_name] = snowpark_fn.col(
  column_mapping_source.columns[i].snowpark_name
  )
@@ -489,9 +496,11 @@ def map_sql_to_pandas_df(
  snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
  session.sql(snowflake_sql).collect()
  else:
- raise ValueError(
+ exception = ValueError(
  f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+ raise exception
  case "CreateNamespace":
  name = get_relation_identifier_name(logical_plan.name(), True)
  previous_name = session.connection.schema
@@ -603,9 +612,11 @@ def map_sql_to_pandas_df(
  )
  temp_view = get_temp_view(snowflake_view_name)
  if temp_view is not None and not logical_plan.replace():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{spark_view_name}` because it already exists."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
  else:
  unregister_temp_view(
  spark_to_sf_single_id_with_unquoting(spark_view_name)
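The TEMP_TABLE_OR_VIEW_ALREADY_EXISTS branch mirrors Spark's own semantics for temporary views: only the non-replacing form fails when the name is taken. Illustrative client-side behavior, assuming a connected `spark` session:

    df = spark.range(3)
    df.createTempView("v1")
    df.createTempView("v1")             # AnalysisException: [TEMP_TABLE_OR_VIEW_ALREADY_EXISTS]
    df.createOrReplaceTempView("v1")    # succeeds; logical_plan.replace() is true on this path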
@@ -625,11 +636,13 @@ def map_sql_to_pandas_df(
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  if _accessing_temp_object.get():
- raise AnalysisException(
+ exception = AnalysisException(
  f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
  "of the type VIEW because it references to a temporary object of the type VIEW. Please "
  f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception

  name = get_relation_identifier_name(logical_plan.child())
  comment = logical_plan.comment()
@@ -661,6 +674,7 @@ def map_sql_to_pandas_df(
  snowpark_column_names=df_container.column_map.get_snowpark_columns(),
  parent_column_name_map=df_container.column_map,
  )
+
  is_global = isinstance(
  logical_plan.viewType(),
  jpype.JClass(
@@ -757,9 +771,11 @@ def map_sql_to_pandas_df(
  del session._udtfs[func_name]
  else:
  if not logical_plan.ifExists():
- raise ValueError(
+ exception = ValueError(
  f"Function {func_name} not found among registered UDFs or UDTFs."
  )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+ raise exception
  if snowpark_name != "":
  argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
  session.sql(
@@ -832,17 +848,25 @@ def map_sql_to_pandas_df(
  rows = session.sql(final_sql).collect()
  else:
  # TODO: Support other logical plans
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"{logical_plan_name} is not supported yet with EXPLAIN."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  case "InsertIntoStatement":
  df_container = execute_logical_plan(logical_plan.query())
  df = df_container.dataframe
  queries = df.queries["queries"]
  if len(queries) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  f"Unexpected number of queries: {len(queries)}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  name = get_relation_identifier_name(logical_plan.table(), True)

@@ -996,9 +1020,13 @@ def map_sql_to_pandas_df(
  clauses.append(when_not_matched(condition).insert(assignments))

  if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Snowflake does not support 'not matched by source' actions in MERGE statements."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  target_table.merge(source_df, merge_condition_typed_col.col, clauses)
  case "DeleteFromTable":
@@ -1036,10 +1064,12 @@ def map_sql_to_pandas_df(
  case "UpdateTable":
  # Databricks/Delta-specific extension not supported by SAS.
  # Provide an actionable, clear error.
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
  + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "RenameColumn":
  full_table_identifier = get_relation_identifier_name(
  logical_plan.table(), True
@@ -1049,11 +1079,15 @@ def map_sql_to_pandas_df(
  if not check_table_supports_operation(
  full_table_identifier, "rename_column"
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
  f"This table was created as a v1 table with a data source that doesn't support column renaming. "
  f"To enable this operation, set 'snowpark.connect.enable_snowflake_extension_behavior' to 'true'."
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception

  column_obj = logical_plan.column()
  old_column_name = ".".join(
@@ -1094,6 +1128,7 @@ def map_sql_to_pandas_df(
  f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
  ).collect()
  else:
+ attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
  raise e
  case "ReplaceTableAsSelect":
  _create_table_as_select(logical_plan, mode="overwrite")
@@ -1113,9 +1148,11 @@ def map_sql_to_pandas_df(
  name = _spark_to_snowflake(logical_plan.namespace())
  session.sql(f"USE SCHEMA {name}").collect()
  case "SetNamespaceLocation" | "SetNamespaceProperties":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Altering databases is not currently supported."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception
  case "ShowCreateTable":
  # Handle SHOW CREATE TABLE command
  # Spark: SHOW CREATE TABLE table_name
@@ -1137,16 +1174,24 @@ def map_sql_to_pandas_df(
  case "ShowNamespaces":
  name = get_relation_identifier_name(logical_plan.namespace(), True)
  if name:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'IN' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if logical_plan.pattern().isDefined():
  # Snowflake SQL requires a "%" pattern.
  # Snowpark catalog requires a regex and does client-side filtering.
  # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "'LIKE' clause is not supported while listing databases"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  rows = session.sql("SHOW SCHEMAS").collect()
  if not rows:
  rows = None
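Because Snowflake's SHOW pattern syntax and Spark's namespace patterns do not line up, the IN and LIKE forms are rejected outright rather than mis-translated. Illustrative client-side behavior (assuming a connected `spark` session):

    spark.sql("SHOW SCHEMAS").show()                 # supported; backed by Snowflake's SHOW SCHEMAS
    spark.sql("SHOW SCHEMAS LIKE 'sales*'").show()   # raises: 'LIKE' clause is not supported while listing databases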
@@ -1247,9 +1292,13 @@ def map_sql_to_pandas_df(
  spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
  != db_name.casefold()
  ):
- raise AnalysisException(
+ exception = AnalysisException(
  f"database name is not matching:{db_name} and {db_and_table_name[0]}"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.INVALID_OPERATION
+ )
+ raise exception

  # Just table name
  snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
@@ -1294,6 +1343,33 @@ def map_sql_to_pandas_df(
  )
  SNOWFLAKE_CATALOG.refreshTable(table_name_unquoted)

+ return pandas.DataFrame({"": [""]}), ""
+ case "RepairTable":
+ # No-Op. Snowflake doesn't have explicit partitions to repair.
+ table_relation = logical_plan.child()
+ db_and_table_name = as_java_list(table_relation.multipartIdentifier())
+ multi_part_len = len(db_and_table_name)
+
+ if multi_part_len == 1:
+ table_name = db_and_table_name[0]
+ db_name = None
+ full_table_name = table_name
+ else:
+ db_name = db_and_table_name[0]
+ table_name = db_and_table_name[1]
+ full_table_name = db_name + "." + table_name
+
+ df = SNOWFLAKE_CATALOG.tableExists(table_name, db_name)
+
+ table_exist = df.iloc[0, 0]
+
+ if not table_exist:
+ exception = AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] Table not found `{full_table_name}`."
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+
  return pandas.DataFrame({"": [""]}), ""
  case _:
  execute_logical_plan(logical_plan)
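The new RepairTable branch makes MSCK REPAIR TABLE effectively a no-op (there are no Hive-style partitions to recover in Snowflake) while still verifying that the target table exists. Illustrative usage (table names here are made up):

    spark.sql("MSCK REPAIR TABLE sales")          # no-op if `sales` exists
    spark.sql("MSCK REPAIR TABLE no_such_table")  # AnalysisException: [TABLE_OR_VIEW_NOT_FOUND]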
@@ -1434,7 +1510,12 @@ def map_sql(
  snowpark_connect_sql_passthrough, sql_stmt = is_valid_passthrough_sql(rel.sql.query)

  if not snowpark_connect_sql_passthrough:
- logical_plan = sql_parser().parseQuery(sql_stmt)
+ # Changed from parseQuery to parsePlan as Spark parseQuery() call generating wrong logical plan for
+ # query like this: SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'
+ # As such other place in this file we use parsePlan.
+ # Main difference between parsePlan() and parseQuery() is, parsePlan() can be called for any SQL statement, while
+ # parseQuery() can only be called for query statements.
+ logical_plan = sql_parser().parsePlan(sql_stmt)

  parsed_pos_args = parse_pos_args(logical_plan, rel.sql.pos_args)
  set_sql_args(rel.sql.args, parsed_pos_args)
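The parser switch matters for statements that combine a typed literal with a set operation, like the one cited in the new comment. Illustrative reproduction from the client side:

    # The statement called out in the comment; previously mis-handled via parseQuery.
    spark.sql("SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'").show()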
@@ -1471,7 +1552,7 @@ def map_logical_plan_relation(

  # Extract aliases from the aggregate expressions (SELECT clause)
  alias_map = {}
- for agg_expr in as_java_list(rel.aggregateExpressions()):
+ for agg_expr in list(as_java_list(rel.aggregateExpressions())):
  if str(agg_expr.getClass().getSimpleName()) == "Alias":
  alias_map[str(agg_expr.name())] = agg_expr.child()

@@ -1534,9 +1615,13 @@ def map_logical_plan_relation(
  group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
  case "GroupingSets":
  if not exp.userGivenGroupByExprs().isEmpty():
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "User-defined group by expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  group_type = (
  snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
  )
@@ -1552,9 +1637,13 @@ def map_logical_plan_relation(

  if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
  if len(group_expression_list) != 1:
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "Multiple grouping expressions are not supported"
  )
+ attach_custom_error_code(
+ exception, ErrorCodes.UNSUPPORTED_OPERATION
+ )
+ raise exception
  if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
  group_expression_list = [] # TODO: exp.userGivenGroupByExprs()?
  else:
@@ -1786,12 +1875,14 @@ def map_logical_plan_relation(

  # Check for multi-column UNPIVOT which Snowflake doesn't support
  if len(value_column_names) > 1:
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
  f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
  f"or restructure your query to unpivot columns individually."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  values = []
  values_groups = as_java_list(rel.values().get())
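The error text above points users toward single-value-column UNPIVOTs. For contrast, a sketch of the supported versus rejected SQL shapes (table and column names are made up for illustration):

    # Supported: one value column per UNPIVOT clause
    spark.sql("SELECT * FROM sales UNPIVOT (amount FOR quarter IN (q1, q2))").show()

    # Rejected on this backend: multiple value columns in a single UNPIVOT clause
    spark.sql("""
        SELECT * FROM sales
        UNPIVOT ((amt, units) FOR quarter IN ((q1_amt, q1_units), (q2_amt, q2_units)))
    """).show()   # UnsupportedOperationException: Multi-column UNPIVOT is not supported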
@@ -1799,11 +1890,13 @@ def map_logical_plan_relation(
  # Check if we have multi-column groups in the IN clause
  if values_groups and len(as_java_list(values_groups[0])) > 1:
  group_sizes = [len(as_java_list(group)) for group in values_groups]
- raise UnsupportedOperationException(
+ exception = UnsupportedOperationException(
  f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
  f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
  f"Workaround: Unpivot each column separately and then join/union the results as needed."
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  for e1 in values_groups:
  for e in as_java_list(e1):
@@ -1849,9 +1942,11 @@ def map_logical_plan_relation(
  # Store the having condition in context and process the child aggregate
  child_relation = rel.child()
  if str(child_relation.getClass().getSimpleName()) != "Aggregate":
- raise SnowparkConnectNotImplementedError(
+ exception = SnowparkConnectNotImplementedError(
  "UnresolvedHaving can only be applied to Aggregate relations"
  )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  # Store having condition in a context variable for the Aggregate case to pick up
  having_condition = map_logical_plan_expression(rel.havingCondition())
@@ -2176,7 +2271,7 @@ def map_logical_plan_relation(
  function_name = rel.generator().name().toString()
  func_arguments = [
  map_logical_plan_expression(e)
- for e in as_java_list(rel.generator().children())
+ for e in list(as_java_list(rel.generator().children()))
  ]
  unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
  function_name=function_name, arguments=func_arguments
@@ -2242,7 +2337,11 @@ def map_logical_plan_relation(
  )
  proto = generator_dataframe_proto
  case other:
- raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
+ exception = SnowparkConnectNotImplementedError(
+ f"Unimplemented relation: {other}"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception

  proto.common.plan_id = plan_id

@@ -15,6 +15,8 @@ from snowflake import snowpark
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark_connect.config import get_boolean_session_config_param
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.relation.map_relation import map_relation
  from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session

@@ -99,9 +101,11 @@ def map_approx_quantile(
  else ""
  )

- raise AnalysisException(
+ exception = AnalysisException(
  f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{col_name}` cannot be resolved.{suggestion_text}"
  )
+ attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+ raise exception

  cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
  list(rel.approx_quantile.cols)
@@ -4,6 +4,7 @@

  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
  from snowflake.snowpark_connect.relation.map_relation import map_relation

@@ -18,7 +19,9 @@ def map_alias(
  # we set reuse_parsed_plan=False because we need new expr_id for the attributes (output columns) in aliased snowpark dataframe
  # reuse_parsed_plan will lead to ambiguous column name for operations like joining two dataframes that are aliased from the same dataframe
  input_container = map_relation(rel.subquery_alias.input, reuse_parsed_plan=False)
- qualifiers = [[alias]] * len(input_container.column_map.columns)
+ qualifiers = [
+ {ColumnQualifier((alias,))} for _ in input_container.column_map.columns
+ ]

  return DataFrameContainer.create_with_column_mapping(
  dataframe=input_container.dataframe,
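The replaced expression `[[alias]] * len(...)` creates N references to one shared inner list, so mutating the qualifier list for one column would have changed it for all of them; the new comprehension builds an independent set of ColumnQualifier objects per column. Whatever the motivation in this particular change, the shared-reference pitfall it sidesteps is a generic Python one:

    shared = [["alias"]] * 3
    shared[0].append("extra")
    print(shared)        # [['alias', 'extra'], ['alias', 'extra'], ['alias', 'extra']]

    independent = [{"alias"} for _ in range(3)]
    independent[0].add("extra")
    print(independent)   # only the first set gains the new element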
@@ -22,6 +22,8 @@ from snowflake.snowpark_connect.config import (
  global_config,
  )
  from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.expression.map_expression import (
  map_single_column_expression,
  )
@@ -163,17 +165,21 @@ def process_return_type(
  else:
  parsed_return = return_type
  except ValueError as e:
- raise PythonException(
+ exception = PythonException(
  f"[UDTF_ARROW_TYPE_CAST_ERROR] Error parsing UDTF return type DDL: {e}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception
  original_output_schema = proto_to_snowpark_type(parsed_return)
  output_schema = proto_to_snowpark_type(parsed_return)
  # Snowflake UDTF does not support MapType, so we convert it to VariantType.
  output_schema = convert_maptype_to_variant(output_schema)
  if not isinstance(output_schema, StructType):
- raise PySparkTypeError(
+ exception = PySparkTypeError(
  f"Invalid Python user-defined table function return type. Expect a struct type, but got {parsed_return}"
  )
+ attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+ raise exception

  expected_types = None
  if is_arrow_enabled_in_udtf() or is_spark_compatible_udtf_mode_enabled():
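process_return_type expects the UDTF return type to parse into a struct. For reference, a minimal PySpark UDTF whose returnType takes the struct-style DDL form this code accepts (illustrative only):

    from pyspark.sql.functions import lit, udtf

    @udtf(returnType="word string, length int")   # struct-style DDL; parses into a StructType
    class WordLengths:
        def eval(self, text: str):
            for word in text.split():
                yield (word, len(word))

    WordLengths(lit("snowpark connect")).show()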
@@ -276,12 +282,16 @@ def map_common_inline_user_defined_table_function(
  if require_creating_udtf_in_sproc(udtf_proto):
  snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
  if isinstance(snowpark_udtf_or_error, str):
- raise PythonException(snowpark_udtf_or_error)
+ exception = PythonException(snowpark_udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  snowpark_udtf = snowpark_udtf_or_error
  else:
  udtf_or_error = create_udtf(**kwargs)
  if isinstance(udtf_or_error, str):
- raise PythonException(udtf_or_error)
+ exception = PythonException(udtf_or_error)
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+ raise exception
  udtf = udtf_or_error
  snowpark_udtf = SnowparkUDTF(
  name=udtf.name,