snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +54 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +66 -4
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +25 -6
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +24 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/utils.py +19 -2
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +146 -63
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -2
- snowflake/snowpark_connect/utils/telemetry.py +81 -18
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -46,6 +46,8 @@ from snowflake.snowpark_connect.config import (
     unset_config_param,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     ColumnNameMap,
     map_single_column_expression,
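Nearly every hunk in this release follows the same refactor: instead of raising directly, call sites build the exception, tag it with a code from the new ErrorCodes module via attach_custom_error_code, and then raise it. The call-site pattern below is copied from the hunks that follow; the body of attach_custom_error_code is an assumption (the packaged helper in error_utils.py may also record telemetry or map codes for the Spark Connect client):

from enum import Enum


class ErrorCodes(Enum):
    # Stand-in for snowflake.snowpark_connect.error.error_codes; only a few of
    # the codes referenced in this diff are reproduced here.
    INVALID_OPERATION = "INVALID_OPERATION"
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND"


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
    # Assumed behavior: stash the code on the exception object so the server can
    # surface it to clients alongside the original message.
    exception.custom_error_code = code
    return exception


# The call-site pattern used throughout 0.31.0: build, attach, raise.
try:
    exception = ValueError("source and target must have the same number of columns")
    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
    raise exception
except ValueError as err:
    print(getattr(err, "custom_error_code", None))  # ErrorCodes.INVALID_OPERATION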
@@ -257,7 +259,8 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
 
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
-    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+    # if present, or to "spark.sql.caseSensitive".
     # and struct fields will be left as is. This should allow users to use the same names
     # in spark and Snowflake in most cases.
     if is_column:
@@ -377,14 +380,18 @@ def _get_assignments_from_action(
         or action.getClass().getSimpleName() == "UpdateStarAction"
     ):
         if len(column_mapping_source.columns) != len(column_mapping_target.columns):
-            raise ValueError(
+            exception = ValueError(
                 "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         for i, col in enumerate(column_mapping_target.columns):
             if assignments.get(col.snowpark_name) is not None:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             assignments[col.snowpark_name] = snowpark_fn.col(
                 column_mapping_source.columns[i].snowpark_name
             )
@@ -489,9 +496,11 @@ def map_sql_to_pandas_df(
                 snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
                 session.sql(snowflake_sql).collect()
             else:
-                raise ValueError(
+                exception = ValueError(
                     f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
         case "CreateNamespace":
             name = get_relation_identifier_name(logical_plan.name(), True)
             previous_name = session.connection.schema
@@ -603,9 +612,11 @@ def map_sql_to_pandas_df(
             )
             temp_view = get_temp_view(snowflake_view_name)
             if temp_view is not None and not logical_plan.replace():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{spark_view_name}` because it already exists."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             else:
                 unregister_temp_view(
                     spark_to_sf_single_id_with_unquoting(spark_view_name)
@@ -625,11 +636,13 @@ def map_sql_to_pandas_df(
             df_container = execute_logical_plan(logical_plan.query())
             df = df_container.dataframe
             if _accessing_temp_object.get():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
                     "of the type VIEW because it references to a temporary object of the type VIEW. Please "
                     f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
 
             name = get_relation_identifier_name(logical_plan.child())
             comment = logical_plan.comment()
@@ -661,6 +674,7 @@ def map_sql_to_pandas_df(
                 snowpark_column_names=df_container.column_map.get_snowpark_columns(),
                 parent_column_name_map=df_container.column_map,
             )
+
             is_global = isinstance(
                 logical_plan.viewType(),
                 jpype.JClass(
@@ -757,9 +771,11 @@ def map_sql_to_pandas_df(
                 del session._udtfs[func_name]
             else:
                 if not logical_plan.ifExists():
-                    raise ValueError(
+                    exception = ValueError(
                         f"Function {func_name} not found among registered UDFs or UDTFs."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
             if snowpark_name != "":
                 argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
                 session.sql(
@@ -832,17 +848,25 @@ def map_sql_to_pandas_df(
                 rows = session.sql(final_sql).collect()
             else:
                 # TODO: Support other logical plans
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"{logical_plan_name} is not supported yet with EXPLAIN."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case "InsertIntoStatement":
             df_container = execute_logical_plan(logical_plan.query())
             df = df_container.dataframe
             queries = df.queries["queries"]
             if len(queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(queries)}"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             name = get_relation_identifier_name(logical_plan.table(), True)
 
@@ -996,9 +1020,13 @@ def map_sql_to_pandas_df(
                 clauses.append(when_not_matched(condition).insert(assignments))
 
             if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Snowflake does not support 'not matched by source' actions in MERGE statements."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             target_table.merge(source_df, merge_condition_typed_col.col, clauses)
         case "DeleteFromTable":
@@ -1036,10 +1064,12 @@ def map_sql_to_pandas_df(
         case "UpdateTable":
             # Databricks/Delta-specific extension not supported by SAS.
             # Provide an actionable, clear error.
-            raise UnsupportedOperationException(
+            exception = UnsupportedOperationException(
                 "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
                 + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "RenameColumn":
             full_table_identifier = get_relation_identifier_name(
                 logical_plan.table(), True
@@ -1049,11 +1079,15 @@ def map_sql_to_pandas_df(
             if not check_table_supports_operation(
                 full_table_identifier, "rename_column"
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
                     f"This table was created as a v1 table with a data source that doesn't support column renaming. "
                     f"To enable this operation, set 'snowpark.connect.enable_snowflake_extension_behavior' to 'true'."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             column_obj = logical_plan.column()
             old_column_name = ".".join(
@@ -1094,6 +1128,7 @@ def map_sql_to_pandas_df(
                         f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
                     ).collect()
                 else:
+                    attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                     raise e
         case "ReplaceTableAsSelect":
             _create_table_as_select(logical_plan, mode="overwrite")
@@ -1113,9 +1148,11 @@ def map_sql_to_pandas_df(
             name = _spark_to_snowflake(logical_plan.namespace())
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetNamespaceLocation" | "SetNamespaceProperties":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Altering databases is not currently supported."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "ShowCreateTable":
             # Handle SHOW CREATE TABLE command
             # Spark: SHOW CREATE TABLE table_name
@@ -1137,16 +1174,24 @@ def map_sql_to_pandas_df(
         case "ShowNamespaces":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
             if name:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "'IN' clause is not supported while listing databases"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
             if logical_plan.pattern().isDefined():
                 # Snowflake SQL requires a "%" pattern.
                 # Snowpark catalog requires a regex and does client-side filtering.
                 # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "'LIKE' clause is not supported while listing databases"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
             rows = session.sql("SHOW SCHEMAS").collect()
             if not rows:
                 rows = None
@@ -1247,9 +1292,13 @@ def map_sql_to_pandas_df(
                     spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
                     != db_name.casefold()
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"database name is not matching:{db_name} and {db_and_table_name[0]}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
 
                 # Just table name
                 snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
@@ -1471,7 +1520,7 @@ def map_logical_plan_relation(
 
             # Extract aliases from the aggregate expressions (SELECT clause)
             alias_map = {}
-            for agg_expr in as_java_list(rel.aggregateExpressions()):
+            for agg_expr in list(as_java_list(rel.aggregateExpressions())):
                 if str(agg_expr.getClass().getSimpleName()) == "Alias":
                     alias_map[str(agg_expr.name())] = agg_expr.child()
 
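Here (and again in the generator-children hunk further down) the Java collection returned by as_java_list(...) is now copied into a Python list(...) before iteration. The motivation is not stated in the diff, but the usual reason for such a change is to snapshot a JVM-backed sequence so it can be traversed, and re-traversed, independently of the live proxy object. A toy illustration with a plain iterator standing in for that proxy:

def as_java_list_stub(items):
    # Stand-in for the JPype-backed proxy: yields elements once, like a live cursor.
    return iter(items)


exprs = as_java_list_stub(["Alias(a)", "Literal(1)", "Alias(b)"])
snapshot = list(exprs)                     # materialize once, as the new code does
aliases = [e for e in snapshot if e.startswith("Alias")]
print(len(snapshot), aliases)              # the snapshot can be reused safely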
@@ -1534,9 +1583,13 @@ def map_logical_plan_relation(
                     group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
                 case "GroupingSets":
                     if not exp.userGivenGroupByExprs().isEmpty():
-                        raise SnowparkConnectNotImplementedError(
+                        exception = SnowparkConnectNotImplementedError(
                             "User-defined group by expressions are not supported"
                         )
+                        attach_custom_error_code(
+                            exception, ErrorCodes.UNSUPPORTED_OPERATION
+                        )
+                        raise exception
                     group_type = (
                         snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
                     )
@@ -1552,9 +1605,13 @@ def map_logical_plan_relation(
 
             if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
                 if len(group_expression_list) != 1:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         "Multiple grouping expressions are not supported"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
                     group_expression_list = []  # TODO: exp.userGivenGroupByExprs()?
                 else:
@@ -1786,12 +1843,14 @@ def map_logical_plan_relation(
 
             # Check for multi-column UNPIVOT which Snowflake doesn't support
            if len(value_column_names) > 1:
-                raise UnsupportedOperationException(
+                exception = UnsupportedOperationException(
                     f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
                     f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
                     f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
                     f"or restructure your query to unpivot columns individually."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             values = []
             values_groups = as_java_list(rel.values().get())
@@ -1799,11 +1858,13 @@ def map_logical_plan_relation(
             # Check if we have multi-column groups in the IN clause
             if values_groups and len(as_java_list(values_groups[0])) > 1:
                 group_sizes = [len(as_java_list(group)) for group in values_groups]
-                raise UnsupportedOperationException(
+                exception = UnsupportedOperationException(
                     f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
                     f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
                     f"Workaround: Unpivot each column separately and then join/union the results as needed."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             for e1 in values_groups:
                 for e in as_java_list(e1):
@@ -1849,9 +1910,11 @@ def map_logical_plan_relation(
             # Store the having condition in context and process the child aggregate
             child_relation = rel.child()
             if str(child_relation.getClass().getSimpleName()) != "Aggregate":
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "UnresolvedHaving can only be applied to Aggregate relations"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             # Store having condition in a context variable for the Aggregate case to pick up
             having_condition = map_logical_plan_expression(rel.havingCondition())
@@ -2176,7 +2239,7 @@ def map_logical_plan_relation(
             function_name = rel.generator().name().toString()
             func_arguments = [
                 map_logical_plan_expression(e)
-                for e in as_java_list(rel.generator().children())
+                for e in list(as_java_list(rel.generator().children()))
             ]
             unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
                 function_name=function_name, arguments=func_arguments
@@ -2242,7 +2305,11 @@ def map_logical_plan_relation(
             )
             proto = generator_dataframe_proto
         case other:
-            raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unimplemented relation: {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     proto.common.plan_id = plan_id
 
@@ -15,6 +15,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 
@@ -99,9 +101,11 @@ def map_approx_quantile(
                 else ""
             )
 
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{col_name}` cannot be resolved.{suggestion_text}"
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
 
     cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
         list(rel.approx_quantile.cols)
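The [UNRESOLVED_COLUMN.WITH_SUGGESTION] message mirrors Spark's wording, where suggestion_text lists candidate column names. How the package builds that text is not visible in this hunk; one common way to produce such a hint (purely illustrative, not the packaged implementation) is a closest-match lookup:

import difflib


def build_suggestion_text(col_name: str, available: list[str]) -> str:
    # Illustrative only: propose the closest existing column names.
    matches = difflib.get_close_matches(col_name, available, n=3, cutoff=0.4)
    return f" Did you mean one of the following? [{', '.join(matches)}]" if matches else ""


print(build_suggestion_text("prce", ["price", "qty", "order_id"]))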
@@ -309,9 +313,28 @@ def map_freq_items(rel: relation_proto.Relation) -> DataFrameContainer:
     cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
         list(rel.freq_items.cols)
     )
+
+    # handle empty DataFrame case
+    row_count = input_df.count()
+
+    for sp_col_name in cols:
+        spark_col_names.append(
+            f"{input_container.column_map.get_spark_column_name_from_snowpark_column_name(sp_col_name)}_freqItems"
+        )
+
+    if row_count == 0:
+        # If DataFrame is empty, return empty arrays for each column
+        empty_values = [[] for _ in cols]
+        approx_top_k_df = session.createDataFrame([empty_values], spark_col_names)
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=approx_top_k_df,
+            spark_column_names=spark_col_names,
+            snowpark_column_names=spark_col_names,
+        )
+
     approx_top_k_df = input_df.select(
         *[
-            fn.function("approx_top_k")(fn.col(col), round(
+            fn.function("approx_top_k")(fn.col(col), round(row_count / support))
             for col in cols
         ]
     )
@@ -330,10 +353,6 @@ def map_freq_items(rel: relation_proto.Relation) -> DataFrameContainer:
         for value in approx_top_k_values
     ]
 
-    for sp_col_name in cols:
-        spark_col_names.append(
-            f"{input_container.column_map.get_spark_column_name_from_snowpark_column_name(sp_col_name)}_freqItems"
-        )
     approx_top_k_df = session.createDataFrame([filtered_values], spark_col_names)
 
     return DataFrameContainer.create_with_column_mapping(
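Taken together, the two map_freq_items hunks count rows once, build the <col>_freqItems output names before any early return, short-circuit to empty result arrays for an empty DataFrame, and size Snowflake's APPROX_TOP_K call as round(row_count / support). A standalone sketch of the Spark freqItems contract that this implements (exact counting here, purely for illustration):

from collections import Counter


def freq_items_sketch(values: list, support: float = 0.01) -> list:
    # Spark's freqItems returns items whose frequency is at least `support`;
    # an empty input yields an empty result, matching the new early-return path.
    row_count = len(values)
    if row_count == 0:
        return []
    counts = Counter(values)
    return [item for item, n in counts.items() if n / row_count >= support]


print(freq_items_sketch(["a", "a", "b", "a", "c"], support=0.4))  # ['a']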
@@ -22,6 +22,8 @@ from snowflake.snowpark_connect.config import (
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -163,17 +165,21 @@ def process_return_type(
         else:
             parsed_return = return_type
     except ValueError as e:
-        raise PythonException(
+        exception = PythonException(
             f"[UDTF_ARROW_TYPE_CAST_ERROR] Error parsing UDTF return type DDL: {e}"
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
     original_output_schema = proto_to_snowpark_type(parsed_return)
     output_schema = proto_to_snowpark_type(parsed_return)
     # Snowflake UDTF does not support MapType, so we convert it to VariantType.
     output_schema = convert_maptype_to_variant(output_schema)
     if not isinstance(output_schema, StructType):
-        raise PySparkTypeError(
+        exception = PySparkTypeError(
             f"Invalid Python user-defined table function return type. Expect a struct type, but got {parsed_return}"
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
 
     expected_types = None
     if is_arrow_enabled_in_udtf() or is_spark_compatible_udtf_mode_enabled():
@@ -276,12 +282,16 @@ def map_common_inline_user_defined_table_function(
     if require_creating_udtf_in_sproc(udtf_proto):
         snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
         if isinstance(snowpark_udtf_or_error, str):
-            raise PythonException(snowpark_udtf_or_error)
+            exception = PythonException(snowpark_udtf_or_error)
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         snowpark_udtf = snowpark_udtf_or_error
     else:
         udtf_or_error = create_udtf(**kwargs)
         if isinstance(udtf_or_error, str):
-            raise PythonException(udtf_or_error)
+            exception = PythonException(udtf_or_error)
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         udtf = udtf_or_error
         snowpark_udtf = SnowparkUDTF(
             name=udtf.name,
@@ -38,6 +38,8 @@ from snowflake.snowpark.types import (
     TimeType,
     _NumericType,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     DATA_SOURCE_SQL_COMMENT,
     Connection,
@@ -147,9 +149,11 @@ class JdbcDataFrameReader(DataFrameReader):
                 or upper_bound is not None
                 or num_partitions is not None
             ):
-                raise ValueError(
+                exception = ValueError(
                     "when column is not specified, lower_bound, upper_bound, num_partitions are expected to be None"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             if table is not None:
                 partitioned_queries = []
                 table_query = f"SELECT * FROM {table}"
@@ -160,24 +164,32 @@ class JdbcDataFrameReader(DataFrameReader):
                 elif query is not None:
                     partitioned_queries = [query]
                 else:
-                    raise ValueError("table or query is not specified")
+                    exception = ValueError("table or query is not specified")
+                    attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                    raise exception
             else:
                 if lower_bound is None or upper_bound is None or num_partitions is None:
-                    raise ValueError(
+                    exception = ValueError(
                         "when column is specified, lower_bound, upper_bound, num_partitions must be specified"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
 
                 column_type = None
                 for field in struct_schema.fields:
                     if field.name.lower() == column.lower():
                         column_type = field.datatype
                 if column_type is None:
-                    raise ValueError("Column does not exist")
+                    exception = ValueError("Column does not exist")
+                    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                    raise exception
 
                 if not isinstance(column_type, _NumericType) and not isinstance(
                     column_type, DateType
                 ):
-                    raise ValueError(f"unsupported type {column_type}")
+                    exception = ValueError(f"unsupported type {column_type}")
+                    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                    raise exception
                 spark_column_name = f'"{column}"'
                 partitioned_queries = self._generate_partition(
                     table,
@@ -240,7 +252,11 @@ class JdbcDataFrameReader(DataFrameReader):
                     )
                     query_thread_executor.shutdown(wait=False)
                     upload_thread_executor.shutdown(wait=False)
-                    raise future.result()
+                    exception = future.result()
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INTERNAL_ERROR
+                    )
+                    raise exception
                 else:
                     path = future.result()
                     if not path:
@@ -266,7 +282,11 @@ class JdbcDataFrameReader(DataFrameReader):
                     )
                     query_thread_executor.shutdown(wait=False)
                     upload_thread_executor.shutdown(wait=False)
-                    raise f.result()
+                    exception = f.result()
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INTERNAL_ERROR
+                    )
+                    raise exception
         finally:
             close_connection(conn)
 
@@ -283,7 +303,9 @@ class JdbcDataFrameReader(DataFrameReader):
         elif query is not None:
             sql = f"SELECT * FROM ({query}) WHERE 1=0"
         else:
-            raise ValueError("table or query is not specified")
+            exception = ValueError("table or query is not specified")
+            attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+            raise exception
 
         cursor = conn.cursor()
         cursor.execute(sql)
@@ -301,7 +323,11 @@ class JdbcDataFrameReader(DataFrameReader):
             dt = parser.parse(value)
             return int(dt.replace(tzinfo=pytz.UTC).timestamp())
         else:
-            raise TypeError(f"unsupported column type for partition: {column_type}")
+            exception = TypeError(
+                f"unsupported column type for partition: {column_type}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     # this function is only used in data source API for SQL server
     def _to_external_value(self, value: Union[int, str, float], column_type: DataType):
@@ -311,7 +337,11 @@ class JdbcDataFrameReader(DataFrameReader):
             # TODO: SNOW-1909315: support timezone
            return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)
         else:
-            raise TypeError(f"unsupported column type for partition: {column_type}")
+            exception = TypeError(
+                f"unsupported column type for partition: {column_type}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
         fields = []
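The two hunks above convert partition bounds between user-facing values and an internal epoch representation: date/timestamp strings are parsed and pinned to UTC before any arithmetic, then converted back when a bound is spliced into SQL. A self-contained sketch of that round trip (same dateutil/pytz calls as shown; the DataType dispatch is simplified to an isinstance check):

import datetime

import pytz
from dateutil import parser


def to_internal(value):
    # Numeric bounds pass through; strings are treated as date/timestamp bounds,
    # parsed and pinned to UTC epoch seconds.
    if isinstance(value, (int, float)):
        return int(value)
    dt = parser.parse(value)
    return int(dt.replace(tzinfo=pytz.UTC).timestamp())


def to_external(value: int):
    # Convert an internal epoch bound back to a timezone-aware timestamp for SQL.
    return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)


lo = to_internal("2024-01-01")
hi = to_internal("2024-02-01")
step = (hi - lo) // 4  # e.g. four partitions between the bounds
print(to_external(lo), to_external(lo + step))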
@@ -339,7 +369,9 @@ class JdbcDataFrameReader(DataFrameReader):
                 case jaydebeapi.BINARY:
                     field = StructField(name, BinaryType(), is_nullable)
                 case _:
-                    raise ValueError(f"unsupported type: {dbapi_type}")
+                    exception = ValueError(f"unsupported type: {dbapi_type}")
+                    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                    raise exception
 
             fields.append(field)
         return StructType(fields)
@@ -359,7 +391,9 @@ class JdbcDataFrameReader(DataFrameReader):
         processed_lower_bound = self._to_internal_value(lower_bound, column_type)
         processed_upper_bound = self._to_internal_value(upper_bound, column_type)
         if processed_lower_bound > processed_upper_bound:
-            raise ValueError("lower_bound cannot be greater than upper_bound")
+            exception = ValueError("lower_bound cannot be greater than upper_bound")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
 
         if processed_lower_bound == processed_upper_bound or num_partitions <= 1:
             return [select_query]
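Only the guards of _generate_partition are visible here: bounds must be ordered, and a single query is returned when the bounds coincide or num_partitions <= 1. The underlying technique is standard Spark-style JDBC range partitioning; a generic sketch of it, not the packaged implementation:

def generate_partition_predicates(column: str, lower: int, upper: int, num_partitions: int) -> list[str]:
    # Split [lower, upper) into evenly sized strides; the first and last predicates
    # are left open-ended so rows outside the bounds are still covered.
    if lower > upper:
        raise ValueError("lower_bound cannot be greater than upper_bound")
    if lower == upper or num_partitions <= 1:
        return [""]  # a single, unfiltered query
    stride = (upper - lower) // num_partitions or 1
    predicates, current = [], lower + stride
    predicates.append(f"{column} < {current} OR {column} IS NULL")
    for _ in range(num_partitions - 2):
        predicates.append(f"{column} >= {current} AND {column} < {current + stride}")
        current += stride
    predicates.append(f"{column} >= {current}")
    return predicates


print(generate_partition_predicates("id", 0, 100, 4))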
@@ -665,4 +699,6 @@ def get_jdbc_dialect(url: str) -> JdbcDialect:
     for jdbc_dialect in jdbc_dialects:
         if jdbc_dialect.can_handle(url):
             return jdbc_dialect
-    raise ValueError(f"Unsupported JDBC datasource: {url}")
+    exception = ValueError(f"Unsupported JDBC datasource: {url}")
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
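get_jdbc_dialect resolves a dialect by asking each registered dialect whether it can_handle(url), and now raises with an attached error code when none matches. A minimal, self-contained sketch of that dispatch shape, with made-up dialect names purely for illustration:

from dataclasses import dataclass


@dataclass
class SketchDialect:
    # Illustrative stand-in for the package's JdbcDialect objects.
    name: str
    prefix: str

    def can_handle(self, url: str) -> bool:
        return url.startswith(self.prefix)


dialects = [
    SketchDialect("postgres", "jdbc:postgresql:"),
    SketchDialect("sqlserver", "jdbc:sqlserver:"),
]


def get_dialect(url: str) -> SketchDialect:
    for dialect in dialects:
        if dialect.can_handle(url):
            return dialect
    raise ValueError(f"Unsupported JDBC datasource: {url}")


print(get_dialect("jdbc:postgresql://host/db").name)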
@@ -15,8 +15,11 @@ from snowflake import snowpark
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
+    get_compression_for_source_and_options,
     is_cloud_path,
 )
 from snowflake.snowpark_connect.relation.read.map_read_table import map_read_table
@@ -158,12 +161,20 @@ def map_read(
                     options[DBTABLE_OPTION], session, rel.common.plan_id
                 )
             case other:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"UNSUPPORTED FORMAT {other} WITH NO PATH"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case other:
             # TODO: Empty data source
-            raise SnowparkConnectNotImplementedError(f"Unsupported read type: {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unsupported read type: {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     return df_cache_map_put_if_absent(
         (get_session_id(), rel.common.plan_id),
@@ -237,6 +248,14 @@ def _read_file(
     )
     upload_files_if_needed(paths, clean_source_paths, session, read_format)
     paths = [_quote_stage_path(path) for path in paths]
+
+    if read_format in ("csv", "text", "json", "parquet"):
+        compression = get_compression_for_source_and_options(
+            read_format, options, from_read=True
+        )
+        if compression is not None:
+            options["compression"] = compression
+
     match read_format:
         case "csv":
             from snowflake.snowpark_connect.relation.read.map_read_csv import (
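For csv, text, json, and parquet reads, 0.31.0 now asks the new get_compression_for_source_and_options helper in io_utils.py for a compression codec and, when one is returned, injects it into the Snowpark reader options. The helper's body is not part of this diff; a plausible sketch of what such a resolver might do, offered as an assumption rather than the shipped logic:

def resolve_compression(read_format: str, options: dict, from_read: bool = True):
    # Assumed behavior: honor an explicit Spark-style "compression" option,
    # normalizing a few spellings; return None when nothing should be set.
    value = options.get("compression") or options.get("codec")
    if value is None:
        return None
    value = str(value).lower()
    if value in ("none", "uncompressed"):
        return None
    aliases = {"gz": "gzip"}
    return aliases.get(value, value)


opts = {"codec": "gz"}
compression = resolve_compression("csv", opts)
if compression is not None:
    opts["compression"] = compression
print(opts)  # {'codec': 'gz', 'compression': 'gzip'}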
@@ -265,9 +284,11 @@ def _read_file(
 
             return map_read_text(rel, schema, session, paths)
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported format: {read_format}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _skip_upload(path: str, read_format: str):