snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py

@@ -16,7 +16,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark.functions import col, lit, object_construct, sql_expr
+from snowflake.snowpark.functions import col, lit, object_construct, sql_expr, when
 from snowflake.snowpark.types import (
     ArrayType,
     DataType,

@@ -28,11 +28,14 @@ from snowflake.snowpark.types import (
     _NumericType,
 )
 from snowflake.snowpark_connect.config import (
+    auto_uppercase_column_identifiers,
     global_config,
     sessions_config,
     str_to_bool,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     get_compression_for_source_and_options,

@@ -254,9 +257,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
     get_param_from_options(parameters, write_op.options, write_op.source)
     if write_op.partitioning_columns:
         if write_op.source != "parquet":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Partitioning is only supported for parquet format"
             )
+            attach_custom_error_code(
+                exception, ErrorCodes.UNSUPPORTED_OPERATION
+            )
+            raise exception
         # Build Spark-style directory structure: col1=value1/col2=value2/...
         # Example produced expression (Snowflake SQL):
         # 'department=' || TO_VARCHAR("department") || '/' || 'region=' || TO_VARCHAR("region")
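The same pattern repeats throughout the hunks below: instead of raising directly, the code builds the exception, tags it with a code from the new error_codes module via attach_custom_error_code, and then raises it. A minimal sketch of that pattern, assuming the helper simply records the code on the exception object (the real helper in snowflake.snowpark_connect.error.error_utils may do more, and plain NotImplementedError stands in for SnowparkConnectNotImplementedError):

    from enum import Enum


    class ErrorCodes(Enum):
        # Hypothetical subset of the codes referenced in this diff.
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
        INVALID_OPERATION = "INVALID_OPERATION"


    def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> Exception:
        # Assumption: annotate the exception so a stable error code can be
        # reported alongside the original message.
        exc.custom_error_code = code
        return exc


    def write_partitioned(source: str) -> None:
        if source != "parquet":
            exception = NotImplementedError(
                "Partitioning is only supported for parquet format"
            )
            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
            raise exception


    try:
        write_partitioned("csv")
    except NotImplementedError as e:
        print(e, getattr(e, "custom_error_code", None))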
@@ -341,9 +348,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 snowpark_table_name, session
             )
             if isinstance(table_schema_or_error, DataType):  # Table exists
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} already exists"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             create_iceberg_table(
                 snowpark_table_name=snowpark_table_name,
                 location=write_op.options.get("location", None),

@@ -366,9 +377,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "ICEBERG",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not an iceberg table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,

@@ -412,9 +427,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "ICEBERG",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not an iceberg table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,

@@ -430,9 +449,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {write_mode} is not supported"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case _:
             snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
             save_method = write_op.table.save_method

@@ -448,9 +471,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
             if len(write_op.table.table_name) == 0:
                 dbtable_name = write_op.options.get("dbtable", "")
                 if len(dbtable_name) == 0:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         "Save command is not supported without a table name"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 else:
                     snowpark_table_name = _spark_to_snowflake(dbtable_name)

@@ -468,9 +495,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "NORMAL",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not a FDN table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             write_mode = "truncate"
             _validate_schema_and_get_writer(
                 input_df,

@@ -494,9 +525,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "NORMAL",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not a FDN table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             _validate_schema_and_get_writer(
                 input_df,

@@ -528,9 +563,11 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 column_order=_column_order_for_write,
             )
         else:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Save command not supported: {save_method}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def map_write_v2(request: proto_base.ExecutePlanRequest):
@@ -555,9 +592,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     session: snowpark.Session = get_or_create_snowpark_session()

     if write_op.table_name is None or write_op.table_name == "":
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Write operation V2 only support table writing now"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     if write_op.provider.lower() == "iceberg":
         match write_op.mode:

@@ -566,9 +605,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if isinstance(table_schema_or_error, DataType):  # Table exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.table_properties.get("location"),

@@ -587,16 +628,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
                 if get_table_type(snowpark_table_name, session) not in (
                     "ICEBERG",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not an iceberg table"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -614,13 +659,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     "ICEBERG",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not an iceberg table"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -641,9 +692,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     mode="replace",
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             _validate_schema_and_get_writer(
                 input_df, "replace", snowpark_table_name, table_schema_or_error
             ).saveAsTable(

@@ -667,9 +720,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
     else:
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:

@@ -685,16 +740,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 if get_table_type(snowpark_table_name, session) not in (
                     "NORMAL",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not a FDN table"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -712,13 +771,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     "NORMAL",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not a FDN table"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -731,9 +796,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -750,9 +817,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception


 def _get_table_schema_or_error(
@@ -764,6 +833,20 @@ def _get_table_schema_or_error(
         return e


+def _get_writer_for_table_creation(df: snowpark.DataFrame) -> snowpark.DataFrameWriter:
+    # When creating a new table, if case sensitivity is not enabled, we need to rename the columns
+    # to upper case so they are case-insensitive in Snowflake.
+    if auto_uppercase_column_identifiers():
+        for field in df.schema.fields:
+            col_name = field.name
+            # Uppercasing is fine, regardless of whether the original name was quoted or not.
+            # In Snowflake these are equivalent "COL" == COL == col == coL
+            uppercased_name = col_name.upper()
+            if col_name != uppercased_name:
+                df = df.withColumnRenamed(col_name, uppercased_name)
+    return df.write
+
+
 def _validate_schema_and_get_writer(
     input_df: snowpark.DataFrame,
     write_mode: str,
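The new _get_writer_for_table_creation helper uppercases column names before a table is first created, so the resulting Snowflake identifiers behave case-insensitively. A rough illustration of that renaming rule on plain column names (the auto_uppercase flag is a stand-in for the auto_uppercase_column_identifiers config read):

    def uppercase_identifiers(column_names: list[str], auto_uppercase: bool = True) -> list[str]:
        # Mirrors the renaming loop: only names that change would be renamed.
        if not auto_uppercase:
            return column_names
        return [name.upper() for name in column_names]


    print(uppercase_identifiers(["id", "userName", "CREATED_AT"]))
    # ['ID', 'USERNAME', 'CREATED_AT']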
@@ -774,7 +857,7 @@ def _validate_schema_and_get_writer(
         "replace",
         "create_or_replace",
     ):
-        return input_df
+        return _get_writer_for_table_creation(input_df)

     table_schema = None
     if table_schema_or_error is not None:

@@ -783,6 +866,9 @@ def _validate_schema_and_get_writer(
             if "SQL compilation error" in msg and "does not exist" in msg:
                 pass
             else:
+                attach_custom_error_code(
+                    table_schema_or_error, ErrorCodes.INTERNAL_ERROR
+                )
                 raise table_schema_or_error
         elif isinstance(table_schema_or_error, DataType):
             table_schema = table_schema_or_error

@@ -796,16 +882,17 @@ def _validate_schema_and_get_writer(
             if "SQL compilation error" in msg and "does not exist" in msg:
                 pass
             else:
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e

     if table_schema is None:
         # If table does not exist, we can skip the schema validation
-        return input_df
+        return _get_writer_for_table_creation(input_df)

     _validate_schema_for_append(table_schema, input_df.schema, snowpark_table_name)

     # if table exists and case sensitivity is not enabled, we need to rename the columns to match existing table schema
-    if
+    if auto_uppercase_column_identifiers():

         for field in input_df.schema.fields:
             # Find the matching field in the table schema (case-insensitive)

@@ -815,8 +902,8 @@ def _validate_schema_and_get_writer(
                 (
                     f
                     for f in table_schema.fields
-                    if unquote_if_quoted(f.name).
-                    == unquote_if_quoted(col_name).
+                    if unquote_if_quoted(f.name).upper()
+                    == unquote_if_quoted(col_name).upper()
                 ),
                 None,
             )
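When appending to an existing table with auto-uppercasing enabled, incoming columns are matched against the table columns case-insensitively after stripping surrounding quotes. A small sketch of that lookup, with a simplified unquote helper standing in for Snowpark's unquote_if_quoted:

    from typing import Optional


    def unquote(name: str) -> str:
        # Simplified stand-in for unquote_if_quoted: strip one pair of surrounding double quotes.
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            return name[1:-1]
        return name


    def find_matching_column(table_columns: list[str], col_name: str) -> Optional[str]:
        # Case-insensitive lookup, mirroring the generator expression in the hunk above.
        return next(
            (c for c in table_columns if unquote(c).upper() == unquote(col_name).upper()),
            None,
        )


    print(find_matching_column(['"UserName"', "ID"], "username"))  # '"UserName"'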
@@ -851,21 +938,25 @@ def _validate_schema_for_append(
         case (StructType() as table_struct, StructType() as data_struct):

             def _comparable_col_name(col: str) -> str:
-                name = col if
+                name = col.upper() if auto_uppercase_column_identifiers() else col
                 if compare_structs:
                     return name
                 else:
                     return unquote_if_quoted(name)

             def invalid_struct_schema():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Cannot resolve columns for the existing table {snowpark_table_name} ({table_schema.simple_string()}) with the data schema ({data_schema.simple_string()})."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception

             if len(table_struct.fields) != len(data_struct.fields):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"The column number of the existing table {snowpark_table_name} ({table_schema.simple_string()}) doesn't match the data schema ({data_schema.simple_string()}).)"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception

             table_field_names = {
                 _comparable_col_name(field.name) for field in table_struct.fields
@@ -928,9 +1019,11 @@ def _validate_schema_for_append(
         case (DateType(), _) if isinstance(data_schema, (DateType, TimestampType)):
             return
         case (_, _):
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST] Cannot write incompatible data for the table {snowpark_table_name}: Cannot safely cast {data_schema.simple_string()} to {table_schema.simple_string()}"
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception


 def create_iceberg_table(
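The append validation walks the table and data types with structural pattern matching and only allows pairs that can be safely cast (for example, dates and timestamps written into a DATE column); anything else is rejected with the INCOMPATIBLE_DATA_FOR_TABLE error above. A simplified sketch of that style of check, using plain Python types instead of Snowpark DataTypes:

    import datetime as dt


    def can_safely_cast(table_type: type, data_type: type) -> bool:
        # Simplified stand-in for the match on (table_schema, data_schema) pairs.
        match (table_type, data_type):
            case (t, d) if t is d:
                return True
            case (dt.date, d) if d in (dt.date, dt.datetime):
                # Mirrors the DateType/TimestampType case in the hunk above.
                return True
            case _:
                return False


    print(can_safely_cast(dt.date, dt.datetime))  # True
    print(can_safely_cast(int, str))              # False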
@@ -970,9 +1063,11 @@ def create_iceberg_table(
         case "create_or_replace":
             create_sql = "CREATE OR REPLACE"
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Write mode {mode} is not supported for iceberg table"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
     sql = f"""
     {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
     CATALOG = 'SNOWFLAKE'
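create_iceberg_table assembles a CREATE or CREATE OR REPLACE ICEBERG TABLE statement with CATALOG = 'SNOWFLAKE' from the column definitions. A hedged sketch of roughly what that string building looks like; the column DDL and options here are illustrative, not the module's actual output:

    def build_iceberg_ddl(table_name: str, columns: dict[str, str], replace: bool = False) -> str:
        # Illustrative only: joins "name type" pairs the way the diff joins table_schema.
        create_sql = "CREATE OR REPLACE" if replace else "CREATE"
        cols = ", ".join(f"{name} {sql_type}" for name, sql_type in columns.items())
        return f"{create_sql} ICEBERG TABLE {table_name} ({cols}) CATALOG = 'SNOWFLAKE'"


    print(build_iceberg_ddl("db.schema.events", {"ID": "NUMBER", "TS": "TIMESTAMP_NTZ"}))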
@@ -988,20 +1083,35 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
     json: construct the dataframe to 1 column in json format
         1. Append columns which represents the column name
         2. Use object_construct to aggregate the dataframe into 1 column
-
+    csv:
+        Use "" to replace empty string
     """
-    [12 removed lines are not captured in this diff view]
+    match source:
+        case "json":
+            rand_salt = random_string(10, "_")
+            rewritten_df = input_df.with_columns(
+                [co + rand_salt for co in input_df.columns],
+                [lit(unquote_if_quoted(co)) for co in input_df.columns],
+            )
+            construct_key_values = []
+            for co in input_df.columns:
+                construct_key_values.append(col(co + rand_salt))
+                construct_key_values.append(col(co))
+            return rewritten_df.select(object_construct(*construct_key_values))
+        case "csv":
+            new_cols = []
+            for co in input_df.columns:
+                if isinstance(input_df.schema[co].datatype, StringType):
+                    new_col = col(co)
+                    new_col = when(
+                        new_col.isNotNull() & (new_col == ""), lit('""')
+                    ).otherwise(new_col)
+                    new_cols.append(new_col.alias(co))
+                else:
+                    new_cols.append(col(co))
+            return input_df.select(new_cols)
+        case _:
+            return input_df


 def handle_column_names(
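For JSON output, rewrite_df pairs each value column with a literal column holding its unquoted name and folds them into a single OBJECT_CONSTRUCT column; for CSV it swaps empty strings for a literal "" so they round-trip. A plain-Python sketch of the JSON row shaping, with OBJECT_CONSTRUCT emulated by a dict (the salted helper columns in the real code are an implementation detail and are omitted here):

    def rows_to_json_objects(columns: list[str], rows: list[tuple]) -> list[dict]:
        # Emulates OBJECT_CONSTRUCT(name1, value1, name2, value2, ...) per row.
        return [dict(zip(columns, row)) for row in rows]


    print(rows_to_json_objects(["id", "name"], [(1, "a"), (2, "b")]))
    # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]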
@@ -1079,9 +1189,11 @@ def store_files_locally(

 def _truncate_directory(directory_path: Path) -> None:
     if not directory_path.exists():
-        raise FileNotFoundError(
+        exception = FileNotFoundError(
             f"The specified directory {directory_path} does not exist."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     # Iterate over all the files and directories in the specified directory
     for file in directory_path.iterdir():
         # Check if it is a file or directory and remove it
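_truncate_directory now raises a FileNotFoundError tagged with INVALID_INPUT when the target directory is missing, then removes every entry in it. A minimal pathlib sketch of that behaviour; using shutil.rmtree for subdirectories is an assumption about how entries are removed:

    import shutil
    from pathlib import Path


    def truncate_directory(directory_path: Path) -> None:
        if not directory_path.exists():
            raise FileNotFoundError(
                f"The specified directory {directory_path} does not exist."
            )
        for entry in directory_path.iterdir():
            # Remove files and symlinks directly, recurse into directories.
            if entry.is_dir() and not entry.is_symlink():
                shutil.rmtree(entry)
            else:
                entry.unlink()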
snowflake/snowpark_connect/relation/write/map_write_jdbc.py

@@ -4,6 +4,8 @@

 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.map_read_jdbc import (
     close_connection,
     create_connection,

@@ -35,7 +37,9 @@ def map_write_jdbc(
         dbtable = None

     if dbtable is None:
-        raise ValueError("Include dbtable is required option")
+        exception = ValueError("Include dbtable is required option")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     try:
         JdbcDataFrameWriter(session, jdbc_options).jdbc_write_dbapi(

@@ -46,4 +50,6 @@
             write_mode=write_mode,
         )
     except Exception as e:
-        raise Exception(f"Error accessing JDBC datasource for write: {e}")
+        exception = Exception(f"Error accessing JDBC datasource for write: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
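map_write_jdbc now validates that the dbtable option is present and wraps any failure from the DB-API writer in a generic exception carrying INTERNAL_ERROR. A reduced sketch of that option check and wrapping; the do_write callback is a placeholder, not the module's JdbcDataFrameWriter API:

    def write_via_jdbc(options: dict, do_write) -> None:
        # The dbtable option is required before any connection is attempted.
        if not options.get("dbtable"):
            raise ValueError("Include dbtable is required option")
        try:
            do_write(options)
        except Exception as e:
            # Wrap the original error so callers see a single failure point.
            raise Exception(f"Error accessing JDBC datasource for write: {e}") from e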
snowflake/snowpark_connect/resources_initializer.py

@@ -5,6 +5,8 @@ import pathlib
 import threading
 import time

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger

@@ -119,9 +121,11 @@ def wait_for_resource_initialization() -> None:
         logger.error(
             "Resource initialization failed - initializer thread has been running for over 300 seconds."
         )
-        raise RuntimeError(
+        exception = RuntimeError(
            "Resource initialization failed - initializer thread has been running for over 300 seconds."
         )
+        attach_custom_error_code(exception, ErrorCodes.RESOURCE_INITIALIZATION_FAILED)
+        raise exception


 def set_upload_jars(upload: bool) -> None:
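wait_for_resource_initialization gives the background initializer thread up to 300 seconds and now raises a RuntimeError tagged RESOURCE_INITIALIZATION_FAILED when that budget is exhausted. A generic polling sketch of that kind of wait; the 300-second budget comes from the diff, while the poll interval and readiness callback are illustrative:

    import time


    def wait_for(ready, timeout_s: float = 300.0, poll_s: float = 1.0) -> None:
        # Poll a readiness callback until it returns True or the budget runs out.
        deadline = time.monotonic() + timeout_s
        while not ready():
            if time.monotonic() > deadline:
                raise RuntimeError(
                    "Resource initialization failed - initializer thread has been "
                    "running for over 300 seconds."
                )
            time.sleep(poll_s)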
|