snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +131 -34
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -1
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/read/map_read_jdbc.py

@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDataFrameReader
 from snowflake.snowpark_connect.relation.read.utils import (
     Connection,
@@ -28,7 +30,9 @@ def create_connection(jdbc_options: dict[str, str]) -> Connection:
         return jaydebeapi.connect(driver, url, jdbc_options)
     except Exception as e:
         jpype.detachThreadFromJVM()
-        raise Exception(f"Error connecting JDBC datasource: {e}")
+        exception = Exception(f"Error connecting JDBC datasource: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
 
 def close_connection(conn: Connection) -> None:
@@ -70,17 +74,23 @@ def map_read_jdbc(
         dbtable = None
 
     if not dbtable and not query:
-        raise ValueError("Include dbtable or query is required option")
+        exception = ValueError("Include dbtable or query is required option")
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception
 
     if query is not None and dbtable is not None:
-        raise ValueError(
+        exception = ValueError(
             "Not allowed to specify dbtable and query options at the same time"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     if query is not None and partition_column is not None:
-        raise ValueError(
+        exception = ValueError(
             "Not allowed to specify partitionColumn and query options at the same time"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     try:
         df = JdbcDataFrameReader(session, jdbc_options).jdbc_read_dbapi(
@@ -105,4 +115,6 @@ def map_read_jdbc(
             snowpark_column_types=[f.datatype for f in df.schema.fields],
         )
     except Exception as e:
-        raise Exception(f"Error accessing JDBC datasource for read: {e}")
+        exception = Exception(f"Error accessing JDBC datasource for read: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
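The change repeated in map_read_jdbc.py above, and in every file that follows, is the same refactor: rather than raising an exception directly, the code now builds the exception, tags it with an ErrorCodes value via attach_custom_error_code, and only then raises it. The helper's real implementation lives in snowflake/snowpark_connect/error/error_utils.py and is not part of the hunks shown here; the following is only a minimal sketch of the pattern, assuming the helper records the code as an attribute on the exception instance.

# Minimal, hypothetical sketch of the error-tagging pattern used throughout
# this release; the attribute name and ErrorCodes members are illustrative only.
from enum import Enum


class ErrorCodes(Enum):
    INSUFFICIENT_INPUT = "INSUFFICIENT_INPUT"
    INVALID_INPUT = "INVALID_INPUT"
    INTERNAL_ERROR = "INTERNAL_ERROR"


def attach_custom_error_code(exc, code):
    # Assumption: store the code on the exception so later handlers can read it.
    exc.custom_error_code = code


def validate_jdbc_options(dbtable, query):
    # Before 0.31.0 these were bare `raise ValueError(...)` statements.
    if not dbtable and not query:
        exception = ValueError("Include dbtable or query is required option")
        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
        raise exception
    if dbtable is not None and query is not None:
        exception = ValueError(
            "Not allowed to specify dbtable and query options at the same time"
        )
        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
        raise exception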

snowflake/snowpark_connect/relation/read/map_read_json.py

@@ -28,6 +28,8 @@ from snowflake.snowpark.types import (
     TimestampType,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.map_read import JsonReaderConfig
 from snowflake.snowpark_connect.relation.read.metadata_utils import (
     add_filename_metadata_to_reader,
@@ -64,9 +66,11 @@ def map_read_json(
 
     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for JSON files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
     else:
         snowpark_options = options.convert_to_snowpark_args()
         raw_options = rel.read.data_source.options
@@ -363,9 +367,11 @@ def construct_row_by_schema(
                     content.get(col_name, None), sf.datatype, snowpark_options
                 )
             else:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"JSON construct {str(content)} to StructType failed"
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         return result
     elif isinstance(schema, ArrayType):
         result = []

snowflake/snowpark_connect/relation/read/map_read_parquet.py

@@ -22,6 +22,8 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 from snowflake.snowpark.column import METADATA_FILENAME
 from snowflake.snowpark.types import DataType, DoubleType, IntegerType, StringType
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.metadata_utils import (
     add_filename_metadata_to_reader,
 )
@@ -44,9 +46,11 @@ def map_read_parquet(
     """Read a Parquet file into a Snowpark DataFrame."""
 
     if rel.read.is_streaming is True:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for Parquet files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     snowpark_options = options.convert_to_snowpark_args()
     raw_options = rel.read.data_source.options
@@ -155,10 +159,14 @@ def _discover_partition_columns(
             if i not in dir_level_to_column_name:
                 dir_level_to_column_name[i] = key
             elif dir_level_to_column_name[i] != key:
-                raise ValueError(
+                exception = ValueError(
                     f"Conflicting partition column names detected: '{dir_level_to_column_name[i]}' and '{key}' "
                     f"at the same directory level"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
 
             partition_columns_values[key].add(value)
 
@@ -166,10 +174,12 @@ def _discover_partition_columns(
     for level in sorted(dir_level_to_column_name.keys()):
         col_name = dir_level_to_column_name[level]
         if col_name in seen_columns:
-            raise ValueError(
+            exception = ValueError(
                 f"Found partition column '{col_name}' at multiple directory levels. "
                 f"A partition column can only appear at a single level."
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(col_name)
 
     ordered_columns = [
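The two new ValueError branches in _discover_partition_columns enforce the usual rules for Hive-style partition paths: at a given directory depth every path must use the same partition column name, and a given column name may appear at only one depth. A self-contained sketch of those checks (the helper name is hypothetical, not the module's code):

# Hypothetical standalone version of the two partition-layout checks guarded
# by the new ValueError branches in map_read_parquet.py.
def check_partition_layout(paths):
    dir_level_to_column_name = {}  # directory depth -> partition column name
    for path in paths:
        parts = [p for p in path.split("/") if "=" in p]
        for level, part in enumerate(parts):
            key = part.split("=", 1)[0]
            if level not in dir_level_to_column_name:
                dir_level_to_column_name[level] = key
            elif dir_level_to_column_name[level] != key:
                raise ValueError(
                    f"Conflicting partition column names detected: "
                    f"'{dir_level_to_column_name[level]}' and '{key}' at the same directory level"
                )
    seen_columns = set()
    ordered_columns = []
    for level in sorted(dir_level_to_column_name):
        col_name = dir_level_to_column_name[level]
        if col_name in seen_columns:
            raise ValueError(
                f"Found partition column '{col_name}' at multiple directory levels. "
                f"A partition column can only appear at a single level."
            )
        seen_columns.add(col_name)
        ordered_columns.append(col_name)
    return ordered_columns


# Accepted: year=2024/month=01/f.parquet together with year=2024/month=02/f.parquet.
# Rejected: mixing year=2024/month=01/... with month=02/year=2024/...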

snowflake/snowpark_connect/relation/read/map_read_socket.py

@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -30,7 +32,9 @@ def map_read_socket(
         host = options.get("host", None)
         port = options.get("port", None)
         if not host or not port:
-            raise ValueError("Host and port must be provided in options.")
+            exception = ValueError("Host and port must be provided in options.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
             try:
                 s.connect((host, int(port)))
@@ -56,8 +60,12 @@ def map_read_socket(
                     snowpark_column_names=[snowpark_cname],
                 )
             except OSError as e:
-                raise Exception(f"Error connecting to {host}:{port} - {e}")
+                exception = Exception(f"Error connecting to {host}:{port} - {e}")
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
     else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Socket reads are only supported in streaming mode."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

snowflake/snowpark_connect/relation/read/map_read_table.py

@@ -18,6 +18,8 @@ from snowflake.snowpark_connect.column_name_handler import (
 )
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
@@ -64,9 +66,11 @@ def post_process_df(
         # Check if this is a table/view not found error
        # Snowflake error codes: 002003 (42S02) - Object does not exist or not authorized
         if hasattr(e, "sql_error_code") and e.sql_error_code == 2003:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TABLE_OR_VIEW_NOT_FOUND] The table or view cannot be found. {source_table_name}"
-            )
+            )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception from None  # Suppress original exception to reduce message size
         # Re-raise if it's not a table not found error
         raise
 
@@ -118,9 +122,11 @@ def get_table_from_name(
 
     # Verify if recursive view read is not attempted
     if table_name in get_processed_views():
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
@@ -159,10 +165,14 @@ def map_read_table(
         and rel.read.data_source.format.lower() == "iceberg"
     ):
         if len(rel.read.data_source.paths) != 1:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unexpected paths: {rel.read.data_source.paths}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         table_identifier = rel.read.data_source.paths[0]
     else:
-        raise ValueError("The relation must have a table identifier.")
+        exception = ValueError("The relation must have a table identifier.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return get_table_from_name(table_identifier, session, rel.common.plan_id)
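The new guard in get_table_from_name raises a Spark-style AnalysisException when a view ends up reading itself. A self-contained sketch of that kind of cycle detection, assuming the processed-view registry is a set maintained while views are being expanded (the names here are hypothetical):

# Hypothetical sketch of recursive-view detection: track the views currently
# being expanded and fail if one of them is requested again.
class AnalysisException(Exception):
    pass


_views_in_progress = set()


def expand_view(view_name, resolve_body):
    if view_name in _views_in_progress:
        raise AnalysisException(
            f"[RECURSIVE_VIEW] Recursive view `{view_name}` detected "
            f"(cycle: `{view_name}` -> `{view_name}`)"
        )
    _views_in_progress.add(view_name)
    try:
        # resolve_body may call expand_view again for nested views.
        return resolve_body(view_name)
    finally:
        _views_in_progress.discard(view_name)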

snowflake/snowpark_connect/relation/read/map_read_text.py

@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
@@ -82,9 +84,11 @@ def map_read_text(
     """
     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for CSV files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     df = read_text(paths[0], schema, session, rel.read.data_source.options)
     if len(paths) > 1:

snowflake/snowpark_connect/relation/read/metadata_utils.py

@@ -16,6 +16,8 @@ from snowflake.snowpark.column import METADATA_FILENAME
 from snowflake.snowpark.functions import col
 from snowflake.snowpark.types import StructField
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 
 # Constant for the metadata filename column name
 METADATA_FILENAME_COLUMN = "METADATA$FILENAME"
@@ -129,9 +131,11 @@ def filter_metadata_columns(
     if len(non_metadata_columns) == 0:
         # DataFrame contains only metadata columns (METADATA$FILENAME), no actual data columns remaining.
         # We don't have a way to return an empty dataframe.
-        raise AnalysisException(
+        exception = AnalysisException(
             "[DATAFRAME_MISSING_DATA_COLUMNS] Cannot perform operation on DataFrame that contains no data columns."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
     filtered_df = result_df.select([col(name) for name in non_metadata_columns])
 

snowflake/snowpark_connect/relation/stage_locator.py

@@ -11,6 +11,8 @@ from s3fs.core import S3FileSystem
 from snowflake import snowpark
 from snowflake.snowpark.session import Session
 from snowflake.snowpark_connect.config import sessions_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     get_cloud_from_url,
     parse_azure_url,
@@ -42,9 +44,11 @@ def get_paths_from_stage(
                 rewrite_paths.append(f"{stage_name}/{path}")
             paths = rewrite_paths
         case "gcp":
-            raise AnalysisException(
+            exception = AnalysisException(
                 "You must configure an integration for Google Cloud Storage to perform I/O operations rather than accessing the URL directly. Reference: https://docs.snowflake.com/en/user-guide/data-load-gcs-config"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case _:
             filesystem, parsed_path = url_to_fs(paths[0])
             if isinstance(filesystem, S3FileSystem):  # aws

snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py

@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -65,9 +67,13 @@ class JdbcDataFrameWriter(DataFrameWriter):
                     self._create_table(conn, table, container, jdbc_dialect)
             case "errorifexists":
                 if table_exist:
-                    raise ValueError(
+                    exception = ValueError(
                         "table is already exist and write mode is ERROR_IF_EXISTS"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case "overwrite":
@@ -82,7 +88,9 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case _:
-                raise ValueError(f"Invalid write mode value{write_mode}")
+                exception = ValueError(f"Invalid write mode value{write_mode}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
 
         task_insert_into_data_source_with_retry(
             input_df,
@@ -141,6 +149,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 cursor.execute(sql)
             except Exception as e:
                 logger.error(f"failed to drop table {table} from the data source {e}")
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e
 
     def _create_table(
@@ -189,6 +198,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 cursor.execute(sql)
             except Exception as e:
                 logger.error(f"failed to create a table {table} from the data source {e}")
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e
 
 
@@ -218,6 +228,7 @@ def _task_insert_into_data_source(
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
         conn.rollback()
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         cursor.close()
@@ -274,6 +285,7 @@ def task_insert_into_data_source_with_retry(
         )
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         close_connection(conn)
@@ -339,4 +351,8 @@ def convert_sp_to_sql_type(
                 case _:
                     return "TIMESTAMP"
         case _:
-            raise TypeError(f"Unsupported data type: {datatype.__class__.__name__}")
+            exception = TypeError(
+                f"Unsupported data type: {datatype.__class__.__name__}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
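Because the code travels on the exception object rather than in the message text, handlers that catch these exceptions can branch on it after the fact. A short consumer-side sketch, under the same assumption as above that the helper stores the code as an attribute (the attribute name is illustrative, not taken from this diff):

# Hypothetical consumer-side handling: read a tagged code back off a caught
# exception; falls back to "UNTAGGED" for exceptions raised without a code.
import logging

logger = logging.getLogger(__name__)


def describe_failure(exc):
    code = getattr(exc, "custom_error_code", None)
    if code is None:
        return "UNTAGGED"
    # Works for enum members (.name) and plain string constants alike.
    return getattr(code, "name", str(code))


def run_with_logging(action):
    try:
        return action()
    except Exception as e:
        logger.warning("operation failed with code %s", describe_failure(e))
        raise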