snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect was flagged as potentially problematic by the registry.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,9 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import get_outer_dataframes
@@ -26,7 +29,7 @@ def check_struct_and_get_field_datatype(field_name, schema):
         else:
             return None
     else:
-        None
+        return None


 def map_unresolved_star(
@@ -53,16 +56,17 @@ def map_unresolved_star(
         return spark_names, typed_column

     # scenario where it is expanding * to mulitple columns
-    spark_names = []
-    snowpark_names = []
-    qualifiers = []
+    spark_names: list[str] = []
+    snowpark_names: list[str] = []
+    qualifiers: list[set[ColumnQualifier]] = []

+    target_qualifier = ColumnQualifier(tuple(name_parts[:-1]))
     (
         spark_names,
         snowpark_names,
         qualifiers,
     ) = column_mapping.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+        target_qualifier
     )

     if len(spark_names) == 0:
@@ -73,7 +77,7 @@ def map_unresolved_star(
                 snowpark_names,
                 qualifiers,
             ) = column_mapping_for_outer_df.get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
-
+                target_qualifier
             )
             if len(spark_names) > 0:
                 break
@@ -139,7 +143,7 @@ def map_unresolved_star(
             final_sql_expr,
             lambda final_sql_expr=final_sql_expr: typer.type(final_sql_expr),
         )
-        typed_column.set_multi_col_qualifiers([
+        typed_column.set_multi_col_qualifiers([set() for _ in spark_names])
         return spark_names, typed_column
     else:
         result_exp = snowpark_fn.sql_expr(
@@ -152,9 +156,11 @@ def map_unresolved_star(
         typed_column.set_multi_col_qualifiers(column_mapping.get_qualifiers())
         return spark_names, typed_column

-    raise AnalysisException(
+    exception = AnalysisException(
         f"[UNRESOLVED_STAR] The unresolved star expression {exp} is not supported."
     )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


 def map_unresolved_star_struct(
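The ColumnQualifier changes above route a qualified star through the column map: the qualifier is built from the name parts before the star and matched against each column's recorded qualifiers, first on the current dataframe and then on any outer dataframes. A minimal client-side sketch of the Spark pattern this serves (the alias "t" and the data are illustrative, not taken from the package):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])
    # Selecting "t.*" sends an unresolved star whose qualifier is ("t",);
    # map_unresolved_star expands it to the columns registered under that alias.
    df.alias("t").select("t.*").show()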
@@ -8,6 +8,8 @@ from pyspark.errors.exceptions.base import AnalysisException
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake.snowpark.types import DataType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.identifiers import (
@@ -39,9 +41,11 @@ def update_field_in_schema(
                     field.name, updated_subschema, field.nullable, _is_column=False
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}` in `{field}`"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             field_updated = True
         else:
             new_field = field  # leave unchanged
@@ -59,9 +63,11 @@ def update_field_in_schema(
         # if the value type is None that means we want to drop the field and spark does not throw an error if the field does not exists
         # but if the value type is not None, it means we should add or update this field which has already been covered above
         # if we reach this code, it means the field should have existed
-        raise AnalysisException(
+        exception = AnalysisException(
             message=f"[FIELD_NOT_FOUND] No such struct field `{field_str}`"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return StructType(new_fields)


@@ -99,9 +105,11 @@ def map_update_fields(
     )

     if not isinstance(struct_typed_column.typ, StructType):
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "update_fields({struct_name}, ...)" due to data type mismatch: Parameter 1 requires the "STRUCT" type'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception

     final_schema = struct_typed_column.typ
     value_column_list = []
@@ -137,9 +145,11 @@ def map_update_fields(
     final_name = f"update_fields({struct_name}, {update_operations_str})"

     if len(final_schema.fields) == 0:
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "{final_name}" due to data type mismatch: Cannot drop all fields in struct.'
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception

     @snowpark_fn.udf(
         input_types=input_types_to_the_udf,
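The same refactor repeats across this release: instead of raising directly, the mapper builds the exception, calls attach_custom_error_code with an ErrorCodes member, and only then raises, so the exception type seen by the Spark client is unchanged while an error code rides along. The real helper lives in snowflake.snowpark_connect.error and its body is not shown in this diff; a minimal sketch of the calling pattern, under the assumption that the helper simply tags the exception object, looks like:

    from enum import Enum


    class ErrorCodes(Enum):
        # Hypothetical stand-in for snowflake.snowpark_connect.error.error_codes.ErrorCodes
        INVALID_INPUT = "INVALID_INPUT"
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"


    def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
        # Hypothetical stand-in: tag the exception so server-side handlers and
        # telemetry can read the code without changing the raised type.
        exception.custom_error_code = code  # type: ignore[attr-defined]
        return exception


    exc = ValueError("example failure")
    attach_custom_error_code(exc, ErrorCodes.INVALID_INPUT)
    # raise exc  # the real code paths raise the tagged exception here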
@@ -6,7 +6,11 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
@@ -29,6 +33,8 @@ SPARK_RANKING_FUNCTIONS = frozenset(
     ]
 )

+RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS = frozenset(["percent_rank"])
+
 CAPITAL_FUNCTION_NAMES = frozenset(["rank()", "dense_rank()", "percent_rank()"])


@@ -128,6 +134,11 @@ def map_window_function(
         case expressions_proto.Expression.Window.WindowFrame.FrameType.FRAME_TYPE_ROW:
             frame_name.append("ROWS BETWEEN")
             frame_type_func_string = "rows_between"
+            if proto_func_name in RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS:
+                # Seems like Snowflake and Spark have different understanding of some functions. For those,
+                # Spark only allows rows_between while Snowflake only allows range_between. To be compatible
+                # with Spark, we have to use range_between here.
+                frame_type_func_string = "range_between"
             lower_name, lower = parse_frame_boundary(
                 exp.window.frame_spec.lower, is_upper=False
             )
@@ -138,9 +149,11 @@ def map_window_function(
                 lower != snowpark.Window.UNBOUNDED_PRECEDING
                 or upper != snowpark.Window.CURRENT_ROW
             ):
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RowFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
@@ -165,9 +178,11 @@ def map_window_function(
                 orders = orders[:1]

             if proto_func_name in SPARK_RANKING_FUNCTIONS:
-                raise SparkException.invalid_ranking_function_window_frame(
+                exception = SparkException.invalid_ranking_function_window_frame(
                     window_frame=f"specifiedwindowframe(RangeFrame, {lower_name}, {upper_name})"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             is_unbounded = (
                 lower == snowpark.Window.UNBOUNDED_PRECEDING
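Per the comment in the hunk above, the new RANGE_BASED_WINDOW_FRAME_ONLY_SNOWFLAKE_FUNCTIONS set makes a percent_rank window that arrives with a Spark ROWS frame translate to range_between on the Snowflake side. An illustrative client query of the affected shape (standard PySpark API, sample data made up):

    from pyspark.sql import SparkSession, functions as F
    from pyspark.sql.window import Window

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 3)], ["grp", "val"])
    # percent_rank is the one function currently listed in the new frozenset.
    w = Window.partitionBy("grp").orderBy("val")
    df.withColumn("pr", F.percent_rank().over(w)).show()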
@@ -11,6 +11,8 @@ import pyspark.sql.connect.proto.types_pb2 as types_proto

 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MissingDatabase, MissingSchema
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
@@ -39,28 +41,40 @@ class AbstractSparkCatalog(ABC):
         description: str,
         **options: typing.Any,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("createTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("createTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listDatabases(
         self,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listDatabases is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "listDatabases is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError("getDatabase is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def databaseExists(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("databaseExists is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "databaseExists is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listTables(
@@ -68,14 +82,18 @@ class AbstractSparkCatalog(ABC):
         spark_dbName: str | None = None,
         pattern: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listTables is not implemented")
+        exception = SnowparkConnectNotImplementedError("listTables is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def getTable(
         self,
         spark_tableName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("getTable is not implemented")
+        exception = SnowparkConnectNotImplementedError("getTable is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def tableExists(
@@ -83,7 +101,9 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("tableExists is not implemented")
+        exception = SnowparkConnectNotImplementedError("tableExists is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def listColumns(
@@ -91,36 +111,50 @@ class AbstractSparkCatalog(ABC):
         spark_tableName: str,
         spark_dbName: str | None = None,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("listColumns is not implemented")
+        exception = SnowparkConnectNotImplementedError("listColumns is not implemented")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def currentDatabase(self) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError("currentDatabase is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "currentDatabase is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def setCurrentDatabase(
         self,
         spark_dbName: str,
     ) -> pandas.DataFrame:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "setCurrentDatabase is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropGlobalTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "dropGlobalTempView is not implemented"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     @abstractmethod
     def dropTempView(
         self,
         spark_view_name: str,
     ) -> DataFrameContainer:
-        raise SnowparkConnectNotImplementedError("dropTempView is not implemented")
+        exception = SnowparkConnectNotImplementedError(
+            "dropTempView is not implemented"
+        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     def cacheTable(
         self,
@@ -135,9 +169,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -168,9 +204,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -194,9 +232,11 @@ class AbstractSparkCatalog(ABC):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if sf_database is None:
             sf_database = _get_current_snowflake_database()
         if sf_schema is None:
@@ -249,7 +289,11 @@ def _process_multi_layer_database(
             else:
                 return None, c, d
         case _:
-
+            exception = ValueError(
+                f"Unexpected database identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception


 def _process_multi_layer_identifier(
@@ -283,5 +327,9 @@ def _process_multi_layer_identifier(
             else:
                 snowflake_database, snowflake_schema, snowflake_obj = d, s, t
         case _:
-
+            exception = ValueError(
+                f"Unexpected table/view identifier format: {spark_mli}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
     return spark_catalog, snowflake_database, snowflake_schema, snowflake_obj
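These AbstractSparkCatalog methods back the client's spark.catalog surface, so an unimplemented method now fails with an UNSUPPORTED_OPERATION code attached rather than a bare not-implemented error. Illustrative calls that route through them (standard PySpark Catalog API; the database name is made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # Each call maps onto one of the catalog methods shown above.
    print(spark.catalog.currentDatabase())
    print(spark.catalog.databaseExists("nonexistent_db"))
    print([t.name for t in spark.catalog.listTables()])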
@@ -19,11 +19,14 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
 )
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import BooleanType, StringType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -109,9 +112,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
         sf_schema = sf_schema.replace("*", ".*")
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         dbs: list[Schema] | None = None
@@ -131,7 +136,8 @@
             )
         if dbs is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         catalogs: list[str] = list()
@@ -163,9 +169,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         """Listing a single database that's accessible in Snowflake."""
         catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog

         db: Schema | None = None
@@ -184,7 +192,8 @@
             )
         if db is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch database {spark_dbName} after all retry attempts"
+                f"Failed to fetch database {spark_dbName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         name = unquote_if_quoted(db.name)
@@ -307,9 +316,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 spark_dbName
             )
             if catalog is not None and self != catalog:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Calling into another catalog is not currently supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         else:
             catalog = sf_database = sf_schema = None

@@ -440,9 +451,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         table: Table | None = None
         for attempt in Retrying(
@@ -463,7 +476,8 @@

         if table is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch table {spark_tableName} after all retry attempts"
+                f"Failed to fetch table {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )

         return pandas.DataFrame(
@@ -563,9 +577,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         for attempt in Retrying(
             max_retries=5,
             initial_backoff=100,  # 100ms
@@ -598,7 +614,8 @@
             )
         if columns is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+                f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -702,26 +719,34 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         if source == "":
             source = global_config.get("spark.sql.sources.default")
         if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Source '{source}' is not currently supported by Catalog.createTable. "
                 "Maybe default value through 'spark.sql.sources.default' should be set."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if path != "":
             # External table creation is not supported currently.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "External table creation is not supported currently."
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

         session = get_or_create_snowpark_session()
         # Managed table
         if schema.ByteSize() == 0:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unable to infer schema for {source.upper()}. It must be specified manually.",
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)
-        qualifiers = [
+        qualifiers: list[set[ColumnQualifier]] = [
+            {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+        ]
         column_types = [f.datatype for f in sp_schema.fields]
         return DataFrameContainer.create_with_column_mapping(
             dataframe=session.createDataFrame([], sp_schema),
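Per the createTable hunk above, only the file sources csv, json, avro, parquet, orc and xml are accepted, external paths are rejected, and the schema must be supplied explicitly. A hedged client-side call that satisfies those constraints (table name and schema are illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql.types import IntegerType, StringType, StructField, StructType

    spark = SparkSession.builder.getOrCreate()
    schema = StructType(
        [StructField("id", IntegerType()), StructField("name", StringType())]
    )
    # No path (external tables are rejected) and an explicit schema
    # (schema inference raises "Unable to infer schema ...").
    spark.catalog.createTable("demo_table", source="parquet", schema=schema)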
@@ -5,6 +5,8 @@
 from collections import defaultdict

 from snowflake.connector.errors import ProgrammingError
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:

     # Validate input parameters to match PySpark behavior
     if catalog_name is None:
-        raise ValueError("Catalog name cannot be None")
+        exception = ValueError("Catalog name cannot be None")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if catalog_name == "":
-        raise ValueError(
+        exception = ValueError(
             "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

     CURRENT_CATALOG_NAME = catalog_name
     if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
         sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
         return get_current_catalog()
     except ProgrammingError as e:
-        raise Exception(
+        exception = Exception(
             f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
-        )
+        )
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception from e


 def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:
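set_current_catalog appears to back the client's spark.catalog.setCurrentCatalog call; a None or empty name now raises a ValueError with an attached code instead of an unannotated one. Illustrative usage (standard PySpark 3.4+ Catalog API; the catalog name is the PySpark default):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    # Routes to the validated set_current_catalog path shown above.
    spark.catalog.setCurrentCatalog("spark_catalog")
    print(spark.catalog.currentCatalog())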
@@ -6,6 +6,9 @@ from urllib.parse import urlparse

 from pyspark.errors.exceptions.base import AnalysisException

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 CLOUD_PREFIX_TO_CLOUD = {
     "abfss": "azure",
     "wasbs": "azure",
@@ -74,7 +77,7 @@ def get_compression_for_source_and_options(

     if not is_supported_compression(source, compression):
         supported_compressions = supported_compressions_for_format(source)
-        raise AnalysisException(
+        exception = AnalysisException(
             f"Compression {compression} is not supported for {source} format. "
             + (
                 f"Supported compressions: {sorted(supported_compressions)}"
@@ -82,6 +85,8 @@
                 else "None compression supported for this format."
             )
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     return compression

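The compression check above validates the compression passed through data source options against what the chosen format supports, and now attaches UNSUPPORTED_OPERATION when it rejects a combination. An illustrative write that exercises the option (the output path is made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(1, "a")], ["id", "val"])
    # "gzip" is a supported csv compression; an unsupported codec for the
    # format would surface the AnalysisException raised above.
    df.write.option("compression", "gzip").csv("/tmp/example_csv_out")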
@@ -16,6 +16,7 @@ from snowflake.snowpark.types import DataType
 from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_expression import (
@@ -200,7 +201,9 @@ def map_pivot_aggregate(
         dataframe=result.select(*column_selectors),
         spark_column_names=reordered_spark_names,
         snowpark_column_names=reordered_snowpark_names,
-        column_qualifiers=[
+        column_qualifiers=[
+            {ColumnQualifier.no_qualifier()} for _ in reordered_spark_names
+        ],
         parent_column_name_map=input_container.column_map,
         snowpark_column_types=reordered_types,
     )
@@ -349,7 +352,7 @@ class _ColumnMetadata:
     spark_name: str
     snowpark_name: str
     data_type: DataType
-    qualifiers:
+    qualifiers: set[ColumnQualifier]


 @dataclass(frozen=True)
@@ -385,7 +388,7 @@ class _Columns:
             col.spark_name for col in self.grouping_columns + self.aggregation_columns
         ]

-    def get_qualifiers(self) -> list[
+    def get_qualifiers(self) -> list[set[ColumnQualifier]]:
         return [
             col.qualifiers for col in self.grouping_columns + self.aggregation_columns
         ]
@@ -429,7 +432,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 None if pivot else snowpark_column.typ,
-                snowpark_column.get_qualifiers(),
+                qualifiers=snowpark_column.get_qualifiers(),
             )
         )

@@ -469,7 +472,7 @@ def map_aggregate_helper(
                 new_name,
                 None if skip_alias else alias,
                 agg_col_typ,
-
+                qualifiers={ColumnQualifier.no_qualifier()},
             )
         )

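In the aggregate path, column metadata now carries an explicit qualifier set, with ColumnQualifier.no_qualifier() appearing to mark derived aggregate outputs that no longer belong to any source alias. An illustrative aggregation whose output columns are the ones tagged this way (sample data made up):

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("a", 1), ("a", 2), ("b", 5)], ["grp", "val"])
    # "total" is a derived aggregate column; on the server side it is the kind
    # of column map_aggregate_helper now tags with an explicit qualifier set.
    df.groupBy("grp").agg(F.sum("val").alias("total")).show()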