snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
@@ -8,23 +8,29 @@ import typing
 import pandas
 import pyspark.sql.connect.proto.common_pb2 as common_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
+from pyspark.errors.exceptions.base import AnalysisException
 from pyspark.sql.connect.client.core import Retrying
 from snowflake.core.exceptions import APIError, NotFoundError
 from snowflake.core.schema import Schema
 from snowflake.core.table import Table, TableColumn
 
-from snowflake.snowpark import functions
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
     unquote_if_quoted,
 )
 from snowflake.snowpark.functions import lit
 from snowflake.snowpark.types import BooleanType, StringType
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import (
     auto_uppercase_non_column_identifiers,
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.error.exceptions import MaxRetryExceeded
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -34,12 +40,19 @@ from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import
 )
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils.identifiers import (
+    FQN,
+    spark_to_sf_single_id_with_unquoting,
     split_fully_qualified_spark_name,
 )
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
+from snowflake.snowpark_connect.utils.temporary_view_helper import (
+    get_temp_view,
+    get_temp_view_normalized_names,
+    unregister_temp_view,
+)
 from snowflake.snowpark_connect.utils.udf_cache import cached_udf
 
 
@@ -103,9 +116,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         catalog, sf_database, sf_schema = _process_multi_layer_database(pattern)
         sf_schema = sf_schema.replace("*", ".*")
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog
 
         dbs: list[Schema] | None = None
@@ -125,7 +140,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 )
         if dbs is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts"
+                f"Failed to fetch databases {f'with pattern {pattern} ' if pattern is not None else ''}after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         catalogs: list[str] = list()
@@ -157,9 +173,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         """Listing a single database that's accessible in Snowflake."""
         catalog, sf_database, sf_schema = _process_multi_layer_database(spark_dbName)
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_catalog = get_or_create_snowpark_session().catalog
 
         db: Schema | None = None
@@ -178,7 +196,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 )
         if db is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch database {spark_dbName} after all retry attempts"
+                f"Failed to fetch database {spark_dbName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
 
         name = unquote_if_quoted(db.name)
@@ -203,6 +222,93 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             exists = False
         return pandas.DataFrame({"exists": [exists]})
 
+    def _get_temp_view_prefixes(self, spark_dbName: str | None) -> list[str]:
+        if spark_dbName is None:
+            return []
+        return [
+            quote_name_without_upper_casing(part)
+            for part in split_fully_qualified_spark_name(spark_dbName)
+        ]
+
+    def _list_temp_views(
+        self,
+        spark_dbName: str | None = None,
+        pattern: str | None = None,
+    ) -> typing.Tuple[
+        list[str | None],
+        list[list[str | None]],
+        list[str],
+        list[str | None],
+        list[str | None],
+        list[bool],
+    ]:
+        catalogs: list[str | None] = list()
+        namespaces: list[list[str | None]] = list()
+        names: list[str] = list()
+        descriptions: list[str | None] = list()
+        table_types: list[str | None] = list()
+        is_temporaries: list[bool] = list()
+
+        temp_views_prefix = ".".join(self._get_temp_view_prefixes(spark_dbName))
+        normalized_spark_dbName = (
+            temp_views_prefix.lower()
+            if global_config.spark_sql_caseSensitive
+            else temp_views_prefix
+        )
+        normalized_global_temp_database_name = (
+            quote_name_without_upper_casing(
+                global_config.spark_sql_globalTempDatabase.lower()
+            )
+            if global_config.spark_sql_caseSensitive
+            else quote_name_without_upper_casing(
+                global_config.spark_sql_globalTempDatabase
+            )
+        )
+
+        temp_views = get_temp_view_normalized_names()
+        null_safe_pattern = pattern if pattern is not None else ""
+
+        for temp_view in temp_views:
+            normalized_temp_view = (
+                temp_view.lower()
+                if global_config.spark_sql_caseSensitive
+                else temp_view
+            )
+            fqn = FQN.from_string(temp_view)
+            normalized_schema = (
+                fqn.schema.lower()
+                if fqn.schema is not None and global_config.spark_sql_caseSensitive
+                else fqn.schema
+            )
+
+            is_global_view = normalized_global_temp_database_name == normalized_schema
+            is_local_temp_view = fqn.schema is None
+            # Temporary views are always shown if they match the pattern
+            matches_prefix = (
+                normalized_spark_dbName == normalized_schema or is_local_temp_view
+            )
+            if matches_prefix and bool(
+                re.match(null_safe_pattern, normalized_temp_view)
+            ):
+                names.append(unquote_if_quoted(fqn.name))
+                catalogs.append(None)
+                namespaces.append(
+                    [global_config.spark_sql_globalTempDatabase]
+                    if is_global_view
+                    else []
+                )
+                descriptions.append(None)
+                table_types.append("TEMPORARY")
+                is_temporaries.append(True)
+        return (
+            catalogs,
+            namespaces,
+            names,
+            descriptions,
+            table_types,
+            is_temporaries,
+        )
+
     def listTables(
         self,
         spark_dbName: str | None = None,
@@ -214,9 +320,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 spark_dbName
             )
             if catalog is not None and self != catalog:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Calling into another catalog is not currently supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         else:
             catalog = sf_database = sf_schema = None
 
@@ -232,8 +340,7 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                     schema=sf_quote(sf_schema),
                     pattern=_normalize_identifier(pattern),
                 )
-        names: list[str] = list()
-        catalogs: list[str] = list()
+        catalogs: list[str | None] = list()
         namespaces: list[list[str | None]] = list()
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -253,6 +360,22 @@ class SnowflakeCatalog(AbstractSparkCatalog):
             descriptions.append(o[6] if o[6] else None)
             table_types.append("PERMANENT")
             is_temporaries.append(False)
+
+        (
+            non_materialized_catalogs,
+            non_materialized_namespaces,
+            non_materialized_names,
+            non_materialized_descriptions,
+            non_materialized_table_types,
+            non_materialized_is_temporaries,
+        ) = self._list_temp_views(spark_dbName, pattern)
+        catalogs.extend(non_materialized_catalogs)
+        namespaces.extend(non_materialized_namespaces)
+        names.extend(non_materialized_names)
+        descriptions.extend(non_materialized_descriptions)
+        table_types.extend(non_materialized_table_types)
+        is_temporaries.extend(non_materialized_is_temporaries)
+
         return pandas.DataFrame(
             {
                 "name": names,
@@ -297,35 +420,76 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         spark_tableName: str,
     ) -> pandas.DataFrame:
         """Listing a single table/view with provided name that's accessible in Snowflake."""
+
+        def _get_temp_view():
+            spark_table_name_parts = [
+                quote_name_without_upper_casing(part)
+                for part in split_fully_qualified_spark_name(spark_tableName)
+            ]
+            spark_view_name = ".".join(spark_table_name_parts)
+            temp_view = get_temp_view(spark_view_name)
+            if temp_view:
+                return pandas.DataFrame(
+                    {
+                        "name": [unquote_if_quoted(spark_table_name_parts[-1])],
+                        "catalog": [None],
+                        "namespace": [
+                            [unquote_if_quoted(spark_table_name_parts[-2])]
+                            if len(spark_table_name_parts) > 1
+                            else []
+                        ],
+                        "description": [None],
+                        "tableType": ["TEMPORARY"],
+                        "isTemporary": [True],
+                    }
+                )
+            return None
+
+        # Attempt to get the view from the non materialized views first
+        temp_view = _get_temp_view()
+        if temp_view is not None:
+            return temp_view
+
         sp_catalog = get_or_create_snowpark_session().catalog
         catalog, sf_database, sf_schema, table_name = _process_multi_layer_identifier(
             spark_tableName
         )
         if catalog is not None and self != catalog:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Calling into another catalog is not currently supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
         table: Table | None = None
-        for attempt in Retrying(
-            max_retries=5,
-            initial_backoff=100,  # 100ms
-            max_backoff=5000,  # 5 s
-            backoff_multiplier=2.0,
-            jitter=100,
-            min_jitter_threshold=200,
-            can_retry=_is_retryable_api_error,
-        ):
-            with attempt:
-                table = sp_catalog.get_table(
-                    database=sf_quote(sf_database),
-                    schema=sf_quote(sf_schema),
-                    table_name=sf_quote(table_name),
-                )
+        try:
+            for attempt in Retrying(
+                max_retries=5,
+                initial_backoff=100,  # 100ms
+                max_backoff=5000,  # 5 s
+                backoff_multiplier=2.0,
+                jitter=100,
+                min_jitter_threshold=200,
+                can_retry=_is_retryable_api_error,
+            ):
+                with attempt:
+                    table = sp_catalog.get_table(
+                        database=sf_quote(sf_database),
+                        schema=sf_quote(sf_schema),
+                        table_name=sf_quote(table_name),
+                    )
+        except NotFoundError:
+            exception = AnalysisException(
+                error_class=TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS,
+                message_parameters={"relationName": spark_tableName},
+            )
+            attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
+            raise exception
 
         if table is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch table {spark_tableName} after all retry attempts"
+                f"Failed to fetch table {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
 
         return pandas.DataFrame(
@@ -356,16 +520,74 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         try:
             self.getTable(table_mli)
             exists = True
-        except NotFoundError:
-            exists = False
+        except AnalysisException as ex:
+            if ex.error_class == TABLE_OR_VIEW_NOT_FOUND_ERROR_CLASS:
+                exists = False
         return pandas.DataFrame({"exists": [exists]})
 
+    def _list_temp_view_columns(
+        self,
+        spark_tableName: str,
+        spark_dbName: typing.Optional[str] = None,
+    ):
+        spark_view_name_parts = [
+            quote_name_without_upper_casing(part)
+            for part in split_fully_qualified_spark_name(spark_tableName)
+        ]
+        spark_view_name_parts = (
+            self._get_temp_view_prefixes(spark_dbName) + spark_view_name_parts
+        )
+        spark_view_name = ".".join(spark_view_name_parts)
+        temp_view = get_temp_view(spark_view_name)
+
+        if not temp_view:
+            return None
+
+        return self._list_columns_from_dataframe_container(temp_view)
+
+    def _list_columns_from_dataframe_container(
+        self, container: DataFrameContainer
+    ) -> pandas.DataFrame:
+        names: list[str] = list()
+        descriptions: list[str | None] = list()
+        data_types: list[str] = list()
+        nullables: list[bool] = list()
+        is_partitions: list[bool] = list()
+        is_buckets: list[bool] = list()
+
+        for field, spark_column in zip(
+            container.dataframe.schema.fields,
+            container.column_map.get_spark_columns(),
+        ):
+            names.append(spark_column)
+            descriptions.append(None)
+            data_types.append(field.datatype.simpleString())
+            nullables.append(field.nullable)
+            is_partitions.append(False)
+            is_buckets.append(False)
+
+        return pandas.DataFrame(
+            {
+                "name": names,
+                "description": descriptions,
+                "dataType": data_types,
+                "nullable": nullables,
+                "isPartition": is_partitions,
+                "isBucket": is_buckets,
+            }
+        )
+
     def listColumns(
         self,
         spark_tableName: str,
         spark_dbName: typing.Optional[str] = None,
     ) -> pandas.DataFrame:
         """List all columns in a table/view, optionally database name filter can be provided."""
+
+        temp_view_columns = self._list_temp_view_columns(spark_tableName, spark_dbName)
+        if temp_view_columns is not None:
+            return temp_view_columns
+
         sp_catalog = get_or_create_snowpark_session().catalog
         columns: list[TableColumn] | None = None
         if spark_dbName is None:
@@ -373,9 +595,11 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                 spark_tableName
             )
             if catalog is not None and self != catalog:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Calling into another catalog is not currently supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             for attempt in Retrying(
                 max_retries=5,
                 initial_backoff=100,  # 100ms
@@ -408,7 +632,8 @@ class SnowflakeCatalog(AbstractSparkCatalog):
                     )
         if columns is None:
             raise MaxRetryExceeded(
-                f"Failed to fetch columns of {spark_tableName} after all retry attempts"
+                f"Failed to fetch columns of {spark_tableName} after all retry attempts",
+                custom_error_code=ErrorCodes.INTERNAL_ERROR,
             )
         names: list[str] = list()
         descriptions: list[str | None] = list()
@@ -456,16 +681,23 @@ class SnowflakeCatalog(AbstractSparkCatalog):
     ) -> DataFrameContainer:
         session = get_or_create_snowpark_session()
         schema = global_config.spark_sql_globalTempDatabase
-        result_df = session.sql(
-            "drop view if exists identifier(?)",
-            params=[f"{sf_quote(schema)}.{sf_quote(spark_view_name)}"],
-        )
-        result_df = result_df.select(
-            functions.contains('"status"', functions.lit("successfully dropped")).alias(
-                "value"
+        result = False
+        if spark_view_name:
+            result = unregister_temp_view(
+                f"{spark_to_sf_single_id_with_unquoting(schema)}.{spark_to_sf_single_id_with_unquoting(spark_view_name)}"
+            )
+
+        if not result:
+            drop_result = session.sql(
+                "drop view if exists identifier(?)",
+                params=[f"{sf_quote(schema)}.{sf_quote(spark_view_name)}"],
+            ).collect()
+            result = (
+                len(drop_result) == 1
+                and "successfully dropped" in drop_result[0]["status"]
             )
-        )
         columns = ["value"]
+        result_df = session.createDataFrame([result], schema=columns)
         return DataFrameContainer.create_with_column_mapping(
             dataframe=result_df,
             spark_column_names=columns,
@@ -479,15 +711,23 @@ class SnowflakeCatalog(AbstractSparkCatalog):
     ) -> DataFrameContainer:
         """Drop the current temporary view."""
         session = get_or_create_snowpark_session()
-        result = session.sql(
-            "drop view if exists identifier(?)",
-            params=[sf_quote(spark_view_name)],
-        ).collect()
-        view_was_dropped = (
-            len(result) == 1 and "successfully dropped" in result[0]["status"]
-        )
-        result_df = session.createDataFrame([(view_was_dropped,)], schema=["value"])
         columns = ["value"]
+        result = False
+        if spark_view_name:
+            result = unregister_temp_view(
+                spark_to_sf_single_id_with_unquoting(spark_view_name)
+            )
+        if not result:
+            drop_result = session.sql(
+                "drop view if exists identifier(?)",
+                params=[sf_quote(spark_view_name)],
+            ).collect()
+            result = (
+                len(drop_result) == 1
+                and "successfully dropped" in drop_result[0]["status"]
+            )
+
+        result_df = session.createDataFrame([result], schema=columns)
         return DataFrameContainer.create_with_column_mapping(
             dataframe=result_df,
             spark_column_names=columns,
@@ -515,26 +755,34 @@ class SnowflakeCatalog(AbstractSparkCatalog):
         if source == "":
             source = global_config.get("spark.sql.sources.default")
         if source not in ("csv", "json", "avro", "parquet", "orc", "xml"):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Source '{source}' is not currently supported by Catalog.createTable. "
                 "Maybe default value through 'spark.sql.sources.default' should be set."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         if path != "":
             # External table creation is not supported currently.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "External table creation is not supported currently."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
         session = get_or_create_snowpark_session()
         # Managed table
         if schema.ByteSize() == 0:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unable to infer schema for {source.upper()}. It must be specified manually.",
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         sp_schema = proto_to_snowpark_type(schema)
         columns = [c.name for c in schema.struct.fields]
         table_name_parts = split_fully_qualified_spark_name(tableName)
-        qualifiers = [table_name_parts for _ in columns]
+        qualifiers: list[set[ColumnQualifier]] = [
+            {ColumnQualifier(tuple(table_name_parts))} for _ in columns
+        ]
         column_types = [f.datatype for f in sp_schema.fields]
         return DataFrameContainer.create_with_column_mapping(
             dataframe=session.createDataFrame([], sp_schema),
@@ -5,6 +5,8 @@
 from collections import defaultdict
 
 from snowflake.connector.errors import ProgrammingError
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS, SNOWFLAKE_CATALOG
 from snowflake.snowpark_connect.relation.catalogs.abstract_spark_catalog import (
     AbstractSparkCatalog,
@@ -27,11 +29,15 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
 
     # Validate input parameters to match PySpark behavior
     if catalog_name is None:
-        raise ValueError("Catalog name cannot be None")
+        exception = ValueError("Catalog name cannot be None")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if catalog_name == "":
-        raise ValueError(
+        exception = ValueError(
             "Catalog '' plugin class not found: spark.sql.catalog. is not defined"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     CURRENT_CATALOG_NAME = catalog_name
     if catalog_name in CATALOGS:
@@ -42,9 +48,11 @@ def set_current_catalog(catalog_name: str | None) -> AbstractSparkCatalog:
         sf_catalog.setCurrentDatabase(catalog_name if catalog_name is not None else "")
         return get_current_catalog()
     except ProgrammingError as e:
-        raise Exception(
+        exception = Exception(
             f"Catalog '{catalog_name}' plugin class not found: spark.sql.catalog.{catalog_name} is not defined"
-        ) from e
+        )
+        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+        raise exception from e
 
 
 def _get_current_temp_objects() -> set[tuple[str | None, str | None, str]]:
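
The refactor that recurs throughout the hunks above is the same error-handling change: instead of raising directly, the code now constructs the exception, attaches a Snowpark Connect error code, and then raises it. A minimal sketch of that pattern, using only names visible in this diff (attach_custom_error_code, ErrorCodes, SnowparkConnectNotImplementedError); the check_same_catalog wrapper is a hypothetical name used here for illustration, not part of the package:

    # Sketch of the recurring pattern: build the exception, attach a custom
    # error code, then raise. Imports mirror the ones added at the top of the
    # catalog module in this diff.
    from snowflake.snowpark_connect.error.error_codes import ErrorCodes
    from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
    from snowflake.snowpark_connect.utils.telemetry import SnowparkConnectNotImplementedError


    def check_same_catalog(catalog, current_catalog):  # hypothetical helper for illustration
        # Mirrors the guard used in listDatabases/listTables/getTable/listColumns above.
        if catalog is not None and current_catalog != catalog:
            exception = SnowparkConnectNotImplementedError(
                "Calling into another catalog is not currently supported"
            )
            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
            raise exception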