snowpark-connect 0.28.1__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client.py +65 -0
- snowflake/snowpark_connect/column_name_handler.py +6 -0
- snowflake/snowpark_connect/config.py +33 -5
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +21 -19
- snowflake/snowpark_connect/expression/map_extension.py +277 -1
- snowflake/snowpark_connect/expression/map_sql_expression.py +107 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +425 -269
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/relation/io_utils.py +21 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -4
- snowflake/snowpark_connect/relation/map_extension.py +21 -4
- snowflake/snowpark_connect/relation/map_join.py +8 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
- snowflake/snowpark_connect/relation/map_relation.py +1 -3
- snowflake/snowpark_connect/relation/map_row_ops.py +116 -15
- snowflake/snowpark_connect/relation/map_show_string.py +14 -6
- snowflake/snowpark_connect/relation/map_sql.py +39 -5
- snowflake/snowpark_connect/relation/map_stats.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read.py +22 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +119 -29
- snowflake/snowpark_connect/relation/read/map_read_json.py +57 -36
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +159 -0
- snowflake/snowpark_connect/relation/stage_locator.py +85 -53
- snowflake/snowpark_connect/relation/write/map_write.py +67 -4
- snowflake/snowpark_connect/server.py +29 -16
- snowflake/snowpark_connect/type_mapping.py +75 -3
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/describe_query_cache.py +6 -3
- snowflake/snowpark_connect/utils/io_utils.py +36 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +30 -5
- snowflake/snowpark_connect/utils/udf_cache.py +37 -7
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/RECORD +47 -45
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.30.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.30.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/relation/stage_locator.py
@@ -5,6 +5,7 @@
 import os
 
 from fsspec.core import url_to_fs
+from pyspark.errors.exceptions.base import AnalysisException
 from s3fs.core import S3FileSystem
 
 from snowflake import snowpark
@@ -33,37 +34,42 @@ def get_paths_from_stage(
 
     # TODO : What if GCP?
    # TODO: What if already stage path?
-
-
-
-                _, bucket_name, path = parse_azure_url(p)
-                rewrite_paths.append(f"{stage_name}/{path}")
-            paths = rewrite_paths
-        else:
-            filesystem, parsed_path = url_to_fs(paths[0])
-            if isinstance(filesystem, S3FileSystem):  # aws
-                # Remove bucket name from the path since the stage name will replace
-                # the bucket name in the path.
-                paths = [
-                    f"{stage_name}/{'/'.join(url_to_fs(p)[1].split('/')[1:])}"
-                    for p in paths
-                ]
-            else:  # local
-                # For local files, we need to preserve directory structure for partitioned data
-                # Instead of just using basename, we'll use the last few path components
-                new_paths = []
+    match get_cloud_from_url(paths[0]):
+        case "azure":
+            rewrite_paths = []
             for p in paths:
-
-
-
-
-
-
-
-
-
-
-
+                _, bucket_name, path = parse_azure_url(p)
+                rewrite_paths.append(f"{stage_name}/{path}")
+            paths = rewrite_paths
+        case "gcp":
+            raise AnalysisException(
+                "You must configure an integration for Google Cloud Storage to perform I/O operations rather than accessing the URL directly. Reference: https://docs.snowflake.com/en/user-guide/data-load-gcs-config"
+            )
+        case _:
+            filesystem, parsed_path = url_to_fs(paths[0])
+            if isinstance(filesystem, S3FileSystem):  # aws
+                # Remove bucket name from the path since the stage name will replace
+                # the bucket name in the path.
+                paths = [
+                    f"{stage_name}/{'/'.join(url_to_fs(p)[1].split('/')[1:])}"
+                    for p in paths
+                ]
+            else:  # local
+                # For local files, we need to preserve directory structure for partitioned data
+                # Instead of just using basename, we'll use the last few path components
+                new_paths = []
+                for p in paths:
+                    # Split the path and take the last 2-3 components to preserve structure
+                    # but avoid very long paths
+                    path_parts = p.split(os.sep)
+                    if len(path_parts) >= 2:
+                        # Take last 2 components (e.g., "base_case/x=abc")
+                        relative_path = "/".join(path_parts[-2:])
+                    else:
+                        # Single component, use basename
+                        relative_path = os.path.basename(p)
+                    new_paths.append(f"{stage_name}/{relative_path}")
+                paths = new_paths
 
     return paths
 
@@ -102,15 +108,21 @@ class StageLocator:
             sql_query = f"CREATE OR REPLACE TEMP STAGE {stage_name[1:]} URL='azure://{account}.blob.core.windows.net/{bucket_name}'"
 
             credential_session_key = (
-                f"fs.azure.sas.
+                f"fs.azure.sas.fixed.token.{account}.dfs.core.windows.net",
+                f"fs.azure.sas.{bucket_name}.{account}.blob.core.windows.net",
             )
             credential = sessions_config.get(spark_session_id, None)
-
-
-
-
-
-
+            sas_token = None
+            for session_key in credential_session_key:
+                if (
+                    credential is not None
+                    and credential.get(session_key) is not None
+                    and credential.get(session_key).strip() != ""
+                ):
+                    sas_token = credential.get(session_key)
+                    break
+            if sas_token is not None:
+                sql_query += f" CREDENTIALS = (AZURE_SAS_TOKEN = '{sas_token}')"
 
             logger.info(self.session.sql(sql_query).collect())
             self.stages_for_azure[bucket_name] = stage_name
@@ -128,24 +140,44 @@ class StageLocator:
             # but the rest of the time it's used, it does. We just drop it here.
             sql_query = f"CREATE OR REPLACE TEMP STAGE {stage_name[1:]} URL='s3://{parsed_path.split('/')[0]}'"
             credential = sessions_config.get(spark_session_id, None)
-            if
-
-
-
-
-
-
-
-
-
-
-
-
+            if credential is not None:
+                if (  # USE AWS KEYS to connect
+                    credential.get("spark.hadoop.fs.s3a.access.key") is not None
+                    and credential.get("spark.hadoop.fs.s3a.secret.key")
+                    is not None
+                    and credential.get("spark.hadoop.fs.s3a.access.key").strip()
+                    != ""
+                    and credential.get("spark.hadoop.fs.s3a.secret.key").strip()
+                    != ""
+                ):
+                    aws_keys = f" AWS_KEY_ID = '{credential.get('spark.hadoop.fs.s3a.access.key')}'"
+                    aws_keys += f" AWS_SECRET_KEY = '{credential.get('spark.hadoop.fs.s3a.secret.key')}'"
+                    if (
+                        credential.get("spark.hadoop.fs.s3a.session.token")
+                        is not None
+                    ):
+                        aws_keys += f" AWS_TOKEN = '{credential.get('spark.hadoop.fs.s3a.session.token')}'"
+                    sql_query += f" CREDENTIALS = ({aws_keys})"
+                    sql_query += " ENCRYPTION = ( TYPE = 'AWS_SSE_S3' )"
+                elif (  # USE AWS ROLE and KMS KEY to connect
+                    credential.get(
+                        "spark.hadoop.fs.s3a.server-side-encryption.key"
+                    )
+                    is not None
+                    and credential.get(
+                        "spark.hadoop.fs.s3a.server-side-encryption.key"
+                    ).strip()
+                    != ""
+                    and credential.get("spark.hadoop.fs.s3a.assumed.role.arn")
                     is not None
+                    and credential.get(
+                        "spark.hadoop.fs.s3a.assumed.role.arn"
+                    ).strip()
+                    != ""
                 ):
-
-
-
+                    aws_role = f" AWS_ROLE = '{credential.get('spark.hadoop.fs.s3a.assumed.role.arn')}'"
+                    sql_query += f" CREDENTIALS = ({aws_role})"
+                    sql_query += f" ENCRYPTION = ( TYPE='AWS_SSE_KMS' KMS_KEY_ID = '{credential.get('spark.hadoop.fs.s3a.server-side-encryption.key')}' )"
 
             logger.info(self.session.sql(sql_query).collect())
             self.stages_for_aws[bucket_name] = stage_name
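The rewritten get_paths_from_stage dispatches on the cloud provider of the first path instead of special-casing Azure only. Below is a minimal standalone sketch of that dispatch pattern; get_cloud_from_url, the URL parsing, and the stage name handling are simplified stand-ins for the package's own helpers, not the actual implementation.

```python
from urllib.parse import urlparse


def get_cloud_from_url(url: str) -> str:
    # Simplified stand-in: infer the provider from the URL scheme.
    scheme = urlparse(url).scheme
    if scheme in ("wasb", "wasbs", "abfs", "abfss", "azure"):
        return "azure"
    if scheme in ("gs", "gcs"):
        return "gcp"
    return "other"  # s3, local paths, etc.


def rewrite_paths_for_stage(paths: list[str], stage_name: str) -> list[str]:
    match get_cloud_from_url(paths[0]):
        case "azure":
            # Replace the container/account part of each URL with the stage name.
            return [f"{stage_name}/{urlparse(p).path.lstrip('/')}" for p in paths]
        case "gcp":
            raise ValueError(
                "GCS requires a storage integration; direct URLs are not supported."
            )
        case _:
            # Keep only the trailing components so partition directories survive.
            return [f"{stage_name}/{'/'.join(p.split('/')[-2:])}" for p in paths]


print(
    rewrite_paths_for_stage(
        ["wasbs://container@acct.blob.core.windows.net/data/x=1"], "@my_stage"
    )
)  # ['@my_stage/data/x=1']
```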

snowflake/snowpark_connect/relation/write/map_write.py
@@ -36,8 +36,13 @@ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     is_cloud_path,
+    is_supported_compression,
+    supported_compressions_for_format,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    filter_metadata_columns,
+)
 from snowflake.snowpark_connect.relation.read.reader_config import CsvWriterConfig
 from snowflake.snowpark_connect.relation.stage_locator import get_paths_from_stage
 from snowflake.snowpark_connect.relation.utils import (
@@ -127,6 +132,19 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
     result = map_relation(write_op.input)
     input_df: snowpark.DataFrame = handle_column_names(result, write_op.source)
+
+    # Create updated container with transformed dataframe, then filter METADATA$FILENAME columns
+    # Update the container to use the transformed dataframe from handle_column_names
+    updated_result = DataFrameContainer(
+        dataframe=input_df,
+        column_map=result.column_map,
+        table_name=result.table_name,
+        alias=result.alias,
+        partition_hint=result.partition_hint,
+    )
+    updated_result = filter_metadata_columns(updated_result)
+    input_df = updated_result.dataframe
+
     session: snowpark.Session = get_or_create_snowpark_session()
 
     # Snowflake saveAsTable doesn't support format
@@ -179,7 +197,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     f"Skipping REMOVE for root path {write_path} - too broad scope"
                 )
             else:
-                remove_command = f"REMOVE {write_path}/"
+                remove_command = f"REMOVE '{write_path}/'"
                 session.sql(remove_command).collect()
                 logger.info(f"Successfully cleared directory: {write_path}")
         except Exception as e:
@@ -208,6 +226,20 @@ def map_write(request: proto_base.ExecutePlanRequest):
            compression = write_op.options.get(
                "compression", default_compression
            ).upper()
+
+            if not is_supported_compression(write_op.source, compression):
+                supported_compressions = supported_compressions_for_format(
+                    write_op.source
+                )
+                raise AnalysisException(
+                    f"Compression {compression} is not supported for {write_op.source} format. "
+                    + (
+                        f"Supported compressions: {sorted(supported_compressions)}"
+                        if supported_compressions
+                        else "No compression supported for this format."
+                    )
+                )
+
            parameters = {
                "location": temp_file_prefix_on_stage,
                "file_format_type": write_op.source
@@ -417,9 +449,27 @@ def map_write(request: proto_base.ExecutePlanRequest):
                )
            case _:
                snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
+                save_method = write_op.table.save_method
 
                if (
-                    write_op.
+                    write_op.source == "snowflake"
+                    and write_op.table.save_method
+                    == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_UNSPECIFIED
+                ):
+                    save_method = (
+                        commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
+                    )
+                    if len(write_op.table.table_name) == 0:
+                        dbtable_name = write_op.options.get("dbtable", "")
+                        if len(dbtable_name) == 0:
+                            raise SnowparkConnectNotImplementedError(
+                                "Save command is not supported without a table name"
+                            )
+                        else:
+                            snowpark_table_name = _spark_to_snowflake(dbtable_name)
+
+                if (
+                    save_method
                    == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
                ):
                    match write_mode:
@@ -481,7 +531,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                            column_order=_column_order_for_write,
                        )
                elif (
-
+                    save_method
                    == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
                ):
                    _validate_schema_and_get_writer(
@@ -493,7 +543,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                    )
                else:
                    raise SnowparkConnectNotImplementedError(
-                        f"Save command not supported: {
+                        f"Save command not supported: {save_method}"
                    )
 
 
@@ -503,6 +553,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     snowpark_table_name = _spark_to_snowflake(write_op.table_name)
     result = map_relation(write_op.input)
     input_df: snowpark.DataFrame = handle_column_names(result, "table")
+
+    # Create updated container with transformed dataframe, then filter METADATA$FILENAME columns
+    # Update the container to use the transformed dataframe from handle_column_names
+    updated_result = DataFrameContainer(
+        dataframe=input_df,
+        column_map=result.column_map,
+        table_name=result.table_name,
+        alias=result.alias,
+        partition_hint=result.partition_hint,
+    )
+    updated_result = filter_metadata_columns(updated_result)
+    input_df = updated_result.dataframe
+
     session: snowpark.Session = get_or_create_snowpark_session()
 
     if write_op.table_name is None or write_op.table_name == "":
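The new write path validates the requested compression codec against the output format before the COPY parameters are built. The sketch below shows the shape of that check with a hypothetical allow-list; the real is_supported_compression / supported_compressions_for_format helpers live in relation/io_utils.py and may map formats differently.

```python
# Hypothetical allow-list used only for illustration; the real mapping lives in
# snowflake/snowpark_connect/relation/io_utils.py.
_SUPPORTED_COMPRESSIONS = {
    "csv": {"NONE", "GZIP", "BZ2", "ZSTD", "DEFLATE"},
    "json": {"NONE", "GZIP", "BZ2", "ZSTD", "DEFLATE"},
    "parquet": {"NONE", "SNAPPY"},
    "text": {"NONE", "GZIP"},
}


def supported_compressions_for_format(source: str) -> set[str]:
    return _SUPPORTED_COMPRESSIONS.get(source.lower(), set())


def is_supported_compression(source: str, compression: str) -> bool:
    return compression.upper() in supported_compressions_for_format(source)


def validate_compression(source: str, compression: str) -> None:
    if not is_supported_compression(source, compression):
        supported = supported_compressions_for_format(source)
        raise ValueError(
            f"Compression {compression} is not supported for {source} format. "
            + (
                f"Supported compressions: {sorted(supported)}"
                if supported
                else "No compression supported for this format."
            )
        )


validate_compression("parquet", "snappy")   # passes
# validate_compression("parquet", "gzip")   # would raise ValueError
```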

snowflake/snowpark_connect/server.py
@@ -232,12 +232,20 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         match request.WhichOneof("analyze"):
             case "schema":
                 result = map_relation(request.schema.plan.root)
-
-
+
+                from snowflake.snowpark_connect.relation.read.metadata_utils import (
+                    filter_metadata_columns,
+                )
+
+                filtered_result = filter_metadata_columns(result)
+                filtered_df = filtered_result.dataframe
+
                 schema = proto_base.AnalyzePlanResponse.Schema(
                     schema=types_proto.DataType(
                         **snowpark_to_proto_type(
-
+                            filtered_df.schema,
+                            filtered_result.column_map,
+                            filtered_df,
                         )
                     )
                 )
@@ -1161,23 +1169,28 @@ def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSession
 
 
 def init_spark_session(conf: SparkConf = None) -> SparkSession:
-
-
-
-
-
-
-
-
-
-
-
-
+    if os.environ.get("JAVA_HOME") is None:
+        try:
+            # For Notebooks on SPCS
+            from jdk4py import JAVA_HOME
+
+            os.environ["JAVA_HOME"] = str(JAVA_HOME)
+        except ModuleNotFoundError:
+            # For notebooks on Warehouse
+            conda_prefix = os.environ.get("CONDA_PREFIX")
+            if conda_prefix is not None:
+                os.environ["JAVA_HOME"] = conda_prefix
+                os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
+                    conda_prefix, "lib", "server"
+                )
+    logger.info("JAVA_HOME=%s", os.environ.get("JAVA_HOME", "Not defined"))
 
     os.environ["SPARK_LOCAL_HOSTNAME"] = "127.0.0.1"
     os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
 
-
+    from snowflake.snowpark_connect.utils.session import _get_current_snowpark_session
+
+    snowpark_session = _get_current_snowpark_session()
     start_session(snowpark_session=snowpark_session)
     return get_session(conf=conf)
 
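init_spark_session now bootstraps JAVA_HOME when it is missing, preferring the jdk4py bundled JDK and falling back to the active conda environment. Here is a standalone sketch of that resolution order; the helper name and the exact library layout are illustrative assumptions, not the package's API.

```python
import os


def resolve_java_home() -> str | None:
    """Pick a JAVA_HOME if the environment does not already define one (sketch)."""
    if os.environ.get("JAVA_HOME") is not None:
        return os.environ["JAVA_HOME"]
    try:
        # jdk4py ships a private JDK and exposes its location as JAVA_HOME.
        from jdk4py import JAVA_HOME

        os.environ["JAVA_HOME"] = str(JAVA_HOME)
    except ModuleNotFoundError:
        # Fall back to a conda-provided JDK if one is active.
        conda_prefix = os.environ.get("CONDA_PREFIX")
        if conda_prefix is not None:
            os.environ["JAVA_HOME"] = conda_prefix
            # Assumed layout for the JVM shared libraries under the env prefix.
            os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
                conda_prefix, "lib", "server"
            )
    return os.environ.get("JAVA_HOME")


print("JAVA_HOME=%s" % (resolve_java_home() or "Not defined"))
```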

snowflake/snowpark_connect/type_mapping.py
@@ -30,6 +30,10 @@ from snowflake.snowpark_connect.date_time_format_mapping import (
     convert_spark_format_to_snowflake,
 )
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
+from snowflake.snowpark_connect.expression.map_sql_expression import (
+    _INTERVAL_DAYTIME_PATTERN_RE,
+    _INTERVAL_YEARMONTH_PATTERN_RE,
+)
 from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
@@ -274,6 +278,18 @@ def snowpark_to_proto_type(
         case snowpark.types.VariantType:
             # For now we are returning a string type for variant types.
             return {"string": types_proto.DataType.String()}
+        case snowpark.types.YearMonthIntervalType:
+            return {
+                "year_month_interval": types_proto.DataType.YearMonthInterval(
+                    start_field=data_type.start_field, end_field=data_type.end_field
+                )
+            }
+        case snowpark.types.DayTimeIntervalType:
+            return {
+                "day_time_interval": types_proto.DataType.DayTimeInterval(
+                    start_field=data_type.start_field, end_field=data_type.end_field
+                )
+            }
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {data_type}"
@@ -328,6 +344,24 @@ def cast_to_match_snowpark_type(
             return str(content)
         case snowpark.types.TimestampType:
             return str(content)
+        case snowpark.types.YearMonthIntervalType:
+            if isinstance(content, (int, float)):
+                total_months = int(content)
+                years = total_months // 12
+                months = total_months % 12
+                return f"INTERVAL '{years}-{months}' YEAR TO MONTH"
+            elif isinstance(content, str) and content.startswith(("+", "-")):
+                # Handle Snowflake's native interval format (e.g., "+11-08" or "-2-3")
+                # Convert to Spark's format: "INTERVAL 'Y-M' YEAR TO MONTH"
+                sign = content[0]
+                interval_part = content[1:]  # Remove sign
+                if sign == "-":
+                    return f"INTERVAL '-{interval_part}' YEAR TO MONTH"
+                else:
+                    return f"INTERVAL '{interval_part}' YEAR TO MONTH"
+            return str(content)
+        case snowpark.types.DayTimeIntervalType:
+            return str(content)
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
@@ -411,6 +445,18 @@ def proto_to_snowpark_type(
             # For UDT types, return the underlying SQL type
             logger.debug("Returning underlying sql type for udt")
             return proto_to_snowpark_type(data_type.udt.sql_type)
+        case "year_month_interval":
+            # Preserve start_field and end_field from protobuf
+            return snowpark.types.YearMonthIntervalType(
+                start_field=data_type.year_month_interval.start_field,
+                end_field=data_type.year_month_interval.end_field,
+            )
+        case "day_time_interval":
+            # Preserve start_field and end_field from protobuf
+            return snowpark.types.DayTimeIntervalType(
+                start_field=data_type.day_time_interval.start_field,
+                end_field=data_type.day_time_interval.end_field,
+            )
         case _:
             return map_simple_types(data_type.WhichOneof("kind"))
 
@@ -523,6 +569,12 @@ def map_snowpark_types_to_pyarrow_types(
             return pa.timestamp(unit, tz=tz)
         case snowpark.types.VariantType:
             return pa.string()
+        case snowpark.types.YearMonthIntervalType:
+            # Return string type so formatted intervals are preserved in display
+            return pa.string()
+        case snowpark.types.DayTimeIntervalType:
+            # Return string type so formatted intervals are preserved in display
+            return pa.string()
         case _:
             raise SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type: {snowpark_type}"
@@ -676,6 +728,14 @@ def map_pyspark_types_to_snowpark_types(
         return snowpark.types.TimestampType()
     if isinstance(type_to_map, pyspark.sql.types.TimestampNTZType):
         return snowpark.types.TimestampType(timezone=TimestampTimeZone.NTZ)
+    if isinstance(type_to_map, pyspark.sql.types.YearMonthIntervalType):
+        return snowpark.types.YearMonthIntervalType(
+            type_to_map.startField, type_to_map.endField
+        )
+    if isinstance(type_to_map, pyspark.sql.types.DayTimeIntervalType):
+        return snowpark.types.DayTimeIntervalType(
+            type_to_map.startField, type_to_map.endField
+        )
     raise SnowparkConnectNotImplementedError(
         f"Unsupported spark data type: {type_to_map}"
     )
@@ -743,6 +803,14 @@ def map_snowpark_to_pyspark_types(
         if type_to_map.tz == snowpark.types.TimestampTimeZone.NTZ:
             return pyspark.sql.types.TimestampNTZType()
         return pyspark.sql.types.TimestampType()
+    if isinstance(type_to_map, snowpark.types.YearMonthIntervalType):
+        return pyspark.sql.types.YearMonthIntervalType(
+            type_to_map.start_field, type_to_map.end_field
+        )
+    if isinstance(type_to_map, snowpark.types.DayTimeIntervalType):
+        return pyspark.sql.types.DayTimeIntervalType(
+            type_to_map.start_field, type_to_map.end_field
+        )
     raise SnowparkConnectNotImplementedError(f"Unsupported data type: {type_to_map}")
 
 
@@ -785,10 +853,14 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
             return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.NTZ)
         case "timestamp_ltz":
            return snowpark.types.TimestampType(snowpark.types.TimestampTimeZone.LTZ)
+        case "year_month_interval":
+            return snowpark.types.YearMonthIntervalType()
        case "day_time_interval":
-
-
-            return snowpark.types.
+            return snowpark.types.DayTimeIntervalType()
+        case type_name if _INTERVAL_YEARMONTH_PATTERN_RE.match(type_name):
+            return snowpark.types.YearMonthIntervalType()
+        case type_name if _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
+            return snowpark.types.DayTimeIntervalType()
         case _:
             if simple_type.startswith("decimal"):
                 precision = int(simple_type.split("(")[1].split(",")[0])
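Most of the type_mapping.py changes thread Spark's YearMonthIntervalType and DayTimeIntervalType through the proto, Snowpark, and PyArrow conversions; cast_to_match_snowpark_type additionally reformats year-month values into Spark's interval literal. A small standalone sketch of that year-month formatting, mirroring the integer and signed-string branches above, is shown here.

```python
def format_year_month_interval(content) -> str:
    """Render a year-month interval as a Spark-style literal (sketch)."""
    if isinstance(content, (int, float)):
        # Interpret a numeric value as a total month count.
        total_months = int(content)
        years, months = divmod(total_months, 12)
        return f"INTERVAL '{years}-{months}' YEAR TO MONTH"
    if isinstance(content, str) and content.startswith(("+", "-")):
        # Snowflake-style signed text such as "+11-08": keep the minus, drop the plus.
        sign, body = content[0], content[1:]
        prefix = "-" if sign == "-" else ""
        return f"INTERVAL '{prefix}{body}' YEAR TO MONTH"
    return str(content)


print(format_year_month_interval(140))       # INTERVAL '11-8' YEAR TO MONTH
print(format_year_month_interval("+11-08"))  # INTERVAL '11-08' YEAR TO MONTH
```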

snowflake/snowpark_connect/utils/context.py
@@ -30,9 +30,6 @@ _sql_aggregate_function_count = ContextVar[int](
     "_contains_aggregate_function", default=0
 )
 
-# Context for parsing map_partitions
-_map_partitions_stack = ContextVar[int]("_map_partitions_stack", default=0)
-
 # We have to generate our own plan IDs that are different from Spark's.
 # Spark plan IDs start at 0, so pick a "big enough" number to avoid overlaps.
 _STARTING_SQL_PLAN_ID = 0x80000000
@@ -230,16 +227,6 @@ def push_evaluating_join_condition(join_type, left_keys, right_keys):
         _is_evaluating_join_condition.set(prev)
 
 
-@contextmanager
-def push_map_partitions():
-    _map_partitions_stack.set(_map_partitions_stack.get() + 1)
-    yield
-
-
-def map_partitions_depth() -> int:
-    return _map_partitions_stack.get()
-
-
 @contextmanager
 def push_sql_scope():
     """
@@ -410,7 +397,6 @@ def clear_context_data() -> None:
     _view_process_context.set([])
     _next_sql_plan_id.set(_STARTING_SQL_PLAN_ID)
     _sql_plan_name_map.set({})
-    _map_partitions_stack.set(0)
     _sql_aggregate_function_count.set(0)
     _sql_named_args.set({})
     _sql_pos_args.set({})

snowflake/snowpark_connect/utils/describe_query_cache.py
@@ -16,7 +16,6 @@ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 
-DESCRIBE_CACHE_TTL_SECONDS = 15
 USE_DESCRIBE_QUERY_CACHE = True
 
 DDL_DETECTION_PATTERN = re.compile(r"\s*(CREATE|ALTER|DROP)\b", re.IGNORECASE)
@@ -51,6 +50,8 @@ class DescribeQueryCache:
         return sql_query
 
     def get(self, sql_query: str) -> list[ResultMetadataV2] | None:
+        from snowflake.snowpark_connect.config import get_describe_cache_ttl_seconds
+
         telemetry.report_describe_query_cache_lookup()
 
         cache_key = self._get_cache_key(sql_query)
@@ -59,7 +60,9 @@
 
         if key in self._cache:
             result, timestamp = self._cache[key]
-
+
+            expired_by = current_time - (timestamp + get_describe_cache_ttl_seconds())
+            if expired_by < 0:
                 logger.debug(
                     f"Returning query result from cache for query: {sql_query[:20]}"
                 )
@@ -92,7 +95,7 @@
                 telemetry.report_describe_query_cache_hit()
                 return result
             else:
-                telemetry.report_describe_query_cache_expired()
+                telemetry.report_describe_query_cache_expired(expired_by)
                 del self._cache[key]
                 return None
 
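The describe-query cache now reads its TTL from config at lookup time and reports how far past the deadline an entry was when it expired. A minimal sketch of that expiry arithmetic, with a fixed TTL standing in for get_describe_cache_ttl_seconds, is below.

```python
import time


def get_ttl_seconds() -> float:
    # Stand-in for snowflake.snowpark_connect.config.get_describe_cache_ttl_seconds().
    return 15.0


_cache: dict[str, tuple[str, float]] = {}


def cache_get(key: str):
    entry = _cache.get(key)
    if entry is None:
        return None
    result, timestamp = entry
    # Negative means the entry is still fresh; positive is seconds past expiry.
    expired_by = time.time() - (timestamp + get_ttl_seconds())
    if expired_by < 0:
        return result
    # Expired: evict and report how late the entry was (e.g. to telemetry).
    del _cache[key]
    print(f"entry expired {expired_by:.1f}s ago")
    return None


_cache["DESCRIBE SELECT 1"] = ("metadata", time.time())
print(cache_get("DESCRIBE SELECT 1"))  # fresh -> "metadata"
```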

snowflake/snowpark_connect/utils/io_utils.py
@@ -3,10 +3,46 @@
 #
 import contextlib
 import functools
+import re
 
 from snowflake.snowpark import Session
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    create_file_format_statement,
+)
 from snowflake.snowpark_connect.utils.identifiers import FQN
 
+_MINUS_AT_THE_BEGINNING_REGEX = re.compile(r"^-")
+
+
+def cached_file_format(
+    session: Session, file_format: str, format_type_options: dict[str, str]
+) -> str:
+    """
+    Cache and return a file format name based on the given options.
+    """
+
+    function_name = _MINUS_AT_THE_BEGINNING_REGEX.sub(
+        "1", str(hash(frozenset(format_type_options.items())))
+    )
+    file_format_name = f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{function_name}"
+    if file_format_name in session._file_formats:
+        return file_format_name
+
+    session.sql(
+        create_file_format_statement(
+            file_format_name,
+            file_format,
+            format_type_options,
+            temp=True,
+            if_not_exist=True,
+            use_scoped_temp_objects=False,
+            is_generated=True,
+        )
+    ).collect()
+
+    session._file_formats.add(file_format_name)
+    return file_format_name
+
 
 @functools.cache
 def file_format(
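cached_file_format derives a session-scoped name for a temporary file format from a hash of the option set, flipping a leading minus sign so the suffix stays identifier-safe, and only issues CREATE FILE FORMAT the first time that name is seen. The naming part is easy to show in isolation:

```python
import re

_MINUS_AT_THE_BEGINNING = re.compile(r"^-")


def file_format_cache_name(file_format: str, options: dict[str, str]) -> str:
    # hash() of a frozenset is order-independent, so equal option dicts
    # collapse to the same name; a leading "-" is rewritten to keep the
    # suffix identifier-safe.
    suffix = _MINUS_AT_THE_BEGINNING.sub("1", str(hash(frozenset(options.items()))))
    return f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{suffix}"


opts = {"FIELD_DELIMITER": ",", "SKIP_HEADER": "1"}
name = file_format_cache_name("CSV", opts)
print(name)
# Same options in any order map to the same name within this process.
assert name == file_format_cache_name("CSV", dict(reversed(list(opts.items()))))
```

Note that Python randomizes string hashing per process, so the derived name is only stable within one session, which is all a cache of temporary, session-scoped file formats needs.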

snowflake/snowpark_connect/utils/session.py
@@ -71,6 +71,9 @@ def configure_snowpark_session(session: snowpark.Session):
     init_builtin_udf_cache(session)
     init_external_udxf_cache(session)
 
+    # file format cache
+    session._file_formats = set()
+
     # Set experimental parameters (warnings globally suppressed)
     session.ast_enabled = False
     session.eliminate_numeric_sql_value_cast_enabled = False
@@ -117,6 +120,7 @@
         "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
         "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
         "QUERY_TAG": f"'{query_tag}'",
+        "FEATURE_INTERVAL_TYPES": "enabled",
     }
 
     session.sql(