snowpark-connect 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/client/server.py +37 -0
- snowflake/snowpark_connect/config.py +72 -3
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/map_cast.py +108 -17
- snowflake/snowpark_connect/expression/map_udf.py +1 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +229 -96
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +43 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +73 -4
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_socket.py +4 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -1
- snowflake/snowpark_connect/relation/read/reader_config.py +6 -0
- snowflake/snowpark_connect/resources_initializer.py +90 -29
- snowflake/snowpark_connect/server.py +6 -41
- snowflake/snowpark_connect/server_common/__init__.py +4 -1
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/utils/context.py +8 -0
- snowflake/snowpark_connect/utils/java_stored_procedure.py +53 -27
- snowflake/snowpark_connect/utils/java_udaf_utils.py +46 -28
- snowflake/snowpark_connect/utils/java_udtf_utils.py +1 -1
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +48 -15
- snowflake/snowpark_connect/utils/scala_udf_utils.py +98 -22
- snowflake/snowpark_connect/utils/telemetry.py +33 -22
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +12 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +4 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +46 -37
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/client/server.py

```diff
@@ -58,6 +58,7 @@ from snowflake.snowpark_connect.server_common import ( # noqa: F401 - re-export
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.env_utils import get_int_from_env
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
+from snowflake.snowpark_connect.utils.telemetry import telemetry
 from spark.connect import envelope_pb2
 
 
@@ -194,6 +195,7 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
         """Execute a Spark plan by forwarding to GS backend."""
         logger.debug("Received Execute Plan request")
         query_id = None
+        telemetry.initialize_request_summary(request)
 
         try:
             spark_resource = self._get_spark_resource()
@@ -216,12 +218,16 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
             )
 
         except GrpcErrorStatusException as e:
+            telemetry.report_request_failure(e)
             context.abort_with_status(rpc_status.to_status(e.status))
         except Exception as e:
+            telemetry.report_request_failure(e)
             logger.error(f"Error in ExecutePlan, query id {query_id}", exc_info=True)
             return _log_and_return_error(
                 "Error in ExecutePlan call", e, grpc.StatusCode.INTERNAL, context
             )
+        finally:
+            telemetry.send_request_summary_telemetry()
 
     def _call_backend_config(
         self, request: base_pb2.ConfigRequest
@@ -299,6 +305,7 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
         self, request: base_pb2.ConfigRequest, context: grpc.ServicerContext
     ) -> base_pb2.ConfigResponse:
         logger.debug("Received Config request")
+        telemetry.initialize_request_summary(request)
 
         try:
             op = request.operation
@@ -370,18 +377,23 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
             return self._call_backend_config(request)
 
         except GrpcErrorStatusException as e:
+            telemetry.report_request_failure(e)
             context.abort_with_status(rpc_status.to_status(e.status))
         except Exception as e:
+            telemetry.report_request_failure(e)
             logger.error("Error in Config", exc_info=True)
             return _log_and_return_error(
                 "Error in Config call", e, grpc.StatusCode.INTERNAL, context
             )
+        finally:
+            telemetry.send_request_summary_telemetry()
 
     def AnalyzePlan(
         self, request: base_pb2.AnalyzePlanRequest, context: grpc.ServicerContext
     ) -> base_pb2.AnalyzePlanResponse:
         logger.debug("Received Analyze Plan request")
         query_id = None
+        telemetry.initialize_request_summary(request)
 
         try:
             spark_resource = self._get_spark_resource()
@@ -403,12 +415,16 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
             return resp_envelope.analyze_plan_response
 
         except GrpcErrorStatusException as e:
+            telemetry.report_request_failure(e)
             context.abort_with_status(rpc_status.to_status(e.status))
         except Exception as e:
+            telemetry.report_request_failure(e)
             logger.error(f"Error in AnalyzePlan, query id {query_id}", exc_info=True)
             return _log_and_return_error(
                 "Error in AnalyzePlan call", e, grpc.StatusCode.INTERNAL, context
             )
+        finally:
+            telemetry.send_request_summary_telemetry()
 
     def AddArtifacts(
         self,
@@ -422,6 +438,7 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
 
         for request in request_iterator:
             query_id = None
+            telemetry.initialize_request_summary(request)
             try:
                 response_bytes = spark_resource.add_artifacts(
                     request.SerializeToString()
@@ -444,14 +461,18 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
                 add_artifacts_response = resp_envelope.add_artifacts_response
 
             except GrpcErrorStatusException as e:
+                telemetry.report_request_failure(e)
                 context.abort_with_status(rpc_status.to_status(e.status))
             except Exception as e:
+                telemetry.report_request_failure(e)
                 logger.error(
                     f"Error in AddArtifacts, query id {query_id}", exc_info=True
                 )
                 return _log_and_return_error(
                     "Error in AddArtifacts call", e, grpc.StatusCode.INTERNAL, context
                 )
+            finally:
+                telemetry.send_request_summary_telemetry()
 
         if add_artifacts_response is None:
             raise ValueError("AddArtifacts received empty request_iterator")
@@ -464,6 +485,7 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
         """Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]]"""
         logger.debug("Received ArtifactStatus request")
         query_id = None
+        telemetry.initialize_request_summary(request)
 
         try:
             spark_resource = self._get_spark_resource()
@@ -485,12 +507,16 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
 
             return resp_envelope.artifact_status_response
         except GrpcErrorStatusException as e:
+            telemetry.report_request_failure(e)
             context.abort_with_status(rpc_status.to_status(e.status))
         except Exception as e:
+            telemetry.report_request_failure(e)
             logger.error(f"Error in ArtifactStatus, query id {query_id}", exc_info=True)
             return _log_and_return_error(
                 "Error in ArtifactStatus call", e, grpc.StatusCode.INTERNAL, context
             )
+        finally:
+            telemetry.send_request_summary_telemetry()
 
     def Interrupt(
         self, request: base_pb2.InterruptRequest, context: grpc.ServicerContext
@@ -505,16 +531,20 @@ class SnowflakeConnectClientServicer(base_pb2_grpc.SparkConnectServiceServicer):
     ) -> base_pb2.ReleaseExecuteResponse:
         """Release an execution."""
         logger.debug("Received Release Execute request")
+        telemetry.initialize_request_summary(request)
         try:
             return base_pb2.ReleaseExecuteResponse(
                 session_id=request.session_id,
                 operation_id=request.operation_id or str(uuid.uuid4()),
             )
         except Exception as e:
+            telemetry.report_request_failure(e)
             logger.error("Error in ReleaseExecute", exc_info=True)
             return _log_and_return_error(
                 "Error in ReleaseExecute call", e, grpc.StatusCode.INTERNAL, context
             )
+        finally:
+            telemetry.send_request_summary_telemetry()
 
     def ReattachExecute(
         self, request: base_pb2.ReattachExecuteRequest, context: grpc.ServicerContext
@@ -542,6 +572,9 @@ def _serve(
     if session is None:
         session = get_or_create_snowpark_session()
 
+    # Initialize telemetry with session and thin client source identifier
+    telemetry.initialize(session, source="SparkConnectLightWeightClient")
+
     server_options = _get_default_grpc_options()
     max_workers = get_int_from_env("SPARK_CONNECT_CLIENT_GRPC_MAX_WORKERS", 10)
 
@@ -560,6 +593,7 @@ def _serve(
         server.start()
         server_running.set()
         logger.info("Snowpark Connect server started!")
+        telemetry.send_server_started_telemetry()
 
         if stop_event is not None:
             # start a background thread to listen for stop event and terminate the server
@@ -579,6 +613,9 @@ def _serve(
         logger.error("Error starting up Snowpark Connect server", exc_info=True)
         attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
+    finally:
+        # Flush the telemetry queue if possible
+        telemetry.shutdown()
 
 
 def start_session(
```
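The telemetry calls added above follow one request-lifecycle pattern in every RPC handler: initialize a summary when the request arrives, record any failure, and flush the summary in a `finally` block. A minimal sketch of that pattern, using a hypothetical stand-in class; the real `telemetry` object lives in `snowflake/snowpark_connect/utils/telemetry.py`, whose internals are not shown in this diff.

```python
# Illustrative sketch only: RequestSummaryTelemetry is a made-up stand-in that
# mirrors the method names called by the handlers above.
class RequestSummaryTelemetry:
    def __init__(self) -> None:
        self._summary = None

    def initialize_request_summary(self, request) -> None:
        # Start a fresh summary keyed by the incoming request type.
        self._summary = {"request_type": type(request).__name__, "error": None}

    def report_request_failure(self, error: Exception) -> None:
        # Record the failure; the summary is still flushed in `finally`.
        if self._summary is not None:
            self._summary["error"] = repr(error)

    def send_request_summary_telemetry(self) -> None:
        # Flush exactly once per request, success or failure.
        summary, self._summary = self._summary, None
        if summary is not None:
            print("telemetry:", summary)  # stand-in for the real emitter


telemetry = RequestSummaryTelemetry()


def execute_plan(request):
    telemetry.initialize_request_summary(request)
    try:
        return {"ok": True}  # stand-in for forwarding the plan to the backend
    except Exception as e:
        telemetry.report_request_failure(e)
        raise
    finally:
        telemetry.send_request_summary_telemetry()


execute_plan(request=object())
```

Flushing in `finally` guarantees a summary is emitted whether the backend call succeeds, raises `GrpcErrorStatusException`, or fails unexpectedly.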
snowflake/snowpark_connect/config.py

```diff
@@ -23,6 +23,7 @@ from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import TimestampTimeZone, TimestampType
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.type_support import set_integral_types_conversion
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import (
     get_jpype_jclass_lock,
@@ -159,7 +160,16 @@ class GlobalConfig:
         # USE_VECTORIZED_SCANNER will become the default in a future BCR; Snowflake recommends setting it to TRUE for new workloads.
         # This significantly reduces latency for loading Parquet files by downloading only relevant columnar sections into memory.
         "snowpark.connect.parquet.useVectorizedScanner": "true",
+        # USE_LOGICAL_TYPE enables proper handling of Parquet logical types (TIMESTAMP, DATE, DECIMAL).
+        # Without useLogicalType set to "true", Parquet TIMESTAMP (INT64 physical) is incorrectly read as NUMBER(38,0).
+        "snowpark.connect.parquet.useLogicalType": "false",
         "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue": "false",
+        "spark.sql.parquet.outputTimestampType": "TIMESTAMP_MILLIS",
+        "snowpark.connect.handleIntegralOverflow": "false",
+        "snowpark.connect.scala.version": "2.12",
+        # Control whether to convert decimal to integral types and vice versa: DecimalType(p,0) <-> ByteType/ShortType/IntegerType/LongType
+        # Values: "client_default" (behavior based on client type), "enabled", "disabled"
+        "snowpark.connect.integralTypesEmulation": "client_default",
     }
 
     boolean_config_list = [
@@ -170,12 +180,14 @@ class GlobalConfig:
         "spark.sql.caseSensitive",
         "snowpark.connect.localRelation.optimizeSmallData",
         "snowpark.connect.parquet.useVectorizedScanner",
+        "snowpark.connect.parquet.useLogicalType",
         "spark.sql.ansi.enabled",
         "spark.sql.legacy.allowHashOnMapType",
         "spark.Catalog.databaseFilterInformationSchema",
         "spark.sql.parser.quotedRegexColumnNames",
         "snowflake.repartition.for.writes",
         "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue",
+        "snowpark.connect.handleIntegralOverflow",
     ]
 
     int_config_list = [
@@ -192,8 +204,15 @@ class GlobalConfig:
         "spark.app.name": lambda session, name: setattr(
             session, "query_tag", f"Spark-Connect-App-Name={name}"
         ),
+        # TODO SNOW-2896871: Remove with version 1.10.0
         "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
-            session, imports
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.python.imports": lambda session, imports: parse_imports(
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.java.imports": lambda session, imports: parse_imports(
+            session, imports, "java"
         ),
     }
 
@@ -359,6 +378,11 @@ CONFIG_ALLOWED_VALUES: dict[str, tuple] = {
         "all",
         "none",
     ),
+    "snowpark.connect.integralTypesEmulation": (
+        "client_default",
+        "enabled",
+        "disabled",
+    ),
 }
 
 # Set some default configuration that are necessary for the driver.
```
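The defaults above register several user-visible settings: `snowpark.connect.parquet.useLogicalType`, `spark.sql.parquet.outputTimestampType`, `snowpark.connect.handleIntegralOverflow`, `snowpark.connect.scala.version`, and `snowpark.connect.integralTypesEmulation`. A hedged usage sketch follows, assuming these keys are set through the regular Spark Connect configuration API handled by the Config RPC shown earlier; the connection URL is illustrative, and the accepted values come from the defaults and `CONFIG_ALLOWED_VALUES` in this diff.

```python
from pyspark.sql import SparkSession

# Illustrative endpoint; in practice this points at the Snowpark Connect server.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

# Read Parquet logical types (TIMESTAMP, DATE, DECIMAL) instead of raw
# physical types; boolean config, default "false" per the diff.
spark.conf.set("snowpark.connect.parquet.useLogicalType", "true")

# Emulate Spark's integral overflow behavior on casts and arithmetic;
# boolean config, default "false".
spark.conf.set("snowpark.connect.handleIntegralOverflow", "true")

# DecimalType(p,0) <-> ByteType/ShortType/IntegerType/LongType emulation:
# one of "client_default", "enabled", "disabled".
spark.conf.set("snowpark.connect.integralTypesEmulation", "enabled")

# Parquet timestamp output type; the set_snowflake_parameters handling in the
# next hunk maps this onto UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS.
spark.conf.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
```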
snowflake/snowpark_connect/config.py (continued)

```diff
@@ -641,6 +665,27 @@ def set_snowflake_parameters(
             # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
             snowpark.context._enable_fix_2360274 = str_to_bool(value)
             logger.info(f"Updated snowpark session structured types fix: {value}")
+        case "spark.sql.parquet.outputTimestampType":
+            if value == "TIMESTAMP_MICROS":
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = false"
+                ).collect()
+            else:
+                # Default: TIMESTAMP_MILLIS (or any other value)
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = true"
+                ).collect()
+            logger.info(f"Updated parquet timestamp output type to: {value}")
+        case "snowpark.connect.scala.version":
+            # force java udf helper recreation
+            set_java_udf_creator_initialized_state(False)
+        case "snowpark.connect.integralTypesEmulation":
+            # "client_default" - don't change, let set_spark_version handle it
+            # "enabled" / "disabled" - explicitly set
+            if value.lower() == "enabled":
+                set_integral_types_conversion(True)
+            elif value.lower() == "disabled":
+                set_integral_types_conversion(False)
         case _:
             pass
 
@@ -726,15 +771,22 @@ def external_table_location() -> Optional[str]:
     )
 
 
-def parse_imports(
+def parse_imports(
+    session: snowpark.Session, imports: str | None, language: str
+) -> None:
     if not imports:
         return
 
     # UDF needs to be recreated to include new imports
     clear_external_udxf_cache(session)
+    if language == "java":
+
+        set_java_udf_creator_initialized_state(False)
 
     for udf_import in imports.strip("[] ").split(","):
-
+        udf_import = udf_import.strip()
+        if udf_import:
+            session.add_import(udf_import)
 
 
 def get_timestamp_type():
@@ -827,3 +879,20 @@ def check_table_supports_operation(table_identifier: str, operation: str) -> bool
         return table_metadata.get("supports_column_rename", True)
 
     return True
+
+
+def get_scala_version() -> str:
+    return global_config.get("snowpark.connect.scala.version")
+
+
+_java_udf_creator_initialized = False
+
+
+def is_java_udf_creator_initialized() -> bool:
+    global _java_udf_creator_initialized
+    return _java_udf_creator_initialized
+
+
+def set_java_udf_creator_initialized_state(value: bool) -> None:
+    global _java_udf_creator_initialized
+    _java_udf_creator_initialized = value
```
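The reworked `parse_imports` now takes a `language` argument, resets the Java UDF helper state when Java imports change, and skips empty entries when splitting the bracketed import list. A small, self-contained restatement of just the string handling; the helper name and stage paths below are illustrative, and the `session.add_import` call is omitted.

```python
# Mirrors imports.strip("[] ").split(",") plus the new per-entry strip/skip
# logic from parse_imports above; split_udf_imports is a hypothetical name.
def split_udf_imports(imports: str | None) -> list[str]:
    if not imports:
        return []
    parts = (item.strip() for item in imports.strip("[] ").split(","))
    return [item for item in parts if item]


# Whitespace and trailing empty entries are now tolerated.
assert split_udf_imports("[@stage/a.py, @stage/b.zip, ]") == [
    "@stage/a.py",
    "@stage/b.zip",
]
assert split_udf_imports(None) == []
```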
snowflake/snowpark_connect/expression/error_utils.py (new file)

```diff
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark.column import Column
+from snowflake.snowpark.types import DataType, StringType
+
+
+def raise_error_helper(return_type: DataType, error_class=None):
+    error_class_str = (
+        f":{error_class.__name__}"
+        if error_class and hasattr(error_class, "__name__")
+        else ""
+    )
+
+    def _raise_fn(*msgs: Column) -> Column:
+        return snowpark_fn.cast(
+            snowpark_fn.abs(
+                snowpark_fn.concat(
+                    snowpark_fn.lit(f"[snowpark-connect-exception{error_class_str}]"),
+                    *(msg.try_cast(StringType()) for msg in msgs),
+                )
+            ).cast(StringType()),
+            return_type,
+        )
+
+    return _raise_fn
```
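`raise_error_helper` builds a deferred error: it returns a function that produces a Column which, when evaluated, applies `ABS()` to a concatenated string tagged `[snowpark-connect-exception:<ErrorClass>]`, so the query fails server-side with a message that can be mapped back to a PySpark exception. A hedged usage sketch follows; the column name and bounds are illustrative, and the call shape mirrors how `integral_types_support.py` (below) invokes the helper.

```python
import snowflake.snowpark.functions as snowpark_fn
from pyspark.errors.exceptions.base import ArithmeticException
from snowflake.snowpark.types import ByteType, StringType
from snowflake.snowpark_connect.expression.error_utils import raise_error_helper

# Build an error-raising expression that is typed as ByteType so it can sit in
# either branch of a WHEN without changing the overall column type.
raise_error = raise_error_helper(ByteType(), ArithmeticException)

checked_cast = snowpark_fn.when(
    (snowpark_fn.col("x") < snowpark_fn.lit(-128))
    | (snowpark_fn.col("x") > snowpark_fn.lit(127)),
    raise_error(
        snowpark_fn.lit("[CAST_OVERFLOW] The value "),
        snowpark_fn.col("x").cast(StringType()),
        snowpark_fn.lit(" does not fit in TINYINT."),
    ),
).otherwise(snowpark_fn.col("x").cast(ByteType()))
```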
snowflake/snowpark_connect/expression/integral_types_support.py (new file)

```diff
@@ -0,0 +1,219 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from pyspark.errors.exceptions.base import ArithmeticException
+
+import snowflake.snowpark.functions as snowpark_fn
+from snowflake.snowpark.column import Column
+from snowflake.snowpark.types import (
+    ByteType,
+    DataType,
+    IntegerType,
+    LongType,
+    ShortType,
+    StringType,
+)
+from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.expression.error_utils import raise_error_helper
+
+
+def get_integral_type_bounds(typ: DataType) -> tuple[int, int]:
+    if isinstance(typ, ByteType):
+        return (-128, 127)
+    elif isinstance(typ, ShortType):
+        return (-32768, 32767)
+    elif isinstance(typ, IntegerType):
+        return (-2147483648, 2147483647)
+    elif isinstance(typ, LongType):
+        return (-9223372036854775808, 9223372036854775807)
+    else:
+        raise ValueError(f"Unsupported integral type: {typ}")
+
+
+def apply_integral_overflow(col: Column, to_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    range_size = max_val - min_val + 1
+
+    offset_value = col - snowpark_fn.lit(min_val)
+    wrapped_offset = snowpark_fn.function("MOD")(
+        offset_value, snowpark_fn.lit(range_size)
+    )
+
+    wrapped_offset = snowpark_fn.when(
+        wrapped_offset < 0, wrapped_offset + snowpark_fn.lit(range_size)
+    ).otherwise(wrapped_offset)
+
+    wrapped_result = wrapped_offset + snowpark_fn.lit(min_val)
+
+    return snowpark_fn.when(
+        (col >= snowpark_fn.lit(min_val)) & (col <= snowpark_fn.lit(max_val)),
+        col.cast(to_type),
+    ).otherwise(wrapped_result.cast(to_type))
+
+
+def apply_fractional_to_integral_cast(col: Column, to_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+
+    clamped = (
+        snowpark_fn.when(col > snowpark_fn.lit(max_val), snowpark_fn.lit(max_val))
+        .when(col < snowpark_fn.lit(min_val), snowpark_fn.lit(min_val))
+        .otherwise(col)
+    )
+
+    return clamped.cast(to_type)
+
+
+def apply_integral_overflow_with_ansi_check(
+    col: Column, to_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    if not ansi_enabled:
+        return apply_integral_overflow(col, to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    type_name = to_type.typeName().upper()
+
+    raise_error = raise_error_helper(to_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (col < snowpark_fn.lit(min_val)) | (col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit("[CAST_OVERFLOW] The value "),
+            col.cast(StringType()),
+            snowpark_fn.lit(
+                f" of the type BIGINT cannot be cast to {type_name} due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead."
+            ),
+        ),
+    ).otherwise(col.cast(to_type))
+
+
+def apply_fractional_to_integral_cast_with_ansi_check(
+    col: Column, to_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return col.cast(to_type)
+
+    if not ansi_enabled:
+        return apply_fractional_to_integral_cast(col, to_type)
+
+    min_val, max_val = get_integral_type_bounds(to_type)
+    type_name = to_type.typeName().upper()
+
+    raise_error = raise_error_helper(to_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (col < snowpark_fn.lit(min_val)) | (col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit("[CAST_OVERFLOW] The value "),
+            col.cast(StringType()),
+            snowpark_fn.lit(
+                f" of the type DOUBLE cannot be cast to {type_name} "
+                f"due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead."
+            ),
+        ),
+    ).otherwise(col.cast(to_type))
+
+
+def apply_arithmetic_overflow_with_ansi_check(
+    result_col: Column, result_type: DataType, ansi_enabled: bool, operation_name: str
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return result_col.cast(result_type)
+
+    if not ansi_enabled:
+        return apply_integral_overflow(result_col, result_type)
+
+    min_val, max_val = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        (result_col < snowpark_fn.lit(min_val))
+        | (result_col > snowpark_fn.lit(max_val)),
+        raise_error(
+            snowpark_fn.lit(
+                f"[ARITHMETIC_OVERFLOW] {operation_name} overflow. "
+                f"Use 'try_{operation_name.lower()}' to tolerate overflow and return NULL instead. "
+                f'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise(result_col.cast(result_type))
+
+
+def apply_unary_overflow(value_col: Column, result_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return (value_col * snowpark_fn.lit(-1)).cast(result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        snowpark_fn.lit(min_val).cast(result_type),
+    ).otherwise((value_col * snowpark_fn.lit(-1)).cast(result_type))
+
+
+def apply_unary_overflow_with_ansi_check(
+    value_col: Column, result_type: DataType, ansi_enabled: bool, operation_name: str
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return (value_col * snowpark_fn.lit(-1)).cast(result_type)
+
+    if not ansi_enabled:
+        return apply_unary_overflow(value_col, result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        raise_error(
+            snowpark_fn.lit(
+                f"[ARITHMETIC_OVERFLOW] {operation_name} overflow. "
+                f'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise((value_col * snowpark_fn.lit(-1)).cast(result_type))
+
+
+def apply_abs_overflow(value_col: Column, result_type: DataType) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return snowpark_fn.abs(value_col).cast(result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        snowpark_fn.lit(min_val).cast(result_type),
+    ).otherwise(snowpark_fn.abs(value_col).cast(result_type))
+
+
+def apply_abs_overflow_with_ansi_check(
+    value_col: Column, result_type: DataType, ansi_enabled: bool
+) -> Column:
+    if not global_config.snowpark_connect_handleIntegralOverflow:
+        return snowpark_fn.abs(value_col).cast(result_type)
+
+    if not ansi_enabled:
+        return apply_abs_overflow(value_col, result_type)
+
+    min_val, _ = get_integral_type_bounds(result_type)
+
+    raise_error = raise_error_helper(result_type, ArithmeticException)
+
+    return snowpark_fn.when(
+        value_col == snowpark_fn.lit(min_val),
+        raise_error(
+            snowpark_fn.lit(
+                "[ARITHMETIC_OVERFLOW] abs overflow. "
+                'If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
+            ),
+        ),
+    ).otherwise(snowpark_fn.abs(value_col).cast(result_type))
```