snowpark-connect 1.6.0-py3-none-any.whl → 1.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/client/server.py +37 -0
- snowflake/snowpark_connect/config.py +72 -3
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/map_cast.py +108 -17
- snowflake/snowpark_connect/expression/map_udf.py +1 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +229 -96
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +43 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +73 -4
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_socket.py +4 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -1
- snowflake/snowpark_connect/relation/read/reader_config.py +6 -0
- snowflake/snowpark_connect/resources_initializer.py +90 -29
- snowflake/snowpark_connect/server.py +6 -41
- snowflake/snowpark_connect/server_common/__init__.py +4 -1
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/utils/context.py +8 -0
- snowflake/snowpark_connect/utils/java_stored_procedure.py +53 -27
- snowflake/snowpark_connect/utils/java_udaf_utils.py +46 -28
- snowflake/snowpark_connect/utils/java_udtf_utils.py +1 -1
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +48 -15
- snowflake/snowpark_connect/utils/scala_udf_utils.py +98 -22
- snowflake/snowpark_connect/utils/telemetry.py +33 -22
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +12 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +4 -2
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +46 -37
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-1.6.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-1.6.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/type_support.py (new file):

```diff
@@ -0,0 +1,130 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import threading
+
+from snowflake import snowpark
+from snowflake.snowpark.types import (
+    ArrayType,
+    ByteType,
+    DataType,
+    DecimalType,
+    IntegerType,
+    LongType,
+    MapType,
+    ShortType,
+    StructField,
+    StructType,
+    _IntegralType,
+)
+
+_integral_types_conversion_enabled: bool = False
+_client_mode_lock = threading.Lock()
+
+
+def set_integral_types_conversion(enabled: bool) -> None:
+    global _integral_types_conversion_enabled
+
+    with _client_mode_lock:
+        if _integral_types_conversion_enabled == enabled:
+            return
+
+        _integral_types_conversion_enabled = enabled
+
+        if enabled:
+            snowpark.context._integral_type_default_precision = {
+                LongType: 19,
+                IntegerType: 10,
+                ShortType: 5,
+                ByteType: 3,
+            }
+        else:
+            snowpark.context._integral_type_default_precision = {}
+
+
+def set_integral_types_for_client_default(is_python_client: bool) -> None:
+    """
+    Set integral types based on client type when config is 'client_default'.
+    """
+    from snowflake.snowpark_connect.config import global_config
+
+    config_key = "snowpark.connect.integralTypesEmulation"
+    if global_config.get(config_key) != "client_default":
+        return
+
+    # if client mode matches, no action needed (no lock overhead)
+    if _integral_types_conversion_enabled == (not is_python_client):
+        return
+
+    set_integral_types_conversion(not is_python_client)
+
+
+def emulate_integral_types(t: DataType) -> DataType:
+    """
+    Map LongType based on precision attribute to appropriate integral types.
+
+    Mappings:
+    - _IntegralType with precision=19 -> LongType
+    - _IntegralType with precision=10 -> IntegerType
+    - _IntegralType with precision=5 -> ShortType
+    - _IntegralType with precision=3 -> ByteType
+    - _IntegralType with other precision -> DecimalType(precision, 0)
+
+    This conversion is controlled by the 'snowpark.connect.integralTypesEmulation' config.
+    When disabled, the function returns the input type unchanged.
+
+    Args:
+        t: The DataType to transform
+
+    Returns:
+        The transformed DataType with integral type conversions applied based on precision.
+    """
+    global _integral_types_conversion_enabled
+
+    with _client_mode_lock:
+        enabled = _integral_types_conversion_enabled
+    if not enabled:
+        return t
+    if isinstance(t, _IntegralType):
+        precision = getattr(t, "_precision", None)
+
+        if precision is None:
+            return t
+        elif precision == 19:
+            return LongType()
+        elif precision == 10:
+            return IntegerType()
+        elif precision == 5:
+            return ShortType()
+        elif precision == 3:
+            return ByteType()
+        else:
+            return DecimalType(precision, 0)
+
+    elif isinstance(t, StructType):
+        new_fields = [
+            StructField(
+                field.name,
+                emulate_integral_types(field.datatype),
+                field.nullable,
+                _is_column=field._is_column,
+            )
+            for field in t.fields
+        ]
+        return StructType(new_fields)
+
+    elif isinstance(t, ArrayType):
+        return ArrayType(
+            emulate_integral_types(t.element_type),
+            t.contains_null,
+        )
+
+    elif isinstance(t, MapType):
+        return MapType(
+            emulate_integral_types(t.key_type),
+            emulate_integral_types(t.value_type),
+            t.value_contains_null,
+        )
+
+    return t
```
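The heart of the new module is the precision-based dispatch in `emulate_integral_types`. A minimal standalone sketch of that mapping, using type names as plain strings so it runs without Snowpark installed (the real function returns Snowpark `DataType` instances and recurses into struct, array, and map types):

```python
# Toy stand-in for the precision -> integral type dispatch above; the real
# code returns Snowpark DataType instances, this returns names for clarity.
PRECISION_TO_TYPE = {19: "LongType", 10: "IntegerType", 5: "ShortType", 3: "ByteType"}

def emulate(precision):
    if precision is None:
        return "unchanged"  # no _precision attribute: leave the type alone
    # any precision outside the four integral widths falls back to DECIMAL(p, 0)
    return PRECISION_TO_TYPE.get(precision, f"DecimalType({precision}, 0)")

assert emulate(19) == "LongType"
assert emulate(5) == "ShortType"
assert emulate(38) == "DecimalType(38, 0)"
```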
snowflake/snowpark_connect/utils/context.py:

```diff
@@ -12,6 +12,9 @@ from typing import Iterator, Mapping, Optional
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.type_support import (
+    set_integral_types_for_client_default,
+)
 from snowflake.snowpark_connect.typed_column import TypedColumn
 
 # TODO: remove session id from context when we host SAS in Snowflake server
@@ -267,6 +270,11 @@ def set_spark_version(client_type: str) -> None:
     version = match.group("spark_version") if match else ""
     _spark_version.set(version)
 
+    # enable integral types (only if config is "client_default")
+
+    is_python_client = "_SPARK_CONNECT_PYTHON" in client_type
+    set_integral_types_for_client_default(is_python_client)
+
 
 def get_is_aggregate_function() -> tuple[str, bool]:
     """
```
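With the config left at `client_default`, emulation is derived from the client type string: PySpark clients keep it off, JVM clients get it enabled. A small sketch of that rule; only the `_SPARK_CONNECT_PYTHON` marker is confirmed by the diff, and the Scala marker below is a hypothetical illustration:

```python
# Sketch of the client_default resolution. "_SPARK_CONNECT_SCALA" is a
# hypothetical example string, not a marker taken from the diff.
def resolve_emulation(client_type: str) -> bool:
    is_python_client = "_SPARK_CONNECT_PYTHON" in client_type
    return not is_python_client  # JVM clients -> emulation on

assert resolve_emulation("spark/3.5.6 _SPARK_CONNECT_PYTHON") is False
assert resolve_emulation("spark/3.5.6 _SPARK_CONNECT_SCALA") is True
```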
snowflake/snowpark_connect/utils/java_stored_procedure.py:

```diff
@@ -7,11 +7,22 @@ from pyspark.errors import AnalysisException
 import snowflake.snowpark.types as snowpark_type
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.type_utils import type_string_to_type_object
+from snowflake.snowpark_connect.client.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.config import (
+    get_scala_version,
+    is_java_udf_creator_initialized,
+    set_java_udf_creator_initialized_state,
+)
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.resources_initializer import (
     RESOURCE_PATH,
-    SPARK_COMMON_UTILS_JAR,
-    SPARK_CONNECT_CLIENT_JAR,
-    SPARK_SQL_JAR,
+    SPARK_COMMON_UTILS_JAR_212,
+    SPARK_COMMON_UTILS_JAR_213,
+    SPARK_CONNECT_CLIENT_JAR_212,
+    SPARK_CONNECT_CLIENT_JAR_213,
+    SPARK_SQL_JAR_212,
+    SPARK_SQL_JAR_213,
+    ensure_scala_udf_jars_uploaded,
 )
 from snowflake.snowpark_connect.utils.upload_java_jar import upload_java_udf_jar
@@ -22,7 +33,7 @@ CREATE OR REPLACE TEMPORARY PROCEDURE __SC_JAVA_SP_CREATE_JAVA_UDF(udf_name VARC
 RETURNS VARCHAR
 LANGUAGE JAVA
 RUNTIME_VERSION = 17
-PACKAGES = ('com.snowflake:
+PACKAGES = ('com.snowflake:snowpark___scala_version__:latest')
 __snowflake_udf_imports__
 HANDLER = 'com.snowflake.snowpark_connect.procedures.JavaUDFCreator.process'
 EXECUTE AS CALLER
@@ -30,19 +41,6 @@ EXECUTE AS CALLER
 """
 
 
-_is_initialized = False
-
-
-def is_initialized() -> bool:
-    global _is_initialized
-    return _is_initialized
-
-
-def set_java_udf_creator_initialized_state(value: bool) -> None:
-    global _is_initialized
-    _is_initialized = value
-
-
 class JavaUdf:
     """
     Reference class for Java UDFs, providing similar properties like Python UserDefinedFunction.
@@ -70,12 +68,33 @@ class JavaUdf:
         self._return_type = return_type
 
 
+def _scala_static_imports_for_sproc(stage_resource_path: str) -> set[str]:
+    scala_version = get_scala_version()
+    if scala_version == "2.12":
+        return {
+            f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR_212}",
+            f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR_212}",
+            f"{stage_resource_path}/{SPARK_SQL_JAR_212}",
+        }
+
+    if scala_version == "2.13":
+        return {
+            f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR_213}",
+            f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR_213}",
+            f"{stage_resource_path}/{SPARK_SQL_JAR_213}",
+        }
+
+    # invalid Scala version
+    exception = ValueError(
+        f"Unsupported Scala version: {scala_version}. Snowpark Connect supports Scala 2.12 and 2.13"
+    )
+    attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+    raise exception
+
+
 def get_quoted_imports(session: Session) -> str:
     stage_resource_path = session.get_session_stage() + RESOURCE_PATH
-    spark_imports = {
-        f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR}",
-        f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR}",
-        f"{stage_resource_path}/{SPARK_SQL_JAR}",
+    spark_imports = _scala_static_imports_for_sproc(stage_resource_path) | {
         f"{stage_resource_path}/java_udfs-1.0-SNAPSHOT.jar",
     }
 
```
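The new `_scala_static_imports_for_sproc` helper is a two-way dispatch on the configured Scala version. A self-contained sketch of the same pattern, with illustrative jar names standing in for the `SPARK_*_JAR_212`/`_213` constants:

```python
# Illustrative stand-ins for the SPARK_*_JAR_212/213 stage-file constants.
JARS_BY_SCALA = {
    "2.12": ("spark-connect-client-jvm_2.12.jar", "spark-common-utils_2.12.jar", "spark-sql_2.12.jar"),
    "2.13": ("spark-connect-client-jvm_2.13.jar", "spark-common-utils_2.13.jar", "spark-sql_2.13.jar"),
}

def static_imports(stage_path: str, scala_version: str) -> set[str]:
    jars = JARS_BY_SCALA.get(scala_version)
    if jars is None:
        # mirrors the ValueError (with custom error code) raised by the real helper
        raise ValueError(f"Unsupported Scala version: {scala_version}. Supported: 2.12 and 2.13")
    return {f"{stage_path}/{jar}" for jar in jars}

print(static_imports("@session_stage/resources", "2.13"))
```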
snowflake/snowpark_connect/utils/java_stored_procedure.py (continued):

```diff
@@ -83,14 +102,21 @@ def get_quoted_imports(session: Session) -> str:
     """Helper function to wrap strings in single quotes for SQL."""
     return "'" + s + "'"
 
-
+    from snowflake.snowpark_connect.config import global_config
 
+    config_imports = global_config.get("snowpark.connect.udf.java.imports", "")
+    config_imports = (
+        {x.strip() for x in config_imports.strip("[] ").split(",") if x.strip()}
+        if config_imports
+        else set()
+    )
 
-
-
-        ensure_scala_udf_jars_uploaded,
+    return ", ".join(
+        quote_single(x) for x in session._artifact_jars | spark_imports | config_imports
     )
 
+
+def create_snowflake_imports(session: Session) -> str:
     # Make sure that the resource initializer thread is completed before creating Java UDFs since we depend on the jars
     # uploaded by it.
     ensure_scala_udf_jars_uploaded()
```
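The new `snowpark.connect.udf.java.imports` setting accepts a bracketed, comma-separated list of extra jar paths. A runnable sketch of exactly the parsing expression used above, with a hypothetical config value:

```python
# Mirrors the parsing in get_quoted_imports: strip the brackets, split on
# commas, and drop empty fragments.
raw = "[@stage/extra_a.jar, @stage/extra_b.jar]"  # hypothetical config value
parsed = {x.strip() for x in raw.strip("[] ").split(",") if x.strip()}
assert parsed == {"@stage/extra_a.jar", "@stage/extra_b.jar"}

# An empty or missing value yields an empty set, so it unions safely with the
# session's artifact jars and the static Spark jars.
assert not {x.strip() for x in "".strip("[] ").split(",") if x.strip()}
```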
```diff
@@ -99,12 +125,12 @@ def create_snowflake_imports(session: Session) -> str:
 
 
 def create_java_udf(session: Session, function_name: str, java_class: str):
-    if not is_initialized():
+    if not is_java_udf_creator_initialized():
         upload_java_udf_jar(session)
         session.sql(
             SP_TEMPLATE.replace(
                 "__snowflake_udf_imports__", create_snowflake_imports(session)
-            )
+            ).replace("__scala_version__", get_scala_version())
         ).collect()
         set_java_udf_creator_initialized_state(True)
     name = CREATE_JAVA_UDF_PREFIX + function_name
```
snowflake/snowpark_connect/utils/java_udaf_utils.py:

```diff
@@ -12,7 +12,6 @@ from snowflake.snowpark_connect.utils.jvm_udf_utils import (
     ReturnType,
     Signature,
     build_jvm_udxf_imports,
-    cast_java_map_args_from_given_type,
     map_type_to_java_type,
 )
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
@@ -41,19 +40,20 @@ import com.snowflake.snowpark_java.types.*;
 
 public class JavaUDAF {
     private final static String OPERATION_FILE = "__operation_file__";
-    private static scala.Function2<
+    private static scala.Function2<__reduce_type__, __reduce_type__, __reduce_type__> operation = null;
+    private static UdfPacket udfPacket = null;
 
     private static void loadOperation() throws IOException, ClassNotFoundException {
         if (operation != null) {
             return; // Already loaded
         }
 
-
-        operation = (scala.Function2<
+        udfPacket = com.snowflake.sas.scala.Utils$.MODULE$.deserializeUdfPacket(OPERATION_FILE);
+        operation = (scala.Function2<__reduce_type__, __reduce_type__, __reduce_type__>) udfPacket.function();
     }
 
     public static class State implements Serializable {
-        public
+        public __reduce_type__ value = null;
         public boolean initialized = false;
     }
 
@@ -69,10 +69,10 @@ public class JavaUDAF {
         }
 
         if (!state.initialized) {
-            state.value =
+            state.value = __mapped_value__;
             state.initialized = true;
         } else {
-            state.value = operation.apply(state.value,
+            state.value = operation.apply(state.value, __mapped_value__);
         }
         return state;
     }
@@ -115,7 +115,6 @@ class JavaUDAFDef:
     name: str
     signature: Signature
    java_signature: Signature
-    java_invocation_args: list[str]
     imports: list[str]
     null_handling: NullHandling = NullHandling.RETURNS_NULL_ON_NULL_INPUT
 
@@ -131,17 +130,31 @@ class JavaUDAFDef:
         Returns:
            String containing the complete Java code for the UDAF body
        """
-        returns_variant = self.signature.returns.data_type == "
+        returns_variant = self.signature.returns.data_type.lower() == "variant"
        return_type = (
            "Variant" if returns_variant else self.java_signature.params[0].data_type
        )
        response_wrapper = (
-            "
+            "com.snowflake.sas.scala.Utils$.MODULE$.toVariant(state.value, udfPacket)"
+            if returns_variant
+            else "state.value"
+        )
+
+        is_variant_input = self.java_signature.params[0].data_type.lower() == "variant"
+        reduce_type = (
+            "Object" if is_variant_input else self.java_signature.params[0].data_type
        )
        return (
            UDAF_TEMPLATE.replace("__operation_file__", self.imports[0].split("/")[-1])
            .replace("__accumulator_type__", self.java_signature.params[0].data_type)
            .replace("__value_type__", self.java_signature.params[1].data_type)
+            .replace(
+                "__mapped_value__",
+                "com.snowflake.sas.scala.UdfPacketUtils$.MODULE$.fromVariant(udfPacket, input, 0)"
+                if is_variant_input
+                else "input",
+            )
+            .replace("__reduce_type__", reduce_type)
            .replace("__return_type__", return_type)
            .replace("__response_wrapper__", response_wrapper)
        )
```
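The Java UDAF body is specialized by chaining `str.replace` over placeholder tokens such as `__reduce_type__` and `__mapped_value__`. A toy sketch of that mechanism (the template lines are illustrative, not the real `UDAF_TEMPLATE`):

```python
# Toy template specialization, mirroring the placeholder substitution above.
template = (
    "private static scala.Function2<__reduce_type__, __reduce_type__, __reduce_type__> operation;\n"
    "state.value = operation.apply(state.value, __mapped_value__);"
)
is_variant_input = True
body = (
    template
    # variant inputs are unpacked from VARIANT before reaching the reducer
    .replace("__mapped_value__", "fromVariant(udfPacket, input, 0)" if is_variant_input else "input")
    .replace("__reduce_type__", "Object" if is_variant_input else "long")
)
print(body)
```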
snowflake/snowpark_connect/utils/java_udaf_utils.py (continued):

```diff
@@ -231,12 +244,11 @@ def create_java_udaf_for_reduce_scala_function(
         A JavaUdaf object representing the Java UDAF.
     """
     from snowflake.snowpark_connect.resources_initializer import (
-        wait_for_resource_initialization,
+        ensure_scala_udf_jars_uploaded,
     )
 
-    # Make sure
-
-    wait_for_resource_initialization()
+    # Make sure Scala UDF jars are uploaded before creating Java UDAFs since we depend on them.
+    ensure_scala_udf_jars_uploaded()
 
     from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 
@@ -252,23 +264,26 @@ def create_java_udaf_for_reduce_scala_function(
 
     java_input_params: list[Param] = []
     sql_input_params: list[Param] = []
-    java_invocation_args: list[str] = []  # arguments passed into the udf function
     if input_types:  # input_types can be None when no arguments are provided
         for i, input_type in enumerate(input_types):
             param_name = "arg" + str(i)
+            if isinstance(
+                input_type,
+                (
+                    snowpark_type.ArrayType,
+                    snowpark_type.MapType,
+                    snowpark_type.VariantType,
+                ),
+            ):
+                java_type = "Variant"
+                snowflake_type = "Variant"
+            else:
+                java_type = map_type_to_java_type(input_type)
+                snowflake_type = map_type_to_snowflake_type(input_type)
             # Create the Java arguments and input types string: "arg0: Type0, arg1: Type1, ...".
-            java_input_params.append(
-                Param(param_name, map_type_to_java_type(input_type))
-            )
+            java_input_params.append(Param(param_name, java_type))
             # Create the Snowflake SQL arguments and input types string: "arg0 TYPE0, arg1 TYPE1, ...".
-            sql_input_params.append(
-                Param(param_name, map_type_to_snowflake_type(input_type))
-            )
-            # In the case of Map input types, we need to cast the argument to the correct type in Java.
-            # Snowflake SQL Java can only handle MAP[VARCHAR, VARCHAR] as input types.
-            java_invocation_args.append(
-                cast_java_map_args_from_given_type(param_name, input_type)
-            )
+            sql_input_params.append(Param(param_name, snowflake_type))
 
     java_return_type = map_type_to_java_type(pciudf._original_return_type)
     # If the SQL return type is a MAP or STRUCT, change this to VARIANT because of issues with Java UDAFs.
```
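Semi-structured inputs are now funneled through VARIANT on both the Java and SQL side, replacing the removed `cast_java_map_args_from_given_type` path. A small sketch of the routing decision, with type names as string stand-ins for the `snowpark_type` classes:

```python
SEMI_STRUCTURED = {"ArrayType", "MapType", "VariantType"}

def route_input(type_name: str, mapped_java: str, mapped_sql: str) -> tuple[str, str]:
    # Semi-structured types collapse to VARIANT on both sides; everything
    # else keeps the mapped Java/SQL type pair.
    if type_name in SEMI_STRUCTURED:
        return "Variant", "Variant"
    return mapped_java, mapped_sql

assert route_input("MapType", "Map<String, String>", "MAP(VARCHAR, VARCHAR)") == ("Variant", "Variant")
assert route_input("LongType", "long", "BIGINT") == ("long", "BIGINT")
```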
```diff
@@ -282,7 +297,11 @@ def create_java_udaf_for_reduce_scala_function(
     )
     sql_return_type = (
         "VARIANT"
-        if (
+        if (
+            sql_return_type.startswith("MAP")
+            or sql_return_type.startswith("OBJECT")
+            or sql_return_type.startswith("ARRAY")
+        )
         else sql_return_type
     )
 
@@ -295,7 +314,6 @@ def create_java_udaf_for_reduce_scala_function(
         java_signature=Signature(
             params=java_input_params, returns=ReturnType(java_return_type)
         ),
-        java_invocation_args=java_invocation_args,
     )
     create_udf_sql = udf_def.to_create_function_sql()
     logger.info(f"Creating Java UDAF: {create_udf_sql}")
```
snowflake/snowpark_connect/utils/java_udtf_utils.py:

```diff
@@ -95,7 +95,7 @@ public class JavaUdtfHandler {
         java.util.Iterator<Variant> javaResult = new java.util.Iterator<Variant>() {
             public boolean hasNext() { return scalaResult.hasNext(); }
             public Variant next() {
-                return com.snowflake.sas.scala.Utils$.MODULE$.toVariant(scalaResult.next());
+                return com.snowflake.sas.scala.Utils$.MODULE$.toVariant(scalaResult.next(), udfPacket);
             }
         };
 
```
snowflake/snowpark_connect/utils/jvm_udf_utils.py:

```diff
@@ -9,16 +9,23 @@ from typing import List, Union
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
 from snowflake import snowpark
+from snowflake.snowpark_connect.config import get_scala_version
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import (
-
+    JSON_4S_JAR_212,
+    JSON_4S_JAR_213,
     RESOURCE_PATH,
-
-
-
-
-
+    SAS_SCALA_UDF_JAR_212,
+    SAS_SCALA_UDF_JAR_213,
+    SCALA_REFLECT_JAR_212,
+    SCALA_REFLECT_JAR_213,
+    SPARK_COMMON_UTILS_JAR_212,
+    SPARK_COMMON_UTILS_JAR_213,
+    SPARK_CONNECT_CLIENT_JAR_212,
+    SPARK_CONNECT_CLIENT_JAR_213,
+    SPARK_SQL_JAR_212,
+    SPARK_SQL_JAR_213,
 )
 
 
@@ -108,15 +115,41 @@ def build_jvm_udxf_imports(
     )
 
     # Format the user jars to be used in the IMPORTS clause of the stored procedure.
-    return
-        closure_binary_file
-
-
-
-
-
-
-
+    return (
+        [closure_binary_file]
+        + _scala_static_imports_for_udf(stage_resource_path)
+        + list(session._artifact_jars)
+    )
+
+
+def _scala_static_imports_for_udf(stage_resource_path: str) -> list[str]:
+    scala_version = get_scala_version()
+    if scala_version == "2.12":
+        return [
+            f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR_212}",
+            f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR_212}",
+            f"{stage_resource_path}/{SPARK_SQL_JAR_212}",
+            f"{stage_resource_path}/{JSON_4S_JAR_212}",
+            f"{stage_resource_path}/{SAS_SCALA_UDF_JAR_212}",
+            f"{stage_resource_path}/{SCALA_REFLECT_JAR_212}",  # Required for deserializing Scala lambdas
+        ]
+
+    if scala_version == "2.13":
+        return [
+            f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR_213}",
+            f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR_213}",
+            f"{stage_resource_path}/{SPARK_SQL_JAR_213}",
+            f"{stage_resource_path}/{JSON_4S_JAR_213}",
+            f"{stage_resource_path}/{SAS_SCALA_UDF_JAR_213}",
+            f"{stage_resource_path}/{SCALA_REFLECT_JAR_213}",  # Required for deserializing Scala lambdas
+        ]
+
+    # invalid Scala version
+    exception = ValueError(
+        f"Unsupported Scala version: {scala_version}. Snowpark Connect supports Scala 2.12 and 2.13"
+    )
+    attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+    raise exception
 
 
 def map_type_to_java_type(
```
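`build_jvm_udxf_imports` now assembles the IMPORTS list from three sources: the serialized closure file, the version-keyed static jars, and the session's artifact jars. A sketch of that assembly with hypothetical stage paths:

```python
# Assembly of the IMPORTS list, mirroring the new return statement in
# build_jvm_udxf_imports. All values below are hypothetical placeholders.
closure_binary_file = "@session_stage/resources/closure.bin"
static_jars = ["@session_stage/resources/spark-sql_2.13-3.5.6.jar"]  # from _scala_static_imports_for_udf
artifact_jars = {"@session_stage/user_code.jar"}                     # session._artifact_jars

imports = [closure_binary_file] + static_jars + list(artifact_jars)
print(imports)
```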
|