snowpark-connect 0.25.0__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. snowflake/snowpark_connect/config.py +10 -3
  2. snowflake/snowpark_connect/dataframe_container.py +16 -0
  3. snowflake/snowpark_connect/expression/map_expression.py +15 -0
  4. snowflake/snowpark_connect/expression/map_udf.py +68 -27
  5. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +18 -0
  6. snowflake/snowpark_connect/expression/map_unresolved_function.py +38 -28
  7. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  8. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +9 -7
  11. snowflake/snowpark_connect/relation/map_map_partitions.py +36 -72
  12. snowflake/snowpark_connect/relation/map_relation.py +15 -2
  13. snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
  14. snowflake/snowpark_connect/relation/map_show_string.py +2 -0
  15. snowflake/snowpark_connect/relation/map_sql.py +63 -2
  16. snowflake/snowpark_connect/relation/map_udtf.py +96 -44
  17. snowflake/snowpark_connect/relation/utils.py +44 -0
  18. snowflake/snowpark_connect/relation/write/map_write.py +135 -24
  19. snowflake/snowpark_connect/resources_initializer.py +18 -5
  20. snowflake/snowpark_connect/server.py +12 -2
  21. snowflake/snowpark_connect/utils/artifacts.py +4 -5
  22. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  23. snowflake/snowpark_connect/utils/context.py +41 -1
  24. snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
  25. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +86 -2
  26. snowflake/snowpark_connect/utils/scala_udf_utils.py +250 -242
  27. snowflake/snowpark_connect/utils/session.py +4 -0
  28. snowflake/snowpark_connect/utils/udf_utils.py +71 -118
  29. snowflake/snowpark_connect/utils/udtf_helper.py +17 -7
  30. snowflake/snowpark_connect/utils/udtf_utils.py +3 -16
  31. snowflake/snowpark_connect/version.py +2 -3
  32. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/RECORD +41 -37
  34. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-connect +0 -0
  35. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-session +0 -0
  36. {snowpark_connect-0.25.0.data → snowpark_connect-0.27.0.data}/scripts/snowpark-submit +0 -0
  37. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/WHEEL +0 -0
  38. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE-binary +0 -0
  39. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/LICENSE.txt +0 -0
  40. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/licenses/NOTICE-binary +0 -0
  41. {snowpark_connect-0.25.0.dist-info → snowpark_connect-0.27.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/scala_udf_utils.py
@@ -15,10 +15,10 @@ Key components:
  - Type mapping functions for different type systems
  - UDF creation and management utilities
  """
-
+ import re
  from dataclasses import dataclass
  from enum import Enum
- from typing import Callable, List
+ from typing import List, Union

  import snowflake.snowpark.types as snowpark_type
  import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
@@ -130,12 +130,13 @@ class ScalaUDFDef:
  name: str
  signature: Signature
  scala_signature: Signature
+ scala_invocation_args: List[str]
  imports: List[str]
  null_handling: NullHandling = NullHandling.RETURNS_NULL_ON_NULL_INPUT

  # -------------------- DDL Emitter --------------------

- def _gen_body_sql(self) -> str:
+ def _gen_body_scala(self) -> str:
  """
  Generate the Scala code body for the UDF.

@@ -145,36 +146,78 @@ class ScalaUDFDef:
  Returns:
  String containing the complete Scala code for the UDF body
  """
- scala_return_type = self.scala_signature.returns.data_type
- # Convert Array to Seq for Scala compatibility in function signatures
- cast_scala_input_types = (
+ # Convert Array to Seq for Scala compatibility in function signatures.
+ udf_func_input_types = (
  ", ".join(p.data_type for p in self.scala_signature.params)
  ).replace("Array", "Seq")
- scala_arg_and_input_types_str = ", ".join(
+ # Create the Scala arguments and input types string: "arg0: Type0, arg1: Type1, ...".
+ joined_wrapper_arg_and_input_types_str = ", ".join(
  f"{p.name}: {p.data_type}" for p in self.scala_signature.params
  )
- scala_args_str = ", ".join(f"{p.name}" for p in self.scala_signature.params)
- return f"""import org.apache.spark.sql.connect.common.UdfPacket
+ # This is used in defining the input types for the wrapper function. For Maps to work correctly with Scala UDFs,
+ # we need to set the Map types to Map[String, String]. These get cast to the respective original types
+ # when the original UDF function is invoked.
+ wrapper_arg_and_input_types_str = re.sub(
+ pattern=r"Map\[\w+,\s\w+\]",
+ repl="Map[String, String]",
+ string=joined_wrapper_arg_and_input_types_str,
+ )
+ invocation_args = ", ".join(self.scala_invocation_args)
+
+ # Cannot directly return a map from a Scala UDF due to issues with non-String values. Snowflake SQL Scala only
+ # supports Map[String, String] as input types. Therefore, we convert the map to a JSON string before returning.
+ # This is processed as a Variant by SQL.
+ udf_func_return_type = self.scala_signature.returns.data_type
+ is_map_return = udf_func_return_type.startswith("Map")
+ wrapper_return_type = "String" if is_map_return else udf_func_return_type
+
+ # Need to call the map to JSON string converter when a map is returned by the user's function.
+ invoke_udf_func = (
+ f"write(func({invocation_args}))"
+ if is_map_return
+ else f"func({invocation_args})"
+ )
+
+ # The lines of code below are required only when a Map is returned by the UDF. This is needed to serialize the
+ # map output to a JSON string.
+ map_return_imports = (
+ ""
+ if not is_map_return
+ else """
+ import org.json4s._
+ import org.json4s.native.Serialization._
+ import org.json4s.native.Serialization
+ """
+ )
+ map_return_formatter = (
+ ""
+ if not is_map_return
+ else """
+ implicit val formats = Serialization.formats(NoTypeHints)
+ """
+ )

+ return f"""import org.apache.spark.sql.connect.common.UdfPacket
+ {map_return_imports}
  import java.io.{{ByteArrayInputStream, ObjectInputStream}}
  import java.nio.file.{{Files, Paths}}

- object SparkUdf {{
-
- lazy val func: ({cast_scala_input_types}) => {scala_return_type} = {{
+ object __RecreatedSparkUdf {{
+ {map_return_formatter}
+ private lazy val func: ({udf_func_input_types}) => {udf_func_return_type} = {{
  val importDirectory = System.getProperty("com.snowflake.import_directory")
  val fPath = importDirectory + "{self.name}.bin"
  val bytes = Files.readAllBytes(Paths.get(fPath))
  val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
  try {{
- ois.readObject().asInstanceOf[UdfPacket].function.asInstanceOf[({cast_scala_input_types}) => {scala_return_type}]
+ ois.readObject().asInstanceOf[UdfPacket].function.asInstanceOf[({udf_func_input_types}) => {udf_func_return_type}]
  }} finally {{
  ois.close()
  }}
  }}

- def run({scala_arg_and_input_types_str}): {scala_return_type} = {{
- func({scala_args_str})
+ def __wrapperFunc({wrapper_arg_and_input_types_str}): {wrapper_return_type} = {{
+ {invoke_udf_func}
  }}
  }}
  """
@@ -210,14 +253,14 @@ LANGUAGE SCALA
  RUNTIME_VERSION = 2.12
  PACKAGES = ('com.snowflake:snowpark:latest')
  {imports_sql}
- HANDLER = 'SparkUdf.run'
+ HANDLER = '__RecreatedSparkUdf.__wrapperFunc'
  AS
  $$
- {self._gen_body_sql()}
+ {self._gen_body_scala()}
  $$;"""


- def build_scala_udf_imports(session, payload, udf_name):
+ def build_scala_udf_imports(session, payload, udf_name, is_map_return) -> List[str]:
  """
  Build the list of imports needed for the Scala UDF.

@@ -230,6 +273,7 @@ def build_scala_udf_imports(session, payload, udf_name):
  session: Snowpark session
  payload: Binary payload containing the serialized UDF
  udf_name: Name of the UDF (used for the binary file name)
+ is_map_return: Indicates if the UDF returns a Map (affects imports)

  Returns:
  List of JAR file paths to be imported by the UDF
@@ -254,14 +298,30 @@ def build_scala_udf_imports(session, payload, udf_name):
  if RESOURCE_PATH not in row[0]:
  # Remove the stage path since it is not properly formatted.
  user_jars.append(row[0][row[0].find("/") :])
+
+ # Jars used when the return type is a Map.
+ map_jars = (
+ []
+ if not is_map_return
+ else [
+ f"{stage_resource_path}/json4s-core_2.12-3.7.0-M11.jar",
+ f"{stage_resource_path}/json4s-native_2.12-3.7.0-M11.jar",
+ f"{stage_resource_path}/paranamer-2.8.3.jar",
+ ]
+ )
+
  # Format the user jars to be used in the IMPORTS clause of the stored procedure.
- return [
- closure_binary_file,
- f"{stage_resource_path}/spark-connect-client-jvm_2.12-3.5.6.jar",
- f"{stage_resource_path}/spark-common-utils_2.12-3.5.6.jar",
- f"{stage_resource_path}/spark-sql_2.12-3.5.6.jar",
- f"{stage_resource_path}/json4s-ast_2.12-3.7.0-M11.jar",
- ] + [f"{stage + jar}" for jar in user_jars]
+ return (
+ [
+ closure_binary_file,
+ f"{stage_resource_path}/spark-connect-client-jvm_2.12-3.5.6.jar",
+ f"{stage_resource_path}/spark-common-utils_2.12-3.5.6.jar",
+ f"{stage_resource_path}/spark-sql_2.12-3.5.6.jar",
+ f"{stage_resource_path}/json4s-ast_2.12-3.7.0-M11.jar",
+ ]
+ + map_jars
+ + [f"{stage + jar}" for jar in user_jars]
+ )


  def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf:
@@ -298,49 +358,48 @@ def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf
  cached_udf = session._udfs[udf_name]
  return ScalaUdf(cached_udf.name, cached_udf.input_types, cached_udf.return_type)

- imports = build_scala_udf_imports(session, pciudf._payload, udf_name)
-
- def _build_params(
- pciudf: ProcessCommonInlineUserDefinedFunction,
- snowpark_type_mapper: Callable[[snowpark_type.DataType], str],
- spark_type_mapper: Callable[[types_proto.DataType], str],
- ) -> List[Param]:
- """
- Build the parameter list for the UDF signature.
-
- Args:
- pciudf: The UDF definition object
- mapper: Function to map Snowpark types to target type system
+ # In case the Scala UDF was created with `spark.udf.register`, the Spark Scala input types (from protobuf) are
+ # stored in pciudf.scala_input_types.
+ # We cannot rely solely on the inputTypes field from the Scala UDF or the Snowpark input types, since:
+ # - spark.udf.register arguments come from the inputTypes field
+ # - UDFs created with a data type (like below) do not populate the inputTypes field. This requires the input types
+ # inferred by Snowpark. e.g.: udf((i: Long) => (i + 1).toInt, IntegerType)
+ input_types = (
+ pciudf._scala_input_types if pciudf._scala_input_types else pciudf._input_types
+ )

- Returns:
- List of Param objects representing the function parameters
- """
- if not pciudf._scala_input_types:
- return (
- [
- Param(name=f"arg{i}", data_type=snowpark_type_mapper(input_type))
- for i, input_type in enumerate(pciudf._input_types)
- ]
- if pciudf._input_types
- else []
+ scala_input_params: List[Param] = []
+ sql_input_params: List[Param] = []
+ scala_invocation_args: List[str] = [] # arguments passed into the udf function
+ if input_types: # input_types can be None when no arguments are provided
+ for i, input_type in enumerate(input_types):
+ param_name = "arg" + str(i)
+ # Create the Scala arguments and input types string: "arg0: Type0, arg1: Type1, ...".
+ scala_input_params.append(
+ Param(param_name, map_type_to_scala_type(input_type))
+ )
+ # Create the Snowflake SQL arguments and input types string: "arg0 TYPE0, arg1 TYPE1, ...".
+ sql_input_params.append(
+ Param(param_name, map_type_to_snowflake_type(input_type))
+ )
+ # In the case of Map input types, we need to cast the argument to the correct type in Scala.
+ # Snowflake SQL Scala can only handle MAP[VARCHAR, VARCHAR] as input types.
+ scala_invocation_args.append(
+ cast_scala_map_args_from_given_type(param_name, input_type)
  )
- else:
- return [
- Param(name=f"arg{i}", data_type=spark_type_mapper(input_type))
- for i, input_type in enumerate(pciudf._scala_input_types)
- ]

- # Create the Scala arguments and input types string: "arg0: Type0, arg1: Type1, ...".
- # In case the Scala UDF was created with `spark.udf.register`, the Spark Scala input types (from protobuf) are
- # stored in pciudf.scala_input_types.
- sql_input_params = _build_params(
- pciudf, map_snowpark_type_to_snowflake_type, map_spark_type_to_snowflake_type
+ scala_return_type = map_type_to_scala_type(pciudf._original_return_type)
+ # If the SQL return type is a MAP, change this to VARIANT because of issues with Scala UDFs.
+ sql_return_type = map_type_to_snowflake_type(pciudf._original_return_type)
+ imports = build_scala_udf_imports(
+ session,
+ pciudf._payload,
+ udf_name,
+ is_map_return=sql_return_type.startswith("MAP"),
  )
- sql_return_type = map_snowpark_type_to_snowflake_type(pciudf._return_type)
- scala_input_params = _build_params(
- pciudf, map_snowpark_type_to_scala_type, map_spark_type_to_scala_type
+ sql_return_type = (
+ "VARIANT" if sql_return_type.startswith("MAP") else sql_return_type
  )
- scala_return_type = map_snowpark_type_to_scala_type(pciudf._return_type)

  udf_def = ScalaUDFDef(
  name=udf_name,
@@ -351,6 +410,7 @@ def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf
  scala_signature=Signature(
  params=scala_input_params, returns=ReturnType(scala_return_type)
  ),
+ scala_invocation_args=scala_invocation_args,
  )
  create_udf_sql = udf_def.to_create_function_sql()
  logger.info(f"Creating Scala UDF: {create_udf_sql}")
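
The return-type handling in the hunks above can be traced with the new unified mapper: a Map return first maps to a Snowflake MAP type, which sets is_map_return (and pulls the json4s jars into the IMPORTS list) and is then rewritten to VARIANT for the CREATE FUNCTION statement. A hedged sketch with an illustrative return type, assuming the helper added in this diff is importable from snowflake.snowpark_connect.utils.scala_udf_utils:

# Hedged sketch, not part of the package: trace the Map -> VARIANT return path.
import snowflake.snowpark.types as snowpark_type
from snowflake.snowpark_connect.utils.scala_udf_utils import map_type_to_snowflake_type

ret = snowpark_type.MapType(snowpark_type.StringType(), snowpark_type.LongType())
sql_return_type = map_type_to_snowflake_type(ret)
print(sql_return_type)                     # MAP(VARCHAR, BIGINT)
is_map_return = sql_return_type.startswith("MAP")
print("VARIANT" if is_map_return else sql_return_type)  # VARIANT
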
@@ -358,56 +418,60 @@ def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf
  return ScalaUdf(udf_name, pciudf._input_types, pciudf._return_type)


- def map_snowpark_type_to_scala_type(t: snowpark_type.DataType) -> str:
- """
- Maps a Snowpark type to a Scala type string.
-
- Converts Snowpark DataType objects to their corresponding Scala type names.
- This mapping is used when generating Scala code for UDFs.
-
- Args:
- t: Snowpark DataType to convert
-
- Returns:
- String representation of the corresponding Scala type
-
- Raises:
- ValueError: If the Snowpark type is not supported
- """
- match type(t):
- case snowpark_type.ArrayType:
- return f"Array[{map_snowpark_type_to_scala_type(t.element_type)}]"
- case snowpark_type.BinaryType:
+ def map_type_to_scala_type(
+ t: Union[snowpark_type.DataType, types_proto.DataType]
+ ) -> str:
+ """Maps a Snowpark or Spark protobuf type to a Scala type string."""
+ if not t:
+ return "String"
+ is_snowpark_type = isinstance(t, snowpark_type.DataType)
+ condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
+ match condition:
+ case snowpark_type.ArrayType | "array":
+ return (
+ f"Array[{map_type_to_scala_type(t.element_type)}]"
+ if is_snowpark_type
+ else f"Array[{map_type_to_scala_type(t.array.element_type)}]"
+ )
+ case snowpark_type.BinaryType | "binary":
  return "Array[Byte]"
- case snowpark_type.BooleanType:
+ case snowpark_type.BooleanType | "boolean":
  return "Boolean"
- case snowpark_type.ByteType:
+ case snowpark_type.ByteType | "byte":
  return "Byte"
- case snowpark_type.DateType:
+ case snowpark_type.DateType | "date":
  return "java.sql.Date"
- case snowpark_type.DecimalType:
+ case snowpark_type.DecimalType | "decimal":
  return "java.math.BigDecimal"
- case snowpark_type.DoubleType:
+ case snowpark_type.DoubleType | "double":
  return "Double"
- case snowpark_type.FloatType:
+ case snowpark_type.FloatType | "float":
  return "Float"
  case snowpark_type.GeographyType:
  return "Geography"
- case snowpark_type.IntegerType:
+ case snowpark_type.IntegerType | "integer":
  return "Int"
- case snowpark_type.LongType:
+ case snowpark_type.LongType | "long":
  return "Long"
- case snowpark_type.MapType: # can also map to OBJECT in Snowflake
- key_type = map_snowpark_type_to_scala_type(t.key_type)
- value_type = map_snowpark_type_to_scala_type(t.value_type)
+ case snowpark_type.MapType | "map": # can also map to OBJECT in Snowflake
+ key_type = (
+ map_type_to_scala_type(t.key_type)
+ if is_snowpark_type
+ else map_type_to_scala_type(t.map.key_type)
+ )
+ value_type = (
+ map_type_to_scala_type(t.value_type)
+ if is_snowpark_type
+ else map_type_to_scala_type(t.map.value_type)
+ )
  return f"Map[{key_type}, {value_type}]"
- case snowpark_type.NullType:
+ case snowpark_type.NullType | "null":
  return "String" # cannot set the return type to Null in Snowpark Scala UDFs
- case snowpark_type.ShortType:
+ case snowpark_type.ShortType | "short":
  return "Short"
- case snowpark_type.StringType:
+ case snowpark_type.StringType | "string" | "char" | "varchar":
  return "String"
- case snowpark_type.TimestampType:
+ case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
  return "java.sql.Timestamp"
  case snowpark_type.VariantType:
  return "Variant"
@@ -415,61 +479,65 @@ def map_snowpark_type_to_scala_type(t: snowpark_type.DataType) -> str:
  raise ValueError(f"Unsupported Snowpark type: {t}")


- def map_snowpark_type_to_snowflake_type(t: snowpark_type.DataType) -> str:
- """
- Maps a Snowpark type to a Snowflake type string.
-
- Converts Snowpark DataType objects to their corresponding Snowflake SQL type names.
- This mapping is used when generating CREATE FUNCTION SQL statements.
-
- Args:
- t: Snowpark DataType to convert
-
- Returns:
- String representation of the corresponding Snowflake type
-
- Raises:
- ValueError: If the Snowpark type is not supported
- """
- match type(t):
- case snowpark_type.ArrayType:
- return f"ARRAY({map_snowpark_type_to_snowflake_type(t.element_type)})"
- case snowpark_type.BinaryType:
+ def map_type_to_snowflake_type(
+ t: Union[snowpark_type.DataType, types_proto.DataType]
+ ) -> str:
+ """Maps a Snowpark or Spark protobuf type to a Snowflake type string."""
+ if not t:
+ return "VARCHAR"
+ is_snowpark_type = isinstance(t, snowpark_type.DataType)
+ condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
+ match condition:
+ case snowpark_type.ArrayType | "array":
+ return (
+ f"ARRAY({map_type_to_snowflake_type(t.element_type)})"
+ if is_snowpark_type
+ else f"ARRAY({map_type_to_snowflake_type(t.array.element_type)})"
+ )
+ case snowpark_type.BinaryType | "binary":
  return "BINARY"
- case snowpark_type.BooleanType:
+ case snowpark_type.BooleanType | "boolean":
  return "BOOLEAN"
- case snowpark_type.ByteType:
+ case snowpark_type.ByteType | "byte":
  return "TINYINT"
- case snowpark_type.DateType:
+ case snowpark_type.DateType | "date":
  return "DATE"
- case snowpark_type.DecimalType:
+ case snowpark_type.DecimalType | "decimal":
  return "NUMBER"
- case snowpark_type.DoubleType:
+ case snowpark_type.DoubleType | "double":
  return "DOUBLE"
- case snowpark_type.FloatType:
+ case snowpark_type.FloatType | "float":
  return "FLOAT"
  case snowpark_type.GeographyType:
  return "GEOGRAPHY"
- case snowpark_type.IntegerType:
+ case snowpark_type.IntegerType | "integer":
  return "INT"
- case snowpark_type.LongType:
+ case snowpark_type.LongType | "long":
  return "BIGINT"
- case snowpark_type.MapType:
+ case snowpark_type.MapType | "map":
  # Maps to OBJECT in Snowflake if key and value types are not specified.
- key_type = map_snowpark_type_to_snowflake_type(t.key_type)
- value_type = map_snowpark_type_to_snowflake_type(t.value_type)
+ key_type = (
+ map_type_to_snowflake_type(t.key_type)
+ if is_snowpark_type
+ else map_type_to_snowflake_type(t.map.key_type)
+ )
+ value_type = (
+ map_type_to_snowflake_type(t.value_type)
+ if is_snowpark_type
+ else map_type_to_snowflake_type(t.map.value_type)
+ )
  return (
  f"MAP({key_type}, {value_type})"
  if key_type and value_type
  else "OBJECT"
  )
- case snowpark_type.NullType:
+ case snowpark_type.NullType | "null":
  return "VARCHAR"
- case snowpark_type.ShortType:
+ case snowpark_type.ShortType | "short":
  return "SMALLINT"
- case snowpark_type.StringType:
+ case snowpark_type.StringType | "string" | "char" | "varchar":
  return "VARCHAR"
- case snowpark_type.TimestampType:
+ case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
  return "TIMESTAMP"
  case snowpark_type.VariantType:
  return "VARIANT"
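
The two replacement mappers above accept either a Snowpark DataType instance or a Spark Connect protobuf DataType, dispatching on type(t) for the former and t.WhichOneof("kind") for the latter. A hedged sketch of calling each flavor, assuming the new helpers and the vendored types_pb2 module are importable exactly as named in this diff:

# Hedged sketch: one helper serves both type systems.
import snowflake.snowpark.types as snowpark_type
import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
from snowflake.snowpark_connect.utils.scala_udf_utils import (
    map_type_to_scala_type,
    map_type_to_snowflake_type,
)

# Snowpark type object: dispatched on type(t).
print(map_type_to_snowflake_type(snowpark_type.LongType()))  # BIGINT
# Spark Connect protobuf type: dispatched on t.WhichOneof("kind").
proto_long = types_proto.DataType(long=types_proto.DataType.Long())
print(map_type_to_scala_type(proto_long))  # Long
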
@@ -477,112 +545,52 @@ def map_snowpark_type_to_snowflake_type(t: snowpark_type.DataType) -> str:
  raise ValueError(f"Unsupported Snowpark type: {t}")


- def map_spark_type_to_scala_type(t: types_proto.DataType) -> str:
- """
- Maps a Spark DataType (from protobuf) to a Scala type string.
-
- Converts Spark protobuf DataType objects to their corresponding Scala type names.
- This mapping is used when working with Spark Connect protobuf types.
-
- Args:
- t: Spark protobuf DataType to convert
-
- Returns:
- String representation of the corresponding Scala type
-
- Raises:
- ValueError: If the Spark type is not supported
- """
- match t.WhichOneof("kind"):
- case "array":
- return f"Array[{map_spark_type_to_scala_type(t.array.element_type)}]"
- case "binary":
- return "Array[Byte]"
- case "boolean":
- return "Boolean"
- case "byte":
- return "Byte"
- case "date":
- return "java.sql.Date"
- case "decimal":
- return "java.math.BigDecimal"
- case "double":
- return "Double"
- case "float":
- return "Float"
- case "integer":
- return "Int"
- case "long":
- return "Long"
- case "map":
- key_type = map_spark_type_to_scala_type(t.map.key_type)
- value_type = map_spark_type_to_scala_type(t.map.value_type)
- return f"Map[{key_type}, {value_type}]"
- case "null":
- return "String" # cannot set the return type to Null in Snowpark Scala UDFs
- case "short":
- return "Short"
- case "string" | "char" | "varchar":
- return "String"
- case "timestamp" | "timestamp_ntz":
- return "java.sql.Timestamp"
- case _:
- raise ValueError(f"Unsupported Spark type: {t}")
-
-
- def map_spark_type_to_snowflake_type(t: types_proto.DataType) -> str:
- """
- Maps a Spark DataType (from protobuf) to a Snowflake type string.
-
- Converts Spark protobuf DataType objects to their corresponding Snowflake SQL type names.
- This mapping is used when working with Spark Connect protobuf types in Snowflake UDFs.
-
- Args:
- t: Spark protobuf DataType to convert
-
- Returns:
- String representation of the corresponding Snowflake type
-
- Raises:
- ValueError: If the Spark type is not supported
- """
- match t.WhichOneof("kind"):
- case "array":
- return f"ARRAY({map_spark_type_to_snowflake_type(t.array.element_type)})"
- case "binary":
- return "BINARY"
- case "boolean":
- return "BOOLEAN"
- case "byte":
- return "TINYINT"
- case "date":
- return "DATE"
- case "decimal":
- return "NUMBER"
- case "double":
- return "DOUBLE"
- case "float":
- return "FLOAT"
- case "integer":
- return "INT"
- case "long":
- return "BIGINT"
- case "map":
- # Maps to OBJECT in Snowflake if key and value types are not specified.
- key_type = map_spark_type_to_snowflake_type(t.map.key_type)
- value_type = map_spark_type_to_snowflake_type(t.map.value_type)
- return (
- f"MAP({key_type}, {value_type})"
- if key_type and value_type
- else "OBJECT"
- )
- case "null":
- return "VARCHAR"
- case "short":
- return "SMALLINT"
- case "string" | "char" | "varchar":
- return "VARCHAR"
- case "timestamp" | "timestamp_ntz":
- return "TIMESTAMP"
- case _:
- raise ValueError(f"Unsupported Spark type: {t}")
+ def cast_scala_map_args_from_given_type(
+ arg_name: str, input_type: Union[snowpark_type.DataType, types_proto.DataType]
+ ) -> str:
+ """If the input_type is a Map, cast the argument arg_name to a Map[key_type, value_type] in Scala."""
+ is_snowpark_type = isinstance(input_type, snowpark_type.DataType)
+
+ def convert_from_string_to_type(
+ arg_name: str, t: Union[snowpark_type.DataType, types_proto.DataType]
+ ) -> str:
+ """Convert the string argument arg_name to the specified type t in Scala."""
+ condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
+ match condition:
+ case snowpark_type.BinaryType | "binary":
+ return arg_name + ".getBytes()"
+ case snowpark_type.BooleanType | "boolean":
+ return arg_name + ".toBoolean"
+ case snowpark_type.ByteType | "byte":
+ return arg_name + ".getBytes().head" # TODO: verify if this is correct
+ case snowpark_type.DateType | "date":
+ return f"java.sql.Date.valueOf({arg_name})"
+ case snowpark_type.DecimalType | "decimal":
+ return f"new BigDecimal({arg_name})"
+ case snowpark_type.DoubleType | "double":
+ return arg_name + ".toDouble"
+ case snowpark_type.FloatType | "float":
+ return arg_name + ".toFloat"
+ case snowpark_type.IntegerType | "integer":
+ return arg_name + ".toInt"
+ case snowpark_type.LongType | "long":
+ return arg_name + ".toLong"
+ case snowpark_type.ShortType | "short":
+ return arg_name + ".toShort"
+ case snowpark_type.StringType | "string" | "char" | "varchar":
+ return arg_name
+ case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
+ return f"java.sql.Timestamp.valueOf({arg_name})"
+ case _:
+ raise ValueError(f"Unsupported Snowpark type: {t}")
+
+ if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
+ not is_snowpark_type and input_type.WhichOneof("kind") == "map"
+ ):
+ key_type = input_type.key_type if is_snowpark_type else input_type.map.key_type
+ value_type = (
+ input_type.value_type if is_snowpark_type else input_type.map.value_type
+ )
+ return f"{arg_name}.map {{ case (k, v) => ({convert_from_string_to_type('k', key_type)}, {convert_from_string_to_type('v', value_type)})}}"
+ else:
+ return arg_name
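
cast_scala_map_args_from_given_type only rewrites map-typed arguments; any other argument is passed through by name. A hedged example of the Scala expressions it emits, assuming the helper is importable from snowflake.snowpark_connect.utils.scala_udf_utils and using illustrative argument types:

# Hedged sketch: Scala cast expressions emitted for the generated wrapper body.
import snowflake.snowpark.types as snowpark_type
from snowflake.snowpark_connect.utils.scala_udf_utils import (
    cast_scala_map_args_from_given_type,
)

map_arg = snowpark_type.MapType(snowpark_type.StringType(), snowpark_type.LongType())
print(cast_scala_map_args_from_given_type("arg0", map_arg))
# arg0.map { case (k, v) => (k, v.toLong)}
print(cast_scala_map_args_from_given_type("arg1", snowpark_type.LongType()))
# arg1
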
@@ -14,6 +14,9 @@ from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
  from snowflake.snowpark_connect.utils.describe_query_cache import (
  instrument_session_for_describe_cache,
  )
+ from snowflake.snowpark_connect.utils.external_udxf_cache import (
+ init_external_udxf_cache,
+ )
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.utils.telemetry import telemetry
  from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache
@@ -63,6 +66,7 @@ def configure_snowpark_session(session: snowpark.Session):

  # built-in udf cache
  init_builtin_udf_cache(session)
+ init_external_udxf_cache(session)

  # Set experimental parameters (warnings globally suppressed)
  session.ast_enabled = False