snowpark-connect 0.23.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/expression/function_defaults.py +207 -0
- snowflake/snowpark_connect/expression/literal.py +5 -0
- snowflake/snowpark_connect/expression/map_expression.py +10 -1
- snowflake/snowpark_connect/expression/map_extension.py +12 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +11 -12
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +1281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +202 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +1 -32
- snowflake/snowpark_connect/relation/map_extension.py +7 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
- snowflake/snowpark_connect/relation/read/utils.py +6 -7
- snowflake/snowpark_connect/relation/utils.py +1 -170
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/METADATA +1 -1
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/RECORD +25 -20
- {snowpark_connect-0.23.0.data → snowpark_connect-0.24.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.24.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.24.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.24.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/function_defaults.py
@@ -0,0 +1,207 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+from dataclasses import dataclass
+from typing import Any
+
+import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
+import pyspark.sql.connect.proto.types_pb2 as types_pb2
+
+
+@dataclass(frozen=True)
+class DefaultParameter:
+    """Represents a single default parameter for a function."""
+
+    name: str
+    value: Any
+
+
+@dataclass(frozen=True)
+class FunctionDefaults:
+    """Represents default parameter configuration for a function."""
+
+    total_args: int
+    defaults: list[DefaultParameter]
+
+
+# FUNCTION_DEFAULTS dictionary to hold operation name with default values.
+# This is required as non pyspark clients such as scala or sql won't send all the parameters.
+# We use this dict to inject the missing parameters before processing the unresolved function.
+FUNCTION_DEFAULTS: dict[str, FunctionDefaults] = {
+    "aes_decrypt": FunctionDefaults(
+        total_args=5,
+        defaults=[
+            DefaultParameter("mode", "GCM"),  # Spark SQL default: GCM
+            DefaultParameter("padding", "NONE"),  # Spark SQL default: NONE for GCM mode
+            DefaultParameter("aad", ""),  # Spark SQL default: empty string
+        ],
+    ),
+    "aes_encrypt": FunctionDefaults(
+        total_args=6,
+        defaults=[
+            DefaultParameter("mode", "GCM"),  # Spark SQL default: GCM
+            DefaultParameter("padding", "NONE"),  # Spark SQL default: NONE for GCM mode
+            DefaultParameter(
+                "iv", ""
+            ),  # Spark SQL default: empty string (random generated if not provided)
+            DefaultParameter("aad", ""),  # Spark SQL default: empty string
+        ],
+    ),
+    "approx_percentile": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("accuracy", 10000)],
+    ),
+    "bround": FunctionDefaults(
+        total_args=2,
+        defaults=[DefaultParameter("scale", 0)],
+    ),
+    "first": FunctionDefaults(
+        total_args=2,
+        defaults=[DefaultParameter("ignorenulls", False)],
+    ),
+    "lag": FunctionDefaults(
+        total_args=2,
+        defaults=[
+            DefaultParameter("offset", 1),
+        ],
+    ),
+    "last": FunctionDefaults(
+        total_args=2,
+        defaults=[DefaultParameter("ignorenulls", False)],
+    ),
+    "lead": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("offset", 1), DefaultParameter("default", None)],
+    ),
+    "locate": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("pos", 1)],
+    ),
+    "months_between": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("roundOff", True)],
+    ),
+    "nth_value": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("ignoreNulls", False)],
+    ),
+    "overlay": FunctionDefaults(
+        total_args=4,
+        defaults=[DefaultParameter("len", -1)],
+    ),
+    "percentile": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("frequency", 1)],
+    ),
+    "percentile_approx": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("accuracy", 10000)],
+    ),
+    "round": FunctionDefaults(
+        total_args=2,
+        defaults=[DefaultParameter("scale", 0)],
+    ),
+    "sentences": FunctionDefaults(
+        total_args=3,
+        defaults=[
+            DefaultParameter("language", ""),
+            DefaultParameter("country", ""),
+        ],
+    ),
+    "sort_array": FunctionDefaults(
+        total_args=2,
+        defaults=[DefaultParameter("asc", True)],
+    ),
+    "split": FunctionDefaults(
+        total_args=3,
+        defaults=[DefaultParameter("limit", -1)],
+    ),
+    "str_to_map": FunctionDefaults(
+        total_args=3,
+        defaults=[
+            DefaultParameter(
+                "pairDelim", ","
+            ),  # Spark SQL default: comma for splitting pairs
+            DefaultParameter(
+                "keyValueDelim", ":"
+            ),  # Spark SQL default: colon for splitting key/value
+        ],
+    ),
+    "try_aes_decrypt": FunctionDefaults(
+        total_args=5,
+        defaults=[
+            DefaultParameter("mode", "GCM"),  # Spark SQL default: GCM
+            DefaultParameter("padding", "NONE"),  # Spark SQL default: NONE for GCM mode
+            DefaultParameter("aad", ""),  # Spark SQL default: empty string
+        ],
+    ),
+}
+
+
+def _create_literal_expression(value: Any) -> expressions_pb2.Expression:
+    """Create a literal expression for the given value."""
+    expr = expressions_pb2.Expression()
+    if isinstance(value, bool):
+        expr.literal.boolean = value
+    elif isinstance(value, int):
+        expr.literal.integer = value
+    elif isinstance(value, str):
+        expr.literal.string = value
+    elif isinstance(value, float):
+        expr.literal.double = value
+    elif value is None:
+        null_type = types_pb2.DataType()
+        null_type.null.SetInParent()
+        expr.literal.null.CopyFrom(null_type)
+    else:
+        raise ValueError(f"Unsupported literal type: {value}")
+
+    return expr
+
+
+def inject_function_defaults(
+    unresolved_function: expressions_pb2.Expression.UnresolvedFunction,
+) -> bool:
+    """
+    Inject missing default parameters into an UnresolvedFunction protobuf.
+
+    Args:
+        unresolved_function: The protobuf UnresolvedFunction to modify
+
+    Returns:
+        bool: True if any defaults were injected, False otherwise
+    """
+    function_name = unresolved_function.function_name.lower()
+
+    if function_name not in FUNCTION_DEFAULTS:
+        return False
+
+    func_config = FUNCTION_DEFAULTS[function_name]
+    current_arg_count = len(unresolved_function.arguments)
+    total_args = func_config.total_args
+    defaults = func_config.defaults
+
+    if not defaults or current_arg_count >= total_args:
+        return False
+
+    # Calculate how many defaults to append
+    missing_arg_count = total_args - current_arg_count
+
+    # Check if any required params are missing.
+    if missing_arg_count > len(defaults):
+        raise ValueError(
+            f"Function '{function_name}' is missing required arguments. "
+            f"Expected {total_args} args, got {current_arg_count}, "
+            f"but only {len(defaults)} defaults are defined."
+        )
+
+    defaults_to_append = defaults[-missing_arg_count:]
+    injected = False
+
+    # Simply append the needed default values
+    for default_param in defaults_to_append:
+        default_expr = _create_literal_expression(default_param.value)
+        unresolved_function.arguments.append(default_expr)
+        injected = True
+
+    return injected
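Illustrative sketch (not part of this diff): how the new inject_function_defaults helper above behaves when a non-Python client omits trailing arguments. The placeholder argument expressions below are hypothetical stand-ins for whatever the client actually sends.

    import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2

    from snowflake.snowpark_connect.expression.function_defaults import (
        inject_function_defaults,
    )

    # e.g. a Scala or SQL client sends locate(substr, str) with only two arguments
    fn = expressions_pb2.Expression.UnresolvedFunction()
    fn.function_name = "locate"
    fn.arguments.extend([expressions_pb2.Expression(), expressions_pb2.Expression()])

    inject_function_defaults(fn)  # appends the configured default pos=1 as a literal
    assert len(fn.arguments) == 3
    assert fn.arguments[2].literal.integer == 1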
snowflake/snowpark_connect/expression/literal.py
@@ -81,6 +81,11 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
         case "decimal":
             # literal.decimal.precision & scale are ignored, as decimal.Decimal doesn't accept them
             return decimal.Decimal(literal.decimal.value), literal.decimal.value
+        case "array":
+            array_values, element_names = zip(
+                *(get_literal_field_and_name(e) for e in literal.array.elements)
+            )
+            return array_values, f"ARRAY({', '.join(element_names)})"
         case "null" | None:
             return None, "NULL"
         case other:
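Illustrative sketch (not part of this diff): exercising the new "array" branch with a small Spark Connect literal proto. The per-element names come from the existing scalar branches, so the rendered name shown in the comment is only indicative.

    import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

    from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name

    lit = expressions_proto.Expression.Literal()
    for v in (1, 2):
        element = lit.array.elements.add()
        element.integer = v

    values, name = get_literal_field_and_name(lit)
    # values is the tuple of converted elements, e.g. (1, 2),
    # and name is rendered over the per-element names, e.g. "ARRAY(1, 2)"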
snowflake/snowpark_connect/expression/map_expression.py
@@ -27,7 +27,10 @@ from snowflake.snowpark_connect.expression.literal import get_literal_field_and_
 from snowflake.snowpark_connect.expression.map_cast import map_cast
 from snowflake.snowpark_connect.expression.map_sql_expression import map_sql_expr
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
-from snowflake.snowpark_connect.type_mapping import
+from snowflake.snowpark_connect.type_mapping import (
+    map_simple_types,
+    proto_to_snowpark_type,
+)
 from snowflake.snowpark_connect.typed_column import TypedColumn
 from snowflake.snowpark_connect.utils.context import (
     gen_sql_plan_id,
@@ -166,6 +169,12 @@ def map_expression(
                 lambda: [map_simple_types(lit_type_str)],
             )
 
+            if lit_type_str == "array":
+                result_exp = snowpark_fn.lit(lit_value)
+                element_types = proto_to_snowpark_type(exp.literal.array.element_type)
+                array_type = snowpark.types.ArrayType(element_types)
+                return [lit_name], TypedColumn(result_exp, lambda: [array_type])
+
             # Decimal needs further processing to get the precision and scale properly.
             if lit_type_str == "decimal":
                 # Precision and scale are optional in the proto.
snowflake/snowpark_connect/expression/map_extension.py
@@ -10,7 +10,10 @@ from snowflake.snowpark.types import BooleanType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
-from snowflake.snowpark_connect.utils.context import
+from snowflake.snowpark_connect.utils.context import (
+    push_evaluating_sql_scope,
+    push_outer_dataframe,
+)
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -52,12 +55,19 @@ def map_extension(
             return [name], typed_col
 
         case "subquery_expression":
+            from snowflake.snowpark_connect.dataframe_container import (
+                DataFrameContainer,
+            )
             from snowflake.snowpark_connect.expression.map_expression import (
                 map_expression,
             )
             from snowflake.snowpark_connect.relation.map_relation import map_relation
 
-
+            current_outer_df = DataFrameContainer(
+                dataframe=typer.df, column_map=column_mapping
+            )
+
+            with push_evaluating_sql_scope(), push_outer_dataframe(current_outer_df):
                 df_container = map_relation(extension.subquery_expression.input)
                 df = df_container.dataframe
 
snowflake/snowpark_connect/expression/map_unresolved_function.py
@@ -80,6 +80,9 @@ from snowflake.snowpark_connect.constants import (
     SPARK_TZ_ABBREVIATIONS_OVERRIDES,
     STRUCTURED_TYPES_ENABLED,
 )
+from snowflake.snowpark_connect.expression.function_defaults import (
+    inject_function_defaults,
+)
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_cast import (
     CAST_FUNCTIONS,
@@ -299,6 +302,9 @@ def map_unresolved_function(
     function_name = exp.unresolved_function.function_name.lower()
     is_udtf_call = function_name in session._udtfs
 
+    # Inject default parameters for functions that need them (especially for Scala clients)
+    inject_function_defaults(exp.unresolved_function)
+
     def _resolve_args_expressions(exp: expressions_proto.Expression):
         def _resolve_fn_arg(exp):
             with resolving_fun_args():
@@ -3761,7 +3767,7 @@ def map_unresolved_function(
                     snowpark_args[1] <= 0, snowpark_fn.lit("")
                 ).otherwise(snowpark_fn.left(*snowpark_args))
                 result_type = StringType()
-            case "length" | "char_length" | "character_length":
+            case "length" | "char_length" | "character_length" | "len":
                 if exp.unresolved_function.arguments[0].HasField("literal"):
                     # Only update the name if it has the literal field.
                     # If it doesn't, it means it's binary data.
@@ -3822,13 +3828,7 @@ def map_unresolved_function(
             case "locate":
                 substr = unwrap_literal(exp.unresolved_function.arguments[0])
                 value = snowpark_args[1]
-
-                    start_pos = unwrap_literal(exp.unresolved_function.arguments[2])
-                else:
-                    # start_pos is an optional argument and if not provided we should default to 1.
-                    # This path will only be reached by spark connect scala clients.
-                    start_pos = 1
-                    spark_function_name = f"locate({', '.join(snowpark_arg_names)}, 1)"
+                start_pos = unwrap_literal(exp.unresolved_function.arguments[2])
 
                 if start_pos > 0:
                     result_exp = snowpark_fn.locate(substr, value, start_pos)
@@ -4677,7 +4677,7 @@ def map_unresolved_function(
                     snowpark_args[0],
                 )
                 result_type = DateType()
-            case "not":
+            case "not" | "!":
                 spark_function_name = f"(NOT {snowpark_arg_names[0]})"
                 result_exp = ~snowpark_args[0]
                 result_type = BooleanType()
@@ -5253,9 +5253,8 @@ def map_unresolved_function(
             # TODO: Seems like more validation of the arguments is appropriate.
             args = exp.unresolved_function.arguments
             if len(args) > 0:
-                if not (
-
-                    or isinstance(snowpark_typed_args[0].typ, NullType)
+                if not isinstance(
+                    snowpark_typed_args[0].typ, (IntegerType, LongType, NullType)
                 ):
                     raise AnalysisException(
                         f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT") type, however {snowpark_arg_names[0]} has the type "{snowpark_typed_args[0].typ}"""
snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#