snowpark-connect 0.20.2__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic.

Files changed (67)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +28 -14
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  6. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  7. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  8. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  9. snowflake/snowpark_connect/expression/map_unresolved_function.py +279 -43
  10. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  11. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  12. snowflake/snowpark_connect/expression/typer.py +6 -6
  13. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  14. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  15. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  16. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  17. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  18. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  19. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  20. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  21. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  22. snowflake/snowpark_connect/relation/map_aggregate.py +72 -47
  23. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  24. snowflake/snowpark_connect/relation/map_column_ops.py +207 -144
  25. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  26. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  27. snowflake/snowpark_connect/relation/map_join.py +72 -63
  28. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  29. snowflake/snowpark_connect/relation/map_map_partitions.py +21 -16
  30. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  31. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  32. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  33. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  34. snowflake/snowpark_connect/relation/map_sql.py +155 -78
  35. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  36. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  37. snowflake/snowpark_connect/relation/map_udtf.py +6 -9
  38. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  39. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  40. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  41. snowflake/snowpark_connect/relation/read/map_read_json.py +7 -7
  42. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  43. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  44. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  45. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  46. snowflake/snowpark_connect/relation/utils.py +11 -5
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  48. snowflake/snowpark_connect/relation/write/map_write.py +199 -40
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  50. snowflake/snowpark_connect/server.py +34 -4
  51. snowflake/snowpark_connect/type_mapping.py +2 -23
  52. snowflake/snowpark_connect/utils/cache.py +27 -22
  53. snowflake/snowpark_connect/utils/context.py +33 -17
  54. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  55. snowflake/snowpark_connect/utils/session.py +41 -34
  56. snowflake/snowpark_connect/utils/telemetry.py +1 -2
  57. snowflake/snowpark_connect/version.py +1 -1
  58. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/METADATA +5 -3
  59. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/RECORD +67 -64
  60. snowpark_connect-0.21.0.dist-info/licenses/LICENSE-binary +568 -0
  61. snowpark_connect-0.21.0.dist-info/licenses/NOTICE-binary +1533 -0
  62. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-connect +0 -0
  63. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-session +0 -0
  64. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-submit +0 -0
  65. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/WHEEL +0 -0
  66. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/top_level.txt +0 -0
@@ -69,7 +69,10 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     set_schema_getter,
 )
-from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.config import (
+    get_boolean_session_config_param,
+    global_config,
+)
 from snowflake.snowpark_connect.constants import (
     DUPLICATE_KEY_FOUND_ERROR_TEMPLATE,
     SPARK_TZ_ABBREVIATIONS_OVERRIDES,
@@ -100,6 +103,7 @@ from snowflake.snowpark_connect.typed_column import (
 )
 from snowflake.snowpark_connect.utils.context import (
     add_sql_aggregate_function,
+    get_current_grouping_columns,
     get_is_aggregate_function,
     get_is_evaluating_sql,
     get_is_in_udtf_context,
@@ -341,6 +345,9 @@ def map_unresolved_function(
     )
     spark_col_names = []
     spark_sql_ansi_enabled = global_config.spark_sql_ansi_enabled
+    spark_sql_legacy_allow_hash_on_map_type = (
+        global_config.spark_sql_legacy_allowHashOnMapType
+    )
 
     function_name = exp.unresolved_function.function_name.lower()
     telemetry.report_function_usage(function_name)
@@ -867,14 +874,30 @@ def map_unresolved_function(
            )
         case "approx_percentile" | "percentile_approx":
             # SNOW-1955784: Support accuracy parameter
+            # Use percentile_disc to return actual values from dataset (matches PySpark behavior)
 
-            # Even though the Spark function accepts a Column for percentage, it will fail unless it's a literal.
-            # Therefore, we can do error checking right here.
-            def _check_percentage(exp: expressions_proto.Expression) -> Column:
-                perc = unwrap_literal(exp)
-                if not 0.0 <= perc <= 1.0:
+            def _pyspark_approx_percentile(
+                column: Column, percentage: float, original_type: DataType
+            ) -> Column:
+                """
+                PySpark-compatible percentile that returns actual values from dataset.
+                - PySpark's approx_percentile returns the "smallest value in the ordered col values
+                  such that no more than percentage of col values is less than or equal to that value"
+                - This means it MUST return an actual value from the original dataset
+                - Snowflake's approx_percentile() may interpolate between values, breaking compatibility
+                - percentile_disc() returns discrete values (actual dataset values), matching PySpark
+                """
+                # Even though the Spark function accepts a Column for percentage, it will fail unless it's a literal.
+                # Therefore, we can do error checking right here.
+                if not 0.0 <= percentage <= 1.0:
                     raise AnalysisException("percentage must be between [0.0, 1.0]")
-                return snowpark_fn.lit(perc)
+
+                result = snowpark_fn.function("percentile_disc")(
+                    snowpark_fn.lit(percentage)
+                ).within_group(column)
+                return snowpark_fn.cast(result, original_type)
+
+            column_type = snowpark_typed_args[0].typ
 
             if isinstance(snowpark_typed_args[1].typ, ArrayType):
                 # Snowpark doesn't accept a list of percentile values.
@@ -882,26 +905,26 @@ def map_unresolved_function(
                 array_func = exp.unresolved_function.arguments[1].unresolved_function
                 assert array_func.function_name == "array", array_func
 
-                result_exp = snowpark_fn.array_construct(
-                    *[
-                        snowpark_fn.approx_percentile(
-                            snowpark_args[0], _check_percentage(arg)
-                        )
-                        for arg in array_func.arguments
-                    ]
-                )
+                percentile_results = [
+                    _pyspark_approx_percentile(
+                        snowpark_args[0], unwrap_literal(arg), column_type
+                    )
+                    for arg in array_func.arguments
+                ]
+
+                result_type = ArrayType(element_type=column_type, contains_null=False)
                 result_exp = snowpark_fn.cast(
-                    result_exp,
-                    ArrayType(element_type=DoubleType(), contains_null=False),
+                    snowpark_fn.array_construct(*percentile_results),
+                    result_type,
                 )
-                result_type = ArrayType(element_type=DoubleType(), contains_null=False)
             else:
+                # Handle single percentile
+                percentage = unwrap_literal(exp.unresolved_function.arguments[1])
                 result_exp = TypedColumn(
-                    snowpark_fn.approx_percentile(
-                        snowpark_args[0],
-                        _check_percentage(exp.unresolved_function.arguments[1]),
+                    _pyspark_approx_percentile(
+                        snowpark_args[0], percentage, column_type
                     ),
-                    lambda: [DoubleType()],
+                    lambda: [column_type],
                 )
         case "array":
             if len(snowpark_args) == 0:
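
As an aside, a minimal sketch of the PySpark semantics this hunk targets, run against a plain local Spark session (the DataFrame and values are illustrative, not part of this package):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(v,) for v in [1, 2, 3, 4, 10]], ["x"])

# percentile_approx returns an actual value from the column, so the median of
# [1, 2, 3, 4, 10] is 3, never an interpolated 3.5 -- the behavior percentile_disc preserves.
df.select(F.percentile_approx("x", 0.5).alias("p50")).show()

# A list of percentages yields an array of values drawn from the column.
df.select(F.percentile_approx("x", [0.25, 0.5, 0.75]).alias("quartiles")).show()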
@@ -2073,14 +2096,22 @@ def map_unresolved_function(
             assert (
                 len(exp.unresolved_function.arguments) == 2
             ), "date_format takes 2 arguments"
-            result_exp = snowpark_fn.date_format(
-                snowpark_args[0],
-                snowpark_fn.lit(
-                    map_spark_timestamp_format_expression(
-                        exp.unresolved_function.arguments[1], snowpark_typed_args[0].typ
-                    )
-                ),
-            )
+
+            # Check if format parameter is NULL
+            format_literal = unwrap_literal(exp.unresolved_function.arguments[1])
+            if format_literal is None:
+                # If format is NULL, return NULL for all rows
+                result_exp = snowpark_fn.lit(None)
+            else:
+                result_exp = snowpark_fn.date_format(
+                    snowpark_args[0],
+                    snowpark_fn.lit(
+                        map_spark_timestamp_format_expression(
+                            exp.unresolved_function.arguments[1],
+                            snowpark_typed_args[0].typ,
+                        )
+                    ),
+                )
             result_exp = TypedColumn(result_exp, lambda: [StringType()])
         case "date_from_unix_date":
             result_exp = snowpark_fn.date_add(
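
A short sketch of the NULL-format behavior the new branch implements, reusing the spark session and F import from the earlier sketch; the data is illustrative:

df = spark.createDataFrame([("2024-01-15",)], ["d"])

# Per the new branch (and Spark's null propagation for date_format), a literal
# NULL format yields NULL for every row rather than an error.
df.selectExpr("date_format(d, NULL) AS formatted").show()

# A real pattern still formats as usual.
df.selectExpr("date_format(d, 'yyyy/MM/dd') AS formatted").show()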
@@ -2535,6 +2566,19 @@ def map_unresolved_function(
                 input_types=[StringType(), StringType(), StructType()],
             )
             def _from_csv(csv_data: str, schema: str, options: Optional[dict]):
+                if csv_data is None:
+                    return None
+
+                if csv_data == "":
+                    # Return dict with None values for empty string
+                    schemas = schema.split(",")
+                    results = {}
+                    for sc in schemas:
+                        parts = [i for i in sc.split(" ") if len(i) != 0]
+                        assert len(parts) == 2, f"{sc} is not a valid schema"
+                        results[parts[0]] = None
+                    return results
+
                 max_chars_per_column = -1
                 sep = ","
 
@@ -2617,7 +2661,9 @@ def map_unresolved_function(
                 case _:
                     raise ValueError("Unrecognized from_csv parameters")
 
-            result_exp = snowpark_fn.cast(csv_result, ddl_schema)
+            result_exp = snowpark_fn.when(
+                snowpark_args[0].is_null(), snowpark_fn.lit(None)
+            ).otherwise(snowpark_fn.cast(csv_result, ddl_schema))
             result_type = ddl_schema
         case "from_json":
             # TODO: support options.
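
A small illustration of the from_csv edge cases these two hunks cover, again with the same session; the schema and rows are made up:

schema = "a INT, b STRING"
df = spark.createDataFrame([("1,x",), ("",), (None,)], ["csv"])

# The first row parses normally, the empty string is mapped to a struct of
# NULL fields by the handler above, and the NULL input stays NULL instead of
# being turned into a struct.
df.select(F.from_csv("csv", F.lit(schema)).alias("parsed")).show(truncate=False)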
@@ -2651,6 +2697,9 @@ def map_unresolved_function(
             # try to parse first, since spark returns null for invalid json
             result_exp = snowpark_fn.call_function("try_parse_json", snowpark_args[0])
 
+            # Check if the original input is NULL - if so, return NULL for the entire result
+            original_input_is_null = snowpark_args[0].is_null()
+
             # helper function to make sure we have the expected array element type
             def _element_type_matches(
                 array_exp: Column, element_type: DataType
@@ -2749,9 +2798,13 @@ def map_unresolved_function(
                 else:
                     return exp
 
-            result_exp = snowpark_fn.cast(
-                _coerce_to_type(result_exp, result_type), result_type
-            )
+            # Apply the coercion to handle invalid JSON (creates struct with NULL fields)
+            coerced_exp = _coerce_to_type(result_exp, result_type)
+
+            # If the original input was NULL, return NULL instead of a struct
+            result_exp = snowpark_fn.when(
+                original_input_is_null, snowpark_fn.lit(None)
+            ).otherwise(snowpark_fn.cast(coerced_exp, result_type))
         case "from_unixtime":
 
             def raise_analysis_exception(
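
The NULL guard above distinguishes a NULL input (NULL result) from merely invalid JSON (a struct with NULL fields). A quick illustrative check with the same session:

df = spark.createDataFrame([('{"a": 1}',), ("not json",), (None,)], ["js"])

# Valid JSON parses, invalid JSON coerces to a struct whose fields are NULL,
# and a NULL input yields a NULL struct, which the when/otherwise wrapper preserves.
df.select(F.from_json("js", "a INT").alias("parsed")).show(truncate=False)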
@@ -2896,10 +2949,53 @@ def map_unresolved_function(
             )
         case "grouping" | "grouping_id":
             # grouping_id is not an alias for grouping in PySpark, but Snowflake's implementation handles both
-            result_exp = snowpark_fn.grouping(*snowpark_args)
+            current_grouping_cols = get_current_grouping_columns()
+            if function_name == "grouping_id":
+                if not snowpark_args:
+                    # grouping_id() with empty args means use all grouping columns
+                    spark_function_name = "grouping_id()"
+                    snowpark_args = [
+                        column_mapping.get_snowpark_column_name_from_spark_column_name(
+                            spark_col
+                        )
+                        for spark_col in current_grouping_cols
+                    ]
+                else:
+                    # Verify that grouping arguments match current grouping columns
+                    spark_col_args = [
+                        column_mapping.get_spark_column_name_from_snowpark_column_name(
+                            sp_col.getName()
+                        )
+                        for sp_col in snowpark_args
+                    ]
+                    if current_grouping_cols != spark_col_args:
+                        raise AnalysisException(
+                            f"[GROUPING_ID_COLUMN_MISMATCH] Columns of grouping_id: {spark_col_args} doesnt match "
+                            f"Grouping columns: {current_grouping_cols}"
+                        )
+            if function_name == "grouping_id":
+                result_exp = snowpark_fn.grouping_id(*snowpark_args)
+            else:
+                result_exp = snowpark_fn.grouping(*snowpark_args)
             result_type = LongType()
         case "hash":
             # TODO: See the spark-compatibility-issues.md explanation, this is quite different from Spark.
+            # MapType columns as input should raise an exception as they are not hashable.
+            snowflake_compat = get_boolean_session_config_param(
+                "enable_snowflake_extension_behavior"
+            )
+            # Snowflake's hash function does allow MAP types, but Spark does not. Therefore, if we have the expansion flag enabled
+            # we want to let it pass through and hash MAP types.
+            # Also allow if the legacy config spark.sql.legacy.allowHashOnMapType is set to true
+            if not snowflake_compat and not spark_sql_legacy_allow_hash_on_map_type:
+                for arg in snowpark_typed_args:
+                    if any(isinstance(t, MapType) for t in arg.types):
+                        raise AnalysisException(
+                            '[DATATYPE_MISMATCH.HASH_MAP_TYPE] Cannot resolve "hash(value)" due to data type mismatch: '
+                            'Input to the function `hash` cannot contain elements of the "MAP" type. '
+                            'In Spark, same maps may have different hashcode, thus hash expressions are prohibited on "MAP" elements. '
+                            'To restore previous behavior set "spark.sql.legacy.allowHashOnMapType" to "true".'
+                        )
             result_exp = snowpark_fn.hash(*snowpark_args)
             result_type = LongType()
         case "hex":
@@ -2934,6 +3030,14 @@ def map_unresolved_function(
             result_type = StringType()
         case "histogram_numeric":
             aggregate_input_typ = snowpark_typed_args[0].typ
+
+            if isinstance(aggregate_input_typ, DecimalType):
+                # mimic bug from Spark 3.5.3.
+                # In 3.5.5 it's fixed and this exception shouldn't be thrown
+                raise ValueError(
+                    "class org.apache.spark.sql.types.Decimal cannot be cast to class java.lang.Number (org.apache.spark.sql.types.Decimal is in unnamed module of loader 'app'; java.lang.Number is in module java.base of loader 'bootstrap')"
+                )
+
             histogram_return_type = ArrayType(
                 StructType(
                     [
@@ -3154,6 +3258,18 @@ def map_unresolved_function(
                 )
             result_type = histogram_return_type
         case "hll_sketch_agg":
+            # check if input type is correct
+            if type(snowpark_typed_args[0].typ) not in [
+                IntegerType,
+                LongType,
+                StringType,
+                BinaryType,
+            ]:
+                type_str = snowpark_typed_args[0].typ.simpleString().upper()
+                raise AnalysisException(
+                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT" or "STRING" or "BINARY") type, however "{snowpark_arg_names[0]}" has the type "{type_str}".'
+                )
+
             match snowpark_args:
                 case [sketch]:
                     spark_function_name = (
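
For the input-type check added to hll_sketch_agg, a brief sketch assuming PySpark 3.5+ (where the sketch/estimate pair is available); the data is illustrative:

df = spark.createDataFrame([(1,), (2,), (2,), (3,)], ["v"])

# INT/BIGINT/STRING/BINARY inputs are accepted; other types fail analysis
# with DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE, as in the check above.
df.agg(F.hll_sketch_estimate(F.hll_sketch_agg("v")).alias("ndv")).show()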
@@ -3796,12 +3912,47 @@ def map_unresolved_function(
             )
 
             result_type = StringType()
-        case "ltrim":
+        case "ltrim" | "rtrim":
+            function_name_argument = (
+                "TRAILING" if function_name == "rtrim" else "LEADING"
+            )
             if len(snowpark_args) == 2:
                 # Only possible using SQL
-                spark_function_name = f"TRIM(LEADING {snowpark_arg_names[1]} FROM {snowpark_arg_names[0]})"
+                spark_function_name = f"TRIM({function_name_argument} {snowpark_arg_names[1]} FROM {snowpark_arg_names[0]})"
             result_exp = snowpark_fn.ltrim(*snowpark_args)
             result_type = StringType()
+            if isinstance(snowpark_typed_args[0].typ, BinaryType):
+                argument_name = snowpark_arg_names[0]
+                if exp.unresolved_function.arguments[0].HasField("literal"):
+                    argument_name = f"""X'{exp.unresolved_function.arguments[0].literal.binary.hex()}'"""
+                if len(snowpark_args) == 1:
+                    spark_function_name = f"{function_name}({argument_name})"
+                    trim_value = snowpark_fn.lit(b"\x20")
+                if len(snowpark_args) == 2:
+                    # Only possible using SQL
+                    trim_arg = snowpark_arg_names[1]
+                    if isinstance(
+                        snowpark_typed_args[1].typ, BinaryType
+                    ) and exp.unresolved_function.arguments[1].HasField("literal"):
+                        trim_arg = f"""X'{exp.unresolved_function.arguments[1].literal.binary.hex()}'"""
+                        trim_value = snowpark_args[1]
+                    else:
+                        trim_value = snowpark_fn.lit(None)
+                    function_name_argument = (
+                        "TRAILING" if function_name == "rtrim" else "LEADING"
+                    )
+                    spark_function_name = f"TRIM({function_name_argument} {trim_arg} FROM {argument_name})"
+                result_exp = _trim_helper(
+                    snowpark_args[0], trim_value, snowpark_fn.lit(function_name)
+                )
+                result_type = BinaryType()
+            else:
+                if function_name == "ltrim":
+                    result_exp = snowpark_fn.ltrim(*snowpark_args)
+                    result_type = StringType()
+                elif function_name == "rtrim":
+                    result_exp = snowpark_fn.rtrim(*snowpark_args)
+                    result_type = StringType()
         case "make_date":
             y = snowpark_args[0].cast(LongType())
             m = snowpark_args[1].cast(LongType())
@@ -4258,6 +4409,17 @@ def map_unresolved_function(
                 lambda: snowpark_typed_args[0].types,
             )
         case "md5":
+            snowflake_compat = get_boolean_session_config_param(
+                "enable_snowflake_extension_behavior"
+            )
+
+            # MD5 in Spark only accepts BinaryType or types that can be implicitly cast to it (StringType)
+            if not snowflake_compat:
+                if not isinstance(snowpark_typed_args[0].typ, (BinaryType, StringType)):
+                    raise AnalysisException(
+                        f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "md5({snowpark_arg_names[0]})" due to data type mismatch: '
+                        f'Parameter 1 requires the "BINARY" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
+                    )
             result_exp = snowpark_fn.md5(snowpark_args[0])
             result_type = StringType(32)
         case "median":
@@ -5466,13 +5628,28 @@ def map_unresolved_function(
         case "row_number":
             result_exp = snowpark_fn.row_number()
             result_exp = TypedColumn(result_exp, lambda: [LongType()])
-        case "rtrim":
-            if len(snowpark_args) == 2:
-                # Only possible using SQL
-                spark_function_name = f"TRIM(TRAILING {snowpark_arg_names[1]} FROM {snowpark_arg_names[0]})"
-            result_exp = snowpark_fn.rtrim(*snowpark_args)
-            result_type = StringType()
         case "schema_of_csv":
+            # Validate that the input is a foldable STRING expression
+            if (
+                exp.unresolved_function.arguments[0].WhichOneof("expr_type")
+                != "literal"
+            ):
+                raise AnalysisException(
+                    "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
+                    f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
+                    'the input csv should be a foldable "STRING" expression; however, '
+                    f'got "{snowpark_arg_names[0]}".'
+                )
+
+            if isinstance(snowpark_typed_args[0].typ, StringType):
+                if exp.unresolved_function.arguments[0].literal.string == "":
+                    raise AnalysisException(
+                        "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
+                        f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
+                        'the input csv should be a foldable "STRING" expression; however, '
+                        f'got "{snowpark_arg_names[0]}".'
+                    )
+
             snowpark_args = [
                 typed_arg.column(to_semi_structure=True)
                 for typed_arg in snowpark_typed_args
@@ -5689,6 +5866,16 @@ def map_unresolved_function(
             )
             result_type = ArrayType(ArrayType(StringType()))
         case "sequence":
+            if snowpark_typed_args[0].typ != snowpark_typed_args[1].typ or (
+                not isinstance(snowpark_typed_args[0].typ, _IntegralType)
+                or not isinstance(snowpark_typed_args[1].typ, _IntegralType)
+            ):
+                raise AnalysisException(
+                    f"""[DATATYPE_MISMATCH.SEQUENCE_WRONG_INPUT_TYPES] Cannot resolve "sequence({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: `sequence` uses the wrong parameter type. The parameter type must conform to:
+1. The start and stop expressions must resolve to the same type.
+2. Otherwise, if start and stop expressions resolve to the "INTEGRAL" type, then the step expression must resolve to the same type.
+"""
+                )
             result_exp = snowpark_fn.cast(
                 snowpark_fn.sequence(*snowpark_args),
                 ArrayType(LongType(), contains_null=False),
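
The sequence check requires both endpoints to resolve to the same integral type. A short sketch, same session as before:

df = spark.range(1)

# Matching integral endpoints work and produce [1, 2, 3, 4, 5].
df.select(F.sequence(F.lit(1), F.lit(5)).alias("seq")).show(truncate=False)
# Mismatched or non-integral endpoints are rejected with
# DATATYPE_MISMATCH.SEQUENCE_WRONG_INPUT_TYPES, as in the check above.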
@@ -6274,6 +6461,10 @@ def map_unresolved_function(
             )
             result_type = TimestampType(snowpark.types.TimestampTimeZone.NTZ)
         case "timestamp_millis":
+            if not isinstance(snowpark_typed_args[0].typ, _IntegralType):
+                raise AnalysisException(
+                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "timestamp_millis({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "INTEGRAL" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
+                )
             result_exp = snowpark_fn.cast(
                 snowpark_fn.to_timestamp(snowpark_args[0] * 1_000, 6),
                 TimestampType(snowpark.types.TimestampTimeZone.NTZ),
@@ -6283,6 +6474,10 @@ def map_unresolved_function(
             # Spark allows seconds to be fractional. Snowflake does not allow that
             # even though the documentation explicitly says that it does.
             # As a workaround, use integer milliseconds instead of fractional seconds.
+            if not isinstance(snowpark_typed_args[0].typ, _NumericType):
+                raise AnalysisException(
+                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "NUMERIC" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
+                )
             result_exp = snowpark_fn.cast(
                 snowpark_fn.to_timestamp(
                     snowpark_fn.cast(snowpark_args[0] * 1_000_000, LongType()), 6
@@ -7116,6 +7311,12 @@ def map_unresolved_function(
                     )
                 )
             )
+            raise_fn = _raise_error_udf_helper(BinaryType())
+            result_exp = (
+                snowpark_fn.when(unbase_arg.is_null(), snowpark_fn.lit(None))
+                .when(result_exp.is_null(), raise_fn(snowpark_fn.lit("Invalid input")))
+                .otherwise(result_exp)
+            )
             result_type = BinaryType()
         case "unhex":
             # Non string columns, convert them to string type. This mimics pyspark behavior.
@@ -7316,6 +7517,15 @@ def map_unresolved_function(
             )
             result_type = LongType()
         case "when" | "if":
+            # Validate that the condition is a boolean expression
+            if len(snowpark_typed_args) > 0:
+                condition_type = snowpark_typed_args[0].typ
+                if not isinstance(condition_type, BooleanType):
+                    raise AnalysisException(
+                        f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
+                        f"Parameter 1 requires the 'BOOLEAN' type, however got '{condition_type}'"
+                    )
+
             name_components = ["CASE"]
             name_components.append("WHEN")
             name_components.append(snowpark_arg_names[0])
@@ -7334,6 +7544,13 @@ def map_unresolved_function(
                 name_components.append(snowpark_arg_names[i])
                 name_components.append("THEN")
                 name_components.append(snowpark_arg_names[i + 1])
+                # Validate each WHEN condition
+                condition_type = snowpark_typed_args[i].typ
+                if not isinstance(condition_type, BooleanType):
+                    raise AnalysisException(
+                        f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
+                        f"Parameter {i + 1} requires the 'BOOLEAN' type, however got '{condition_type}'"
+                    )
                 result_exp = result_exp.when(snowpark_args[i], snowpark_args[i + 1])
                 result_type_indexes.append(i + 1)
             name_components.append("END")
@@ -9448,3 +9665,22 @@ def _validate_number_format_string(format_str: str) -> None:
         raise AnalysisException(
             f"[INVALID_FORMAT.WRONG_NUM_DIGIT] The format is invalid: '{format_str}'. The format string requires at least one number digit."
         )
+
+
+def _trim_helper(value: Column, trim_value: Column, trim_type: Column) -> Column:
+    @cached_udf(
+        return_type=BinaryType(),
+        input_types=[BinaryType(), BinaryType(), StringType()],
+    )
+    def _binary_trim_udf(value: bytes, trim_value: bytes, trim_type: str) -> bytes:
+        if value is None or trim_value is None:
+            return value
+        if trim_type in ("rtrim", "btrim", "trim"):
+            while value.endswith(trim_value):
+                value = value[: -len(trim_value)]
+        if trim_type in ("ltrim", "btrim", "trim"):
+            while value.startswith(trim_value):
+                value = value[len(trim_value) :]
+        return value
+
+    return _binary_trim_udf(value, trim_value, trim_type)
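
The new module-level _trim_helper wraps a UDF that repeatedly strips a byte pattern from one or both ends. A standalone plain-Python sketch of the same loop (the function name here is made up for illustration):

def binary_trim(value: bytes, trim_value: bytes, trim_type: str) -> bytes:
    # Mirror of the UDF body: strip the pattern from the chosen end(s).
    if value is None or trim_value is None:
        return value
    if trim_type in ("rtrim", "btrim", "trim"):
        while value.endswith(trim_value):
            value = value[: -len(trim_value)]
    if trim_type in ("ltrim", "btrim", "trim"):
        while value.startswith(trim_value):
            value = value[len(trim_value):]
    return value

assert binary_trim(b"  abc  ", b"\x20", "rtrim") == b"  abc"
assert binary_trim(b"  abc  ", b"\x20", "ltrim") == b"abc  "

Note that, per the ltrim/rtrim hunk earlier, the helper is only used when the first argument is BINARY; string inputs stay on the existing snowpark_fn.ltrim/rtrim path.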
@@ -13,10 +13,10 @@ from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.context import get_outer_dataframes
+from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
-from snowflake.snowpark_connect.utils.context import get_outer_dataframes
 
 
 def check_struct_and_get_field_datatype(field_name, schema):
@@ -66,8 +66,8 @@ def map_unresolved_star(
     )
 
     if len(spark_names) == 0:
-        for outer_df in get_outer_dataframes():
-            column_mapping_for_outer_df = outer_df._column_map
+        for outer_df_container in get_outer_dataframes():
+            column_mapping_for_outer_df = outer_df_container.column_map
             (
                 spark_names,
                 snowpark_names,
@@ -106,8 +106,8 @@ def map_unresolved_star(
                 )
             )
         if prefix_candidate is None:
-            for outer_df in get_outer_dataframes():
-                prefix_candidate = outer_df._column_map.get_snowpark_column_name_from_spark_column_name(
+            for outer_df_container in get_outer_dataframes():
+                prefix_candidate = outer_df_container.column_map.get_snowpark_column_name_from_spark_column_name(
                     prefix_candidate_str, allow_non_exists=True
                 )
                 if prefix_candidate is not None:
@@ -184,8 +184,8 @@ def map_unresolved_star_struct(
                 )
             )
         if prefix_candidate is None:
-            for outer_df in get_outer_dataframes():
-                prefix_candidate = outer_df._column_map.get_snowpark_column_name_from_spark_column_name(
+            for outer_df_container in get_outer_dataframes():
+                prefix_candidate = outer_df_container.column_map.get_snowpark_column_name_from_spark_column_name(
                     prefix_candidate_str, allow_non_exists=True
                 )
                 if prefix_candidate is not None:
@@ -10,7 +10,7 @@ from snowflake.snowpark.types import DataType, StringType, StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
 
@@ -29,7 +29,7 @@ class ExpressionTyper:
         types = self._try_to_type_attribute_or_literal(self.df, column)
         if not types and get_df_before_projection():
             types = self._try_to_type_attribute_or_literal(
-                get_df_before_projection(), column
+                get_df_before_projection().dataframe, column
            )
         if not types:
             # df.select().schema results in DESCRIBE call to Snowflake, so avoid it if possible
@@ -42,17 +42,17 @@ class ExpressionTyper:
         try:
             return self._get_df_datatypes(df, column)
         except SnowparkClientException:  # Fallback to the df before projection
-            df = get_df_before_projection()
-            if df is None:
+            df_container = get_df_before_projection()
+            if df_container is None:
                 raise
 
-            df = self._join_df_with_outer_dataframes(df)
+            df = self._join_df_with_outer_dataframes(df_container.dataframe)
             return self._get_df_datatypes(df, column)
 
     @staticmethod
     def _join_df_with_outer_dataframes(df: DataFrame) -> DataFrame:
-        for outer_df in get_outer_dataframes():
-            df = df.join(outer_df)
+        for outer_df_container in get_outer_dataframes():
+            df = df.join(outer_df_container.dataframe)
         return df
 
 
@@ -1,11 +1,12 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # source: control.proto
+# Protobuf Python Version: 4.25.1
 """Generated protocol buffer code."""
-from google.protobuf.internal import builder as _builder
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
@@ -15,21 +16,21 @@ _sym_db = _symbol_database.Default()
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rcontrol.proto\x12\rsnowflake.ses\"*\n\x06\x43onfig\x12\x14\n\x07log_ast\x18\x01 \x01(\x08H\x00\x88\x01\x01\x42\n\n\x08_log_ast\"\x1e\n\x0bPingRequest\x12\x0f\n\x07payload\x18\x01 \x01(\t\"\x1f\n\x0cPingResponse\x12\x0f\n\x07payload\x18\x01 \x01(\t\"+\n\x14GetRequestAstRequest\x12\x13\n\x0b\x66orce_flush\x18\x01 \x01(\x08\"M\n\x15GetRequestAstResponse\x12\x16\n\x0espark_requests\x18\x01 \x03(\x0c\x12\x1c\n\x14snowpark_ast_batches\x18\x02 \x03(\t2\xe8\x01\n\x0e\x43ontrolService\x12\x39\n\tConfigure\x12\x15.snowflake.ses.Config\x1a\x15.snowflake.ses.Config\x12?\n\x04Ping\x12\x1a.snowflake.ses.PingRequest\x1a\x1b.snowflake.ses.PingResponse\x12Z\n\rGetRequestAst\x12#.snowflake.ses.GetRequestAstRequest\x1a$.snowflake.ses.GetRequestAstResponseb\x06proto3')
 
-_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'control_pb2', globals())
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'control_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
-
   DESCRIPTOR._options = None
-  _CONFIG._serialized_start=32
-  _CONFIG._serialized_end=74
-  _PINGREQUEST._serialized_start=76
-  _PINGREQUEST._serialized_end=106
-  _PINGRESPONSE._serialized_start=108
-  _PINGRESPONSE._serialized_end=139
-  _GETREQUESTASTREQUEST._serialized_start=141
-  _GETREQUESTASTREQUEST._serialized_end=184
-  _GETREQUESTASTRESPONSE._serialized_start=186
-  _GETREQUESTASTRESPONSE._serialized_end=263
-  _CONTROLSERVICE._serialized_start=266
-  _CONTROLSERVICE._serialized_end=498
+  _globals['_CONFIG']._serialized_start=32
+  _globals['_CONFIG']._serialized_end=74
+  _globals['_PINGREQUEST']._serialized_start=76
+  _globals['_PINGREQUEST']._serialized_end=106
+  _globals['_PINGRESPONSE']._serialized_start=108
+  _globals['_PINGRESPONSE']._serialized_end=139
+  _globals['_GETREQUESTASTREQUEST']._serialized_start=141
+  _globals['_GETREQUESTASTREQUEST']._serialized_end=184
+  _globals['_GETREQUESTASTRESPONSE']._serialized_start=186
+  _globals['_GETREQUESTASTRESPONSE']._serialized_end=263
+  _globals['_CONTROLSERVICE']._serialized_start=266
+  _globals['_CONTROLSERVICE']._serialized_end=498
 # @@protoc_insertion_point(module_scope)