snowpark-connect 0.32.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +91 -40
- snowflake/snowpark_connect/column_qualifier.py +0 -4
- snowflake/snowpark_connect/config.py +9 -0
- snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
- snowflake/snowpark_connect/expression/literal.py +12 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +18 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +150 -29
- snowflake/snowpark_connect/expression/map_unresolved_function.py +93 -55
- snowflake/snowpark_connect/relation/map_aggregate.py +156 -257
- snowflake/snowpark_connect/relation/map_column_ops.py +19 -0
- snowflake/snowpark_connect/relation/map_join.py +454 -252
- snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
- snowflake/snowpark_connect/relation/map_sql.py +335 -90
- snowflake/snowpark_connect/relation/read/map_read.py +9 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
- snowflake/snowpark_connect/relation/read/map_read_json.py +90 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
- snowflake/snowpark_connect/relation/read/utils.py +41 -0
- snowflake/snowpark_connect/relation/utils.py +50 -2
- snowflake/snowpark_connect/relation/write/map_write.py +251 -292
- snowflake/snowpark_connect/resources_initializer.py +25 -13
- snowflake/snowpark_connect/server.py +9 -24
- snowflake/snowpark_connect/type_mapping.py +2 -0
- snowflake/snowpark_connect/typed_column.py +2 -2
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +4 -1
- snowflake/snowpark_connect/utils/udf_helper.py +1 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +4 -2
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +43 -104
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,6 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import pathlib
 import threading
 import time
 
@@ -51,11 +50,9 @@ def initialize_resources() -> None:
     """Upload Spark jar files required for creating Scala UDFs."""
     stage = session.get_session_stage()
     resource_path = stage + RESOURCE_PATH
-    import snowflake.snowpark_connect
+    import snowpark_connect_deps_1
+    import snowpark_connect_deps_2
 
-    pyspark_jars = (
-        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
-    )
     jar_files = [
         f"spark-sql_2.12-{SPARK_VERSION}.jar",
         f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
@@ -64,14 +61,29 @@ def initialize_resources() -> None:
         "json4s-ast_2.12-3.7.0-M11.jar",
     ]
 
-    for jar_file in jar_files:
-        session.file.put(
-            str(pyspark_jars / jar_file),
-            resource_path,
-            auto_compress=False,
-            overwrite=False,
-            source_compression="NONE",
-        )
+    for jar_name in jar_files:
+        # Try to find the JAR in package 1 first, then package 2
+        jar_path = None
+        try:
+            jar_path = snowpark_connect_deps_1.get_jar_path(jar_name)
+        except FileNotFoundError:
+            try:
+                jar_path = snowpark_connect_deps_2.get_jar_path(jar_name)
+            except FileNotFoundError:
+                raise FileNotFoundError(
+                    f"JAR {jar_name} not found in either package"
+                )
+
+        try:
+            session.file.put(
+                str(jar_path),
+                resource_path,
+                auto_compress=False,
+                overwrite=False,
+                source_compression="NONE",
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to upload JAR {jar_name}: {e}")
 
     start_time = time.time()
 
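Note: the rewritten upload loop relies on each snowpark-connect-deps package exposing get_jar_path(name), which raises FileNotFoundError when a JAR is absent, and (per the start_jvm hunk further down) list_jars(). A minimal sketch of what such a helper module could look like, assuming the JARs ship under an includes/jars directory inside each deps package; the actual layout of snowpark_connect_deps_1/2 is not shown in this diff:

# Hypothetical helper module for a snowpark_connect_deps_* package.
# The directory layout is an assumption, not the published layout.
import pathlib

_JAR_DIR = pathlib.Path(__file__).parent / "includes" / "jars"


def get_jar_path(jar_name: str) -> pathlib.Path:
    # Return the on-disk path of a bundled JAR; raise FileNotFoundError so
    # callers can fall back to the other deps package, as the loop above does.
    path = _JAR_DIR / jar_name
    if not path.is_file():
        raise FileNotFoundError(f"{jar_name} is not bundled in this package")
    return path


def list_jars() -> list[str]:
    # Enumerate every bundled JAR path for JVM classpath registration.
    return sorted(str(p) for p in _JAR_DIR.glob("*.jar"))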
@@ -24,12 +24,10 @@
 import atexit
 import logging
 import os
-import pathlib
 import socket
 import tempfile
 import threading
 import urllib.parse
-import zipfile
 from concurrent import futures
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
@@ -48,7 +46,6 @@ from pyspark.errors import PySparkValueError
 from pyspark.sql.connect.client.core import ChannelBuilder
 from pyspark.sql.connect.session import SparkSession
 
-import snowflake.snowpark_connect
 import snowflake.snowpark_connect.proto.control_pb2_grpc as control_grpc
 import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
@@ -1032,28 +1029,16 @@ def start_jvm():
         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
         raise exception
 
-
-    pyspark_jars = (
-        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
-    )
-    if "dataframe_processor.zip" in str(pyspark_jars):
-        # importlib.resource doesn't work when local stage package is used in TCM
-        zip_path = pathlib.Path(
-            snowflake.snowpark_connect.__file__
-        ).parent.parent.parent
-        temp_dir = tempfile.gettempdir()
-
-        extract_folder = "snowflake/snowpark_connect/includes/jars/"  # Folder to extract (must end with '/')
+    # Import both JAR dependency packages
+    import snowpark_connect_deps_1
+    import snowpark_connect_deps_2
 
-        with zipfile.ZipFile(zip_path, "r") as zf:
-            for member in zf.namelist():
-                if member.startswith(extract_folder):
-                    zf.extract(member, temp_dir)
-
-        pyspark_jars = pathlib.Path(temp_dir) / extract_folder
-
-    for path in pyspark_jars.glob("**/*.jar"):
-        jpype.addClassPath(path)
+    # Load all the jar files from both packages
+    jar_path_list = (
+        snowpark_connect_deps_1.list_jars() + snowpark_connect_deps_2.list_jars()
+    )
+    for jar_path in jar_path_list:
+        jpype.addClassPath(jar_path)
 
     # TODO: Should remove convertStrings, but it breaks the JDBC code.
     jvm_settings: list[str] = list(
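The rewritten start_jvm() registers every bundled JAR on the JPype classpath before booting the JVM; jpype.addClassPath has no effect once the JVM is already running. A short sketch of that ordering, reusing the deps-package helpers this diff introduces:

# Classpath entries must be registered before startJVM(); adding them
# afterwards does not affect the running JVM.
import jpype

import snowpark_connect_deps_1
import snowpark_connect_deps_2

for jar_path in snowpark_connect_deps_1.list_jars() + snowpark_connect_deps_2.list_jars():
    jpype.addClassPath(jar_path)

jpype.startJVM(convertStrings=True)  # the TODO above notes convertStrings is still needed for JDBC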
@@ -373,6 +373,8 @@ def cast_to_match_snowpark_type(
             return str(content)
         case snowpark.types.DayTimeIntervalType:
             return str(content)
+        case snowpark.types.MapType:
+            return content
         case _:
             exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
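The new MapType branch returns the map content as-is rather than converting it to a string. A self-contained illustration of the match-based dispatch, simplified to the branches visible in this hunk (the real function handles many more types):

# Illustration only: map values pass through unconverted, unlike the
# interval types, which are stringified.
from snowflake import snowpark


def cast_to_match(content, data_type):
    match type(data_type):
        case snowpark.types.DayTimeIntervalType:
            return str(content)
        case snowpark.types.MapType:
            return content  # dict survives untouched
        case _:
            raise NotImplementedError(f"Unsupported snowpark data type in casting: {data_type}")


assert cast_to_match({"k": 1}, snowpark.types.MapType()) == {"k": 1}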
@@ -49,7 +49,7 @@ class TypedColumn:
         self.qualifiers = qualifiers
 
     def get_qualifiers(self) -> set[ColumnQualifier]:
-        return getattr(self, "qualifiers",
+        return getattr(self, "qualifiers", set())
 
     def set_catalog_database_info(self, catalog_database_info: dict[str, str]) -> None:
         self._catalog_database_info = catalog_database_info
@@ -70,7 +70,7 @@ class TypedColumn:
     def get_multi_col_qualifiers(self, num_columns) -> list[set[ColumnQualifier]]:
         if not hasattr(self, "multi_col_qualifiers"):
 
-            return [
+            return [set() for i in range(num_columns)]
         assert (
             len(self.multi_col_qualifiers) == num_columns
         ), f"Expected {num_columns} multi-column qualifiers, got {len(self.multi_col_qualifiers)}"
@@ -55,7 +55,6 @@ _resolving_lambda_fun = ContextVar[bool]("_resolving_lambdas", default=False)
 _current_lambda_params = ContextVar[list[str]]("_current_lambda_params", default=[])
 
 _is_window_enabled = ContextVar[bool]("_is_window_enabled", default=False)
-_is_in_pivot = ContextVar[bool]("_is_in_pivot", default=False)
 _is_in_udtf_context = ContextVar[bool]("_is_in_udtf_context", default=False)
 _accessing_temp_object = ContextVar[bool]("_accessing_temp_object", default=False)
 
@@ -467,19 +466,6 @@ def is_window_enabled():
     return _is_window_enabled.get()
 
 
-@contextmanager
-def temporary_pivot_expression(value: bool):
-    token = _is_in_pivot.set(value)
-    try:
-        yield
-    finally:
-        _is_in_pivot.reset(token)
-
-
-def is_in_pivot() -> bool:
-    return _is_in_pivot.get()
-
-
 def get_is_in_udtf_context() -> bool:
     """
     Gets the value of _is_in_udtf_context for the current context, defaults to False.
@@ -0,0 +1,163 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from snowflake.snowpark import Column, functions as snowpark_fn
+from snowflake.snowpark._internal.analyzer.expression import (
+    CaseWhen,
+    Expression,
+    FunctionExpression,
+    SnowflakeUDF,
+)
+
+_SF_AGGREGATE_FUNCTIONS = [
+    "any_value",
+    "avg",
+    "corr",
+    "count",
+    "count_if",
+    "covar_pop",
+    "covar_samp",
+    "listagg",
+    "max",
+    "max_by",
+    "median",
+    "min",
+    "min_by",
+    "mode",
+    "percentile_cont",
+    "percentile_disc",
+    "stddev",
+    "stddev_samp",
+    "stddev_pop",
+    "sum",
+    "var_pop",
+    "var_samp",
+    "variance_pop",
+    "variance",
+    "variance_samp",
+    "bitand_agg",
+    "bitor_agg",
+    "bitxor_agg",
+    "booland_agg",
+    "boolor_agg",
+    "boolxor_agg",
+    "hash_agg",
+    "array_agg",
+    "object_agg",
+    "regr_avgx",
+    "regr_avgy",
+    "regr_count",
+    "regr_intercept",
+    "regr_r2",
+    "regr_slope",
+    "regr_sxx",
+    "regr_sxy",
+    "regr_syy",
+    "kurtosis",
+    "skew",
+    "array_union_agg",
+    "array_unique_agg",
+    "bitmap_bit_position",
+    "bitmap_bucket_number",
+    "bitmap_count",
+    "bitmap_construct_agg",
+    "bitmap_or_agg",
+    "approx_count_distinct",
+    "datasketches_hll",
+    "datasketches_hll_accumulate",
+    "datasketches_hll_combine",
+    "datasketches_hll_estimate",
+    "hll",
+    "hll_accumulate",
+    "hll_combine",
+    "hll_estimate",
+    "hll_export",
+    "hll_import",
+    "approximate_jaccard_index",
+    "approximate_similarity",
+    "minhash",
+    "minhash_combine",
+    "approx_top_k",
+    "approx_top_k_accumulate",
+    "approx_top_k_combine",
+    "approx_top_k_estimate",
+    "approx_percentile",
+    "approx_percentile_accumulate",
+    "approx_percentile_combine",
+    "approx_percentile_estimate",
+    "grouping",
+    "grouping_id",
+    "ai_agg",
+    "ai_summarize_agg",
+]
+
+
+def _is_agg_function_expression(expression: Expression) -> bool:
+    if (
+        isinstance(expression, FunctionExpression)
+        and expression.pretty_name.lower() in _SF_AGGREGATE_FUNCTIONS
+    ):
+        return True
+
+    # For PySpark aggregate functions that were mapped using a UDAF, e.g. try_sum
+    if isinstance(expression, SnowflakeUDF) and expression.is_aggregate_function:
+        return True
+
+    return False
+
+
+def _get_child_expressions(expression: Expression) -> list[Expression]:
+    if isinstance(expression, CaseWhen):
+        return expression._child_expressions
+
+    return expression.children or []
+
+
+def inject_condition_to_all_agg_functions(
+    expression: Expression, condition: Column
+) -> None:
+    """
+    Recursively traverses an expression tree and wraps all aggregate function arguments with a CASE WHEN condition.
+
+    Args:
+        expression: The Snowpark expression tree to traverse and modify.
+        condition: The Column condition to inject into aggregate function arguments.
+    """
+
+    any_agg_function_found = _inject_condition_to_all_agg_functions(
+        expression, condition
+    )
+
+    if not any_agg_function_found:
+        raise ValueError(f"No aggregate functions found in: {expression.sql}")
+
+
+def _inject_condition_to_all_agg_functions(
+    expression: Expression, condition: Column
+) -> bool:
+    any_agg_function_found = False
+
+    if _is_agg_function_expression(expression):
+        new_children = []
+        for child in _get_child_expressions(expression):
+            case_when = snowpark_fn.when(condition, Column(child))
+
+            new_children.append(case_when._expr1)
+
+        # Swap children
+        expression.children = new_children
+        if len(new_children) > 0:
+            expression.child = new_children[0]
+
+        return True
+
+    for child in _get_child_expressions(expression):
+        is_agg_function_in_child = _inject_condition_to_all_agg_functions(
+            child, condition
+        )
+
+        if is_agg_function_in_child:
+            any_agg_function_found = True
+
+    return any_agg_function_found
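The new helper rewrites each aggregate's arguments in place, so SUM(AMOUNT) becomes SUM(CASE WHEN cond THEN AMOUNT END), the usual way to emulate Spark's FILTER (WHERE ...) aggregate clause. A hedged usage sketch; the column names are illustrative, and Column._expression is a Snowpark internal:

# Illustrative use of the new helper against a Snowpark Column expression.
from snowflake.snowpark import functions as snowpark_fn
from snowflake.snowpark_connect.utils.expression_transformer import (
    inject_condition_to_all_agg_functions,
)

agg_col = snowpark_fn.sum(snowpark_fn.col("AMOUNT"))
condition = snowpark_fn.col("REGION") == snowpark_fn.lit("EMEA")

# Mutates the tree: SUM(AMOUNT) turns into SUM(CASE WHEN REGION = 'EMEA'
# THEN AMOUNT END); non-matching rows contribute NULL, which aggregates skip.
inject_condition_to_all_agg_functions(agg_col._expression, condition)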
@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import threading
+from collections import defaultdict
+
+from snowflake.snowpark_connect.utils.context import get_session_id
+
+# per session number sequences to generate unique snowpark columns
+_session_sequences = defaultdict(int)
+
+_lock = threading.Lock()
+
+
+def next_unique_num():
+    session_id = get_session_id()
+    with _lock:
+        next_num = _session_sequences[session_id]
+        _session_sequences[session_id] = next_num + 1
+        return next_num
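The new sequence module keeps one counter per Spark Connect session, so generated Snowpark column names are unique within a session and independent across sessions. A standalone illustration of the same pattern, with the session id passed explicitly instead of read from context:

# Standalone illustration of the per-session counter used above.
import threading
from collections import defaultdict

_session_sequences: defaultdict[str, int] = defaultdict(int)
_lock = threading.Lock()


def next_unique_num(session_id: str) -> int:
    # Each session advances its own counter; the lock keeps this safe when
    # several worker threads build plans for the same session concurrently.
    with _lock:
        num = _session_sequences[session_id]
        _session_sequences[session_id] = num + 1
        return num


assert next_unique_num("a") == 0
assert next_unique_num("a") == 1
assert next_unique_num("b") == 0  # independent per session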
@@ -94,6 +94,10 @@ def configure_snowpark_session(session: snowpark.Session):
     session.connection.arrow_number_to_decimal_setter = True
     session.custom_package_usage_config["enabled"] = True
 
+    # Scoped temp objects may not be accessible in stored procedure and cause "object does not exist" error. So disable
+    # _use_scoped_temp_objects here and use temp table instead.
+    session._use_scoped_temp_objects = False
+
     # Configure CTE optimization based on session configuration
     cte_optimization_enabled = get_cte_optimization_enabled()
     session.cte_optimization_enabled = cte_optimization_enabled
@@ -128,7 +132,6 @@ def configure_snowpark_session(session: snowpark.Session):
         "TIMEZONE": f"'{global_config.spark_sql_session_timeZone}'",
         "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
         "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
-        "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
         "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
         "QUERY_TAG": f"'{query_tag}'",
     }
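Together with the hunk above, this replaces the PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS session parameter with the client-side Snowpark flag, since scoped temp objects can be invisible inside stored procedures. A minimal sketch, assuming a connections.toml profile named "default":

# Sketch only: the connection profile name is a placeholder.
from snowflake import snowpark

session = snowpark.Session.builder.configs({"connection_name": "default"}).create()

# 1.0.0 sets the private client flag instead of the session parameter, so
# temp objects created while registering UDFs from a sproc remain visible.
session._use_scoped_temp_objects = False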
@@ -186,6 +186,7 @@ def parse_return_type(return_type_json_str) -> Optional[DataType]:
 
 
 def create(session, called_from, return_type_json_str, input_types_json_str, input_column_names_json_str, udf_name, replace, udf_packages, udf_imports, b64_str, original_return_type):
+    session._use_scoped_temp_objects = False
     import snowflake.snowpark.context as context
     context._use_structured_type_semantics = True
     context._is_snowpark_connect_compatible_mode = True
@@ -153,6 +153,7 @@ def parse_types(types_json_str) -> Optional[list[DataType]]:
     return json.loads(types_json_str)
 
 def create(session, b64_str, expected_types_json_str, output_schema_json_str, packages, imports, is_arrow_enabled, is_spark_compatible_udtf_mode_enabled, called_from):
+    session._use_scoped_temp_objects = False
     import snowflake.snowpark.context as context
     context._use_structured_type_semantics = True
     context._is_snowpark_connect_compatible_mode = True
@@ -257,6 +258,7 @@ from snowflake.snowpark.types import _parse_datatype_json_value
 {inline_udtf_utils_py_code}
 
 def create(session, b64_str, spark_column_names_json_str, input_schema_json_str, return_schema_json_str):
+    session._use_scoped_temp_objects = False
     import snowflake.snowpark.context as context
     context._use_structured_type_semantics = True
     context._is_snowpark_connect_compatible_mode = True
@@ -330,6 +332,7 @@ from snowflake.snowpark.types import _parse_datatype_json_value
 from pyspark.serializers import CloudPickleSerializer
 
 def create(session, func_info_json):
+    session._use_scoped_temp_objects = False
     import snowflake.snowpark.context as context
     context._use_structured_type_semantics = True
     context._is_snowpark_connect_compatible_mode = True
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.32.0
+Version: 1.0.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -9,6 +9,8 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 License-File: LICENSE-binary
 License-File: NOTICE-binary
+Requires-Dist: snowpark-connect-deps-1==3.56.2
+Requires-Dist: snowpark-connect-deps-2==3.56.2
 Requires-Dist: certifi>=2025.1.31
 Requires-Dist: cloudpickle
 Requires-Dist: fsspec[http]
@@ -16,7 +18,7 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<6.32.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.
+Requires-Dist: snowflake-snowpark-python[pandas]<1.43.0,==1.42.0
 Requires-Dist: snowflake-connector-python<4.0.0,>=3.18.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi