snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -21,17 +21,13 @@
 # limitations under the License.
 #

-
-import logging
+
 import os
-import
-import socket
+import sys
 import tempfile
 import threading
-import urllib.parse
-import zipfile
 from concurrent import futures
-from typing import
+from typing import Callable, Dict, List, Optional

 import grpc
 import jpype
@@ -41,14 +37,10 @@ import pyspark.sql.connect.proto.base_pb2_grpc as proto_base_grpc
 import pyspark.sql.connect.proto.common_pb2 as common_proto
 import pyspark.sql.connect.proto.relations_pb2 as relations_proto
 import pyspark.sql.connect.proto.types_pb2 as types_proto
-from packaging import version
 from pyspark import StorageLevel
 from pyspark.conf import SparkConf
-from pyspark.errors import PySparkValueError
-from pyspark.sql.connect.client.core import ChannelBuilder
 from pyspark.sql.connect.session import SparkSession

-import snowflake.snowpark_connect
 import snowflake.snowpark_connect.proto.control_pb2_grpc as control_grpc
 import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
@@ -56,7 +48,11 @@ from snowflake.snowpark_connect.analyze_plan.map_tree_string import map_tree_str
 from snowflake.snowpark_connect.config import route_config_proto
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.control_server import ControlServicer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    attach_custom_error_code,
+    build_grpc_error_response,
+)
 from snowflake.snowpark_connect.execute_plan.map_execution_command import (
     map_execution_command,
 )
@@ -66,7 +62,26 @@ from snowflake.snowpark_connect.execute_plan.map_execution_root import (
 from snowflake.snowpark_connect.relation.map_local_relation import map_local_relation
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.relation.utils import get_semantic_string
-from snowflake.snowpark_connect.resources_initializer import
+from snowflake.snowpark_connect.resources_initializer import initialize_resources
+from snowflake.snowpark_connect.server_common import ( # noqa: F401 - re-exported for public API
+    _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+    _client_telemetry_context,
+    _disable_protobuf_recursion_limit,
+    _get_default_grpc_options,
+    _reset_server_run_state,
+    _setup_spark_environment,
+    _stop_server,
+    configure_server_url,
+    get_client_url,
+    get_server_error,
+    get_server_running,
+    get_server_url,
+    get_session,
+    set_grpc_max_message_size,
+    set_server_error,
+    setup_signal_handlers,
+    validate_startup_parameters,
+)
 from snowflake.snowpark_connect.type_mapping import (
     map_type_string_to_proto,
     snowpark_to_proto_type,
@@ -82,12 +97,13 @@ from snowflake.snowpark_connect.utils.cache import (
     df_cache_map_put_if_absent,
 )
 from snowflake.snowpark_connect.utils.context import (
+    clean_request_external_tables,
     clear_context_data,
-
-
+    get_request_external_tables,
+    get_spark_session_id,
+    set_spark_session_id,
     set_spark_version,
 )
-from snowflake.snowpark_connect.utils.env_utils import get_int_from_env
 from snowflake.snowpark_connect.utils.external_udxf_cache import (
     clear_external_udxf_cache,
 )
@@ -96,7 +112,25 @@ from snowflake.snowpark_connect.utils.interrupt import (
     interrupt_queries_with_tag,
     interrupt_query,
 )
-from snowflake.snowpark_connect.utils.
+from snowflake.snowpark_connect.utils.java_stored_procedure import (
+    set_java_udf_creator_initialized_state,
+)
+from snowflake.snowpark_connect.utils.open_telemetry import (
+    is_telemetry_enabled,
+    otel_attach_context,
+    otel_create_context_wrapper,
+    otel_create_status,
+    otel_detach_context,
+    otel_end_root_span,
+    otel_flush_telemetry,
+    otel_get_current_span,
+    otel_get_root_span_context,
+    otel_get_status_code,
+    otel_get_tracer,
+    otel_initialize,
+    otel_start_span_as_current,
+)
+from snowflake.snowpark_connect.utils.profiling import PROFILING_ENABLED, profile_method
 from snowflake.snowpark_connect.utils.session import (
     configure_snowpark_session,
     get_or_create_snowpark_session,
@@ -112,29 +146,111 @@ from snowflake.snowpark_connect.utils.telemetry import (
 )
 from snowflake.snowpark_connect.utils.xxhash64 import xxhash64_string

-DEFAULT_PORT = 15002

-
-
-
-
+def _store_client_stack_trace(client_stack_info):
+    """Store client stack trace in thread-local storage"""
+
+    _client_telemetry_context.stack_trace = client_stack_info
+
+
+def _clear_client_stack_trace():
+    """Clear client stack trace"""
+
+    _client_telemetry_context.stack_trace = None
+
+
+def _get_client_stack_trace():
+    """Get current client stack trace"""
+
+    return getattr(_client_telemetry_context, "stack_trace", None)
+

+def _add_client_stack_trace_to_span(span, client_stack):
+    """
+    Add formatted client stack trace to a specific span.

-
+    Args:
+        span: The OpenTelemetry span to add the stack trace attribute to
+        client_stack: The client stack trace data (list of frame dicts)
     """
-
-
+    if not client_stack or not span or not span.is_recording():
+        return
+
+    stack_frames = []
+    for frame in client_stack:
+        if frame.get("file_name") and frame.get("line_number"):
+            method = frame.get("method_name", "unknown")
+            location = f"{frame.get('file_name')}:{frame.get('line_number')}"
+            stack_frames.append(f"{method} at {location}")
+
+    if stack_frames:
+        span.set_attribute("client.stack_trace", " <- ".join(stack_frames))
+
+
+def _process_and_store_client_stack_trace(request, add_to_span: bool = False):
     """
-
+    Extract, store, and optionally add client stack trace to the current span.

-
-
-
+    Args:
+        request: The gRPC request containing user context with stack trace
+        add_to_span: If True, format and add stack trace as span attribute to current span

-
-
+    Returns:
+        The extracted client_stack (or None) for use in ExecutePlan
+    """
+    # Extract and store client stack trace information for telemetry
+    client_stack = _extract_and_log_user_stack_trace(request)
+    if client_stack:
+        _store_client_stack_trace(client_stack)

-
+    # Set span attribute with formatted stack trace (if requested and available)
+    if add_to_span and client_stack:
+        root_span_otel_context = otel_get_root_span_context()
+        if root_span_otel_context is not None and is_telemetry_enabled():
+            current_span = otel_get_current_span()
+            if current_span and current_span.is_recording():
+                _add_client_stack_trace_to_span(current_span, client_stack)
+
+    return client_stack
+
+
+def _extract_and_log_user_stack_trace(request):
+    """
+    Extract and log user stack trace information from request extensions.
+
+    Args:
+        request: The gRPC request containing user_context.extensions
+
+    Returns:
+        List of stack trace frames or None if no traces found
+    """
+    try:
+        from snowflake.snowpark_connect.utils.patch_spark_line_number import (
+            extract_stack_trace_from_extensions,
+        )
+
+        if hasattr(request, "user_context") and hasattr(
+            request.user_context, "extensions"
+        ):
+            stack_traces = extract_stack_trace_from_extensions(
+                request.user_context.extensions
+            )
+
+            if stack_traces:
+                logger.debug("User code stack trace:")
+                for i, frame in enumerate(stack_traces):
+                    logger.debug(
+                        f" Frame {i}: {frame.get('method_name', 'unknown')} "
+                        f"at {frame.get('file_name', 'unknown')}:{frame.get('line_number', 'unknown')}"
+                    )
+                return stack_traces # Return the stack traces for telemetry use
+            else:
+                logger.debug("No user stack trace information found in request")
+        return None
+    except Exception as e:
+        # Don't let stack trace extraction errors affect the main request
+        logger.debug(f"Failed to extract user stack trace: {e}")
+        return None


 def _handle_exception(context, e: Exception):
@@ -147,16 +263,15 @@ def _handle_exception(context, e: Exception):
     if show_traceback:
         # Show detailed traceback (includes error info naturally)
         error_traceback = traceback.format_exc()
-
-        logger.error(sanitized_traceback)
+        logger.error(error_traceback)
     else:
         # Show only basic error information, no traceback
         logger.error("Error: %s - %s", type(e).__name__, str(e))

     telemetry.report_request_failure(e)
-
     if tcm.TCM_MODE:
-        #
+        # spark decoder will catch the error and return it to GS gracefully
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e

     from grpc_status import rpc_status
@@ -165,14 +280,71 @@ def _handle_exception(context, e: Exception):
     context.abort_with_status(rpc_status.to_status(rich_status))


+# Decorator for creating method spans as children of root span
+def _with_method_span(method_name):
+    """
+    Decorator to create a new span as child of root span for gRPC methods and provide it as parent to Snowpark operations.
+    """
+
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            # Get the root span context first
+            root_span_otel_context = otel_get_root_span_context()
+
+            # Only proceed if BOTH conditions are true
+            if root_span_otel_context is not None and is_telemetry_enabled():
+                # Attach the root context first, then create child span
+                context_token = otel_attach_context(root_span_otel_context)
+
+                try:
+                    tracer = otel_get_tracer(__name__)
+                    span_name = f"snowpark_connect.{method_name}"
+
+                    # Create span as child of the root span context
+                    span_context_mgr = otel_start_span_as_current(tracer, span_name)
+                    if span_context_mgr:
+                        with span_context_mgr as span:
+                            try:
+                                # Execute the method with the new span as current context
+                                return func(*args, **kwargs)
+
+                            except Exception as e:
+                                # Record the exception in the span
+                                span.record_exception(e)
+                                StatusCode = otel_get_status_code()
+                                if StatusCode:
+                                    status = otel_create_status(
+                                        StatusCode.ERROR, str(e)
+                                    )
+                                    if status:
+                                        span.set_status(status)
+                                raise
+                    else:
+                        # No span created, just execute the function
+                        return func(*args, **kwargs)
+
+                finally:
+                    # Always detach the root context
+                    if context_token is not None:
+                        otel_detach_context(context_token)
+            else:
+                # No root context available or OTel not available, execute without span
+                return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+# Snowflake Connect gRPC Service Implementation
 class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
     def __init__(
         self,
         log_request_fn: Optional[Callable[[bytearray], None]] = None,
     ) -> None:
         self.log_request_fn = log_request_fn
-        # Trigger
-
+        # Trigger synchronous initialization here, so that we reduce overhead for rpc calls.
+        initialize_resources()

     @profile_method
     def ExecutePlan(self, request: proto_base.ExecutePlanRequest, context):
@@ -181,20 +353,45 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         It is guaranteed that there is at least one ARROW batch returned even if the result set is empty.
         """
         logger.info("ExecutePlan")
+
+        client_stack = _process_and_store_client_stack_trace(request, add_to_span=False)
+
         if self.log_request_fn is not None:
             self.log_request_fn(request.SerializeToString())

         # TODO: remove session id context when we host this in Snowflake server
         # set the thread-local context of session id
         clear_context_data()
-
+        set_spark_session_id(request.session_id)
         set_spark_version(request.client_type)
         telemetry.initialize_request_summary(request)

         set_query_tags(request.tags)

-
+        # Additional context attachment for Snowpark DataFrame operations
+        snowpark_context_token = None
+        span = None
+        span_context_manager = None
         try:
+            root_span_otel_context = otel_get_root_span_context()
+
+            if root_span_otel_context is not None and is_telemetry_enabled():
+                snowpark_context_token = otel_attach_context(root_span_otel_context)
+
+                # Create span manually for generator function and make it current
+                tracer = otel_get_tracer(__name__)
+                span_context_manager = otel_start_span_as_current(
+                    tracer, "snowpark_connect.ExecutePlan"
+                )
+                span = None
+                if span_context_manager:
+                    span = (
+                        span_context_manager.__enter__()
+                    ) # Start the span context AND make it current
+                    # Add stack trace to this manually created span
+                    _add_client_stack_trace_to_span(span, client_stack)
+
+            result_iter = iter(())
             match request.plan.WhichOneof("op_type"):
                 case "root":
                     logger.info("ROOT")
@@ -212,32 +409,60 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                     result_complete=proto_base.ExecutePlanResponse.ResultComplete(),
                 )
         except Exception as e:
+            if span:
+                span.record_exception(e)
+                StatusCode = otel_get_status_code()
+                if StatusCode:
+                    status = otel_create_status(StatusCode.ERROR, str(e))
+                    if status:
+                        span.set_status(status)
             _handle_exception(context, e)
         finally:
+            if span_context_manager:
+                span_context_manager.__exit__(None, None, None) # End the span
+            if snowpark_context_token is not None:
+                otel_detach_context(snowpark_context_token)
+            # Clear client stack trace when request is done
+            _clear_client_stack_trace()
+            otel_flush_telemetry()
+            self._cleanup_external_tables()
             telemetry.send_request_summary_telemetry()

     @profile_method
+    @_with_method_span("AnalyzePlan")
     def AnalyzePlan(self, request: proto_base.AnalyzePlanRequest, context):
         """Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query."""
         logger.info(f"AnalyzePlan: {request.WhichOneof('analyze')}")
+
+        _process_and_store_client_stack_trace(request, add_to_span=True)
+
         if self.log_request_fn is not None:
             self.log_request_fn(request.SerializeToString())
+
         try:
             # TODO: remove session id context when we host this in Snowflake server
             # set the thread-local context of session id
             clear_context_data()
-
+            set_spark_session_id(request.session_id)
             set_spark_version(request.client_type)
             telemetry.initialize_request_summary(request)
             match request.WhichOneof("analyze"):
                 case "schema":
                     result = map_relation(request.schema.plan.root)
-
-
+
+                    from snowflake.snowpark_connect.relation.read.metadata_utils import (
+                        without_internal_columns,
+                    )
+
+                    filtered_result = without_internal_columns(result)
+                    filtered_df = filtered_result.dataframe
+
                     schema = proto_base.AnalyzePlanResponse.Schema(
                         schema=types_proto.DataType(
                             **snowpark_to_proto_type(
-
+                                filtered_df.schema,
+                                filtered_result.column_map,
+                                filtered_df,
                             )
                         )
                     )
@@ -274,10 +499,15 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                     plan_id = request.persist.relation.common.plan_id
                     # cache the plan if it is not already in the map

+                    from snowflake.snowpark_connect.relation.read.metadata_utils import (
+                        without_internal_columns,
+                    )
+
                     df_cache_map_put_if_absent(
                         (request.session_id, plan_id),
-                        lambda:
-
+                        lambda: without_internal_columns(
+                            map_relation(request.persist.relation)
+                        ),
                     )

                     storage_level = request.persist.storage_level
@@ -366,15 +596,24 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                         ),
                     )
                 case _:
-
+                    exception = SnowparkConnectNotImplementedError(
                         f"ANALYZE PLAN NOT IMPLEMENTED:\n{request}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
         except Exception as e:
             _handle_exception(context, e)
         finally:
+            # Clear client stack trace when request is done
+            _clear_client_stack_trace()
+            otel_flush_telemetry()
+            self._cleanup_external_tables()
             telemetry.send_request_summary_telemetry()

     @staticmethod
+    @_with_method_span("Config")
     def Config(
         request: proto_base.ConfigRequest,
         context,
@@ -389,12 +628,18 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
     ):
         """Update or fetch the configurations and returns a [[ConfigResponse]] containing the result."""
         logger.info("Config")
+
+        _process_and_store_client_stack_trace(request, add_to_span=True)
+
         try:
             telemetry.initialize_request_summary(request)
             return route_config_proto(request, get_or_create_snowpark_session())
         except Exception as e:
             _handle_exception(context, e)
         finally:
+            # Clear client stack trace when request is done
+            _clear_client_stack_trace()
+            otel_flush_telemetry()
             telemetry.send_request_summary_telemetry()

     def AddArtifacts(self, request_iterator, context):
@@ -402,11 +647,9 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         the added artifacts.
         """
         logger.info("AddArtifacts")
+
         session: snowpark.Session = get_or_create_snowpark_session()
-        filenames: dict[str, str] = {}
         response: dict[str, proto_base.AddArtifactsResponse.ArtifactSummary] = {}
-        # Store accumulated data for local relation cache
-        cache_data: dict[str, bytearray] = {}

         def _try_handle_local_relation(artifact_name: str, data: bytes):
             """
@@ -422,12 +665,14 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
             ) # heuristic to identify local relations

            def _handle_regular_artifact():
-
+                artifact = write_artifact(
                     session,
                     artifact_name,
                     data,
                     overwrite=True,
                 )
+                with session._filenames_lock:
+                    session._filenames[get_spark_session_id()][artifact_name] = artifact

             if is_likely_local_relation:
                 try:
@@ -435,9 +680,8 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                     l_relation.ParseFromString(data)
                     relation = relations_proto.Relation(local_relation=l_relation)
                     df_cache_map_put_if_absent(
-                        (
+                        (get_spark_session_id(), artifact_name.replace("cache/", "")),
                         lambda: map_local_relation(relation), # noqa: B023
-                        materialize=True,
                     )
                 except Exception as e:
                     logger.warning("Failed to put df into cache: %s", str(e))
@@ -458,29 +702,46 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         # Batch artifacts are sent as a single "batch" message containing a list of
         # artifacts. We do not need to keep track of the name since it is included in
         # each artifact.
-
+
         for request in request_iterator:
             clear_context_data()
-
+            set_spark_session_id(request.session_id)
             set_spark_version(request.client_type)
+            with session._filenames_lock:
+                if request.session_id not in session._filenames:
+                    session._filenames[request.session_id] = {}
+
             match request.WhichOneof("payload"):
                 case "begin_chunk":
                     current_name = request.begin_chunk.name
-
-                    current_name
-
+                    current_chunk = {
+                        "name": current_name,
+                        "num_chunks": request.begin_chunk.num_chunks,
+                        "current_chunk_index": 1,
+                    }
+                    with session._filenames_lock:
+                        assert (
+                            current_name not in session._filenames[request.session_id]
+                        ), "Duplicate artifact name found."

                     if current_name.startswith("cache/"):
-
+                        current_chunk["cache"] = bytearray(
                             request.begin_chunk.initial_chunk.data
                         )
                     else:
-
+                        artifact = write_artifact(
                             session,
                             current_name,
                             request.begin_chunk.initial_chunk.data,
                             overwrite=True,
                         )
+                        with session._filenames_lock:
+                            session._filenames[request.session_id][
+                                current_name
+                            ] = artifact
+                    # cache current chunk
+                    with session._current_chunk_lock:
+                        session._current_chunk[request.session_id] = current_chunk
                     response[
                         current_name
                     ] = proto_base.AddArtifactsResponse.ArtifactSummary(
@@ -491,18 +752,53 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                         ),
                     )
                 case "chunk":
+                    # retrieve current chunk
+                    with session._current_chunk_lock:
+                        if request.session_id not in session._current_chunk:
+                            exception = ValueError(
+                                f"Received 'chunk' for session_id '{request.session_id}' without a prior 'begin_chunk'."
+                            )
+                            attach_custom_error_code(
+                                exception, ErrorCodes.INTERNAL_ERROR
+                            )
+                            raise exception
+                        current_chunk = session._current_chunk[request.session_id]
+
+                    current_name = current_chunk["name"]
+                    current_chunk["current_chunk_index"] += 1
                     if current_name.startswith("cache/"):
-
+                        current_chunk["cache"].extend(request.chunk.data)
                     else:
-
+                        artifact = write_artifact(
                             session, current_name, request.chunk.data
-                        )
+                        )
+                        with session._filenames_lock:
+                            assert (
+                                session._filenames[request.session_id][current_name]
+                                == artifact
+                            ), "Artifact staging error."
+
+                    if (
+                        current_chunk["current_chunk_index"]
+                        == current_chunk["num_chunks"]
+                    ):
+                        # all chunks are ready
+                        if current_name.startswith("cache/"):
+                            _try_handle_local_relation(
+                                current_name, bytes(current_chunk["cache"])
+                            )
+                        with session._current_chunk_lock:
+                            # remove current chunk from session
+                            del session._current_chunk[request.session_id]

                     response[
                         current_name
                     ] = proto_base.AddArtifactsResponse.ArtifactSummary(
                         name=current_name,
-                        is_crc_successful=
+                        is_crc_successful=(
+                            current_name not in response
+                            or response[current_name].is_crc_successful
+                        )
                         and check_checksum(request.chunk.data, request.chunk.crc),
                     )
                 case "batch":
@@ -519,62 +815,89 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                         ),
                     )
                 case _:
-
+                    exception = ValueError(
                         f"Unexpected payload type in AddArtifacts: {request.WhichOneof('payload')}"
                     )
-
-
-
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
+
+        # if current chunk is still not finished, just return here
+        # This should only happen in TCM since we have to send request via rest one by one so current chunk cannot be
+        # finished in one iteration
+        with session._current_chunk_lock:
+            if request.session_id in session._current_chunk:
+                return proto_base.AddArtifactsResponse(
+                    artifacts=list(response.values())
+                )

         class_files: dict[str, str] = {}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            if name.startswith("cache"):
-                continue
-
-            # Remove temporary stored files which are put on the stage
-            os.remove(filepath)
-
-            # Add only files marked to be used in user generated Python UDFs.
-            cached_name = f"{session.get_session_stage()}/{filepath.split('/')[-1]}"
-            if not name.startswith("pyfiles") and cached_name in session._python_files:
-                session._python_files.remove(cached_name)
-            elif name.startswith("pyfiles"):
-                session._python_files.add(cached_name)
-
-            if not name.startswith("pyfiles"):
-                session._import_files.add(cached_name)
-
-        if class_files:
-            write_class_files_to_stage(session, class_files)
+        with session._filenames_lock:
+            for (name, filepath) in session._filenames[get_spark_session_id()].items():
+                if name.endswith(".class"):
+                    # name is <dir>/<package>/<class_name>
+                    # we don't need the dir name, but require the package, so only remove dir
+                    if os.name != "nt":
+                        class_files[name.split("/", 1)[-1]] = filepath
+                    else:
+                        class_files[name.split("\\", 1)[-1]] = filepath
+                    continue
+                session.file.put(
+                    filepath,
+                    session.get_session_stage(),
+                    auto_compress=False,
+                    overwrite=True,
+                    source_compression="GZIP" if name.endswith(".gz") else "NONE",
+                )

-
-
+                if name.startswith("cache"):
+                    continue
+
+                # Add only files marked to be used in user generated Python UDFs.
+                cached_name = f"{session.get_session_stage()}/{filepath.split('/')[-1]}"
+                if (
+                    not name.startswith("pyfiles")
+                    and cached_name in session._python_files
+                ):
+                    session._python_files.remove(cached_name)
+                elif name.startswith("pyfiles"):
+                    session._python_files.add(cached_name)
+
+                if name.startswith("jars/"):
+                    session._artifact_jars.add(cached_name)
+                    # Recreate the Java procedure to reload jars
+                    set_java_udf_creator_initialized_state(False)
+                elif not name.startswith("pyfiles"):
+                    session._import_files.add(cached_name)
+
+                # Remove temporary stored files which are put on the stage
+                os.remove(filepath)
+
+        if class_files:
+            jar_name = write_class_files_to_stage(session, class_files)
+            session._artifact_jars.add(jar_name)
+
+        if any(
+            not name.startswith("cache")
+            for name in session._filenames[get_spark_session_id()].keys()
+        ):
+            clear_external_udxf_cache(session)
+
+        # clear filenames for this session
+        session._filenames[get_spark_session_id()] = {}

         return proto_base.AddArtifactsResponse(artifacts=list(response.values()))

     def ArtifactStatus(self, request, context):
         """Check statuses of artifacts in the session and returns them in a [[ArtifactStatusesResponse]]"""
         logger.info("ArtifactStatus")
+
         clear_context_data()
-
+        set_spark_session_id(request.session_id)
         set_spark_version(request.client_type)
         session: snowpark.Session = get_or_create_snowpark_session()
+
         if os.name != "nt":
             tmp_path = f"/tmp/sas-{session.session_id}/"
         else:
@@ -583,7 +906,7 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         def _is_local_relation_cached(name: str) -> bool:
             if name.startswith("cache/"):
                 hash = name.replace("cache/", "")
-                cached_df = df_cache_map_get((
+                cached_df = df_cache_map_get((get_spark_session_id(), hash))
                 return cached_df is not None
             return False

@@ -618,6 +941,7 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         # instead of using operation ids, we're relying on Snowflake query ids here, meaning that:
         # - The list of returned interrupted_ids contains query ids of interrupted jobs, instead of their operation ids
         # - INTERRUPT_TYPE_OPERATION_ID interrupt type expects a Snowflake query id instead of an operation id
+
         try:
             match request.interrupt_type:
                 case proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_ALL:
@@ -627,9 +951,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                 case proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID:
                     interrupted_ids = interrupt_query(request.operation_id)
                 case _:
-
+                    exception = SnowparkConnectNotImplementedError(
                         f"INTERRUPT NOT IMPLEMENTED:\n{request}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception

         return proto_base.InterruptResponse(
             session_id=request.session_id,
@@ -647,9 +975,12 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         continue. If there is a ResultComplete, the client should use ReleaseExecute with
         """
         logger.info("ReattachExecute")
-
+
+        exception = SnowparkConnectNotImplementedError(
             "Spark client has detached, please resubmit request. In a future version, the server will be support the reattach."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     def ReleaseExecute(self, request: proto_base.ReleaseExecuteRequest, context):
         """Release an reattachable execution, or parts thereof.
@@ -666,6 +997,18 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         except Exception as e:
             _handle_exception(context, e)

+    def _cleanup_external_tables(self):
+        external_tables = get_request_external_tables()
+        if not external_tables:
+            return
+        session: snowpark.Session = get_or_create_snowpark_session()
+        for table in external_tables:
+            try:
+                session.sql(f"DROP EXTERNAL TABLE IF EXISTS {table}").collect()
+            except Exception as e:
+                logger.warning(f"Failed to drop external table {table}: {e}")
+        clean_request_external_tables()
+
     # TODO: These are required in Spark 4.x.
     # def ReleaseSession(self, request, context):
     #     """Release a session.
@@ -682,39 +1025,16 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
     #     return super().FetchErrorDetails(request, context)


-# Global state related to server connection
-_server_running: threading.Event = threading.Event()
-_server_error: bool = False
-_server_url: Optional[str] = None
-_client_url: Optional[str] = None
-
-
-# Used to reset server global state to the initial blank slate state if error happens during server startup.
-# Called after the startup error is caught and handled / logged etc.
-def _reset_server_run_state():
-    global _server_running, _server_error, _server_url, _client_url
-    _server_running.clear()
-    _server_error = False
-    _server_url = None
-    _client_url = None
-
-
-def _stop_server(stop_event: threading.Event, server: grpc.Server):
-    stop_event.wait()
-    server.stop(0)
-    _reset_server_run_state()
-    logger.info("server stop sent")
-
-
 def _serve(
     stop_event: Optional[threading.Event] = None,
     session: Optional[snowpark.Session] = None,
 ):
-
+    server_running = get_server_running()
     # TODO: factor out the Snowflake connection code.
     server = None
     try:
         config_snowpark()
+
         if session is None:
             session = get_or_create_snowpark_session()
         else:
@@ -725,33 +1045,16 @@ def _serve(
             # No need to start grpc server in TCM
             return

-        server_options =
-
-
-
-
-                    _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
-                ),
-            ),
-            (
-                "grpc.max_metadata_size",
-                get_int_from_env(
-                    "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                    _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-                ),
-            ),
-            (
-                "grpc.absolute_max_metadata_size",
-                get_int_from_env(
-                    "SNOWFLAKE_GRPC_MAX_METADATA_SIZE",
-                    _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-                )
-                * 2,
-            ),
-        ]
+        server_options = _get_default_grpc_options()
+
+        # cProfile doesn't work correctly with multiple threads
+        max_workers = 1 if PROFILING_ENABLED else 10
+
         server = grpc.server(
-            futures.ThreadPoolExecutor(max_workers=
+            futures.ThreadPoolExecutor(max_workers=max_workers),
+            options=server_options,
         )
+
         control_servicer = ControlServicer(session)
         proto_base_grpc.add_SparkConnectServiceServicer_to_server(
             SnowflakeConnectServicer(control_servicer.log_spark_connect_batch),
@@ -762,193 +1065,33 @@ def _serve(
         server.add_insecure_port(server_url)
         logger.info(f"Starting Snowpark Connect server on {server_url}...")
         server.start()
-
+        server_running.set()
         logger.info("Snowpark Connect server started!")
         telemetry.send_server_started_telemetry()
+
         if stop_event is not None:
             # start a background thread to listen for stop event and terminate the server
             threading.Thread(
                 target=_stop_server, args=(stop_event, server), daemon=True
             ).start()
+
         server.wait_for_termination()
     except Exception as e:
-
-
+        set_server_error(True)
+        server_running.set() # unblock any client sessions
         if "Invalid connection_name 'spark-connect', known ones are " in str(e):
             logger.error(
                 "Ensure 'spark-connect' connection config has been set correctly in connections.toml."
             )
         else:
             logger.error("Error starting up Snowpark Connect server", exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         # flush the telemetry queue if possible
         telemetry.shutdown()
-
-
-def _set_remote_url(remote_url: str):
-    global _server_url, _client_url
-    _client_url = remote_url
-    parsed_url = urllib.parse.urlparse(remote_url)
-    if parsed_url.scheme == "sc":
-        _server_url = parsed_url.netloc
-        server_port = parsed_url.port or DEFAULT_PORT
-        _check_port_is_free(server_port)
-    elif parsed_url.scheme == "unix":
-        _server_url = remote_url.split("/;")[0]
-    else:
-        raise RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
-
-
-def _set_server_tcp_port(server_port: int):
-    global _server_url, _client_url
-    _check_port_is_free(server_port)
-    _server_url = f"[::]:{server_port}"
-    _client_url = f"sc://127.0.0.1:{server_port}"
-
-
-def _check_port_is_free(port: int) -> None:
-    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-        s.settimeout(1)
-        if s.connect_ex(("127.0.0.1", port)) == 0:
-            raise RuntimeError(f"TCP port {port} is already in use")
-
-
-def _set_server_unix_domain_socket(path: str):
-    global _server_url, _client_url
-    _server_url = f"unix:{path}"
-    _client_url = f"unix:{path}"
-
-
-def get_server_url() -> str:
-    global _server_url
-    if not _server_url:
-        raise RuntimeError("Server URL not set")
-    return _server_url
-
-
-def get_client_url() -> str:
-    global _client_url
-    if not _client_url:
-        raise RuntimeError("Client URL not set")
-    return _client_url
-
-
-def _make_unix_domain_socket() -> str:
-    parent_dir = tempfile.mkdtemp()
-    server_path = os.path.join(parent_dir, "snowflake_sas_grpc.sock")
-    atexit.register(_cleanup_unix_domain_socket, server_path)
-    return server_path
-
-
-def _cleanup_unix_domain_socket(server_path: str) -> None:
-    parent_dir = os.path.dirname(server_path)
-    if os.path.exists(server_path):
-        os.remove(server_path)
-    if os.path.exists(parent_dir):
-        os.rmdir(parent_dir)
-
-
-class UnixDomainSocketChannelBuilder(ChannelBuilder):
-    """
-    Spark Connect gRPC channel builder for Unix domain sockets
-    """
-
-    def __init__(
-        self, url: str = None, channelOptions: Optional[List[Tuple[str, Any]]] = None
-    ) -> None:
-        if url is None:
-            url = get_client_url()
-        if url[:6] != "unix:/" or len(url) < 7:
-            raise PySparkValueError(
-                error_class="INVALID_CONNECT_URL",
-                message_parameters={
-                    "detail": "The URL must start with 'unix://'. Please update the URL to follow the correct format, e.g., 'unix://unix_domain_socket_path'.",
-                },
-            )
-
-        # Rewrite the URL to use http as the scheme so that we can leverage
-        # Python's built-in parser to parse URL parameters
-        fake_url = "http://" + url[6:]
-        self.url = urllib.parse.urlparse(fake_url)
-        self.params: Dict[str, str] = {}
-        self._extract_attributes()
-
-        # Now parse the real unix domain socket URL
-        self.url = urllib.parse.urlparse(url)
-
-        GRPC_DEFAULT_OPTIONS = [
-            ("grpc.max_send_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
-            ("grpc.max_receive_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
-            ("grpc.max_metadata_size", _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE),
-            (
-                "grpc.absolute_max_metadata_size",
-                2 * _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
-            ),
-        ]
-
-        if channelOptions is None:
-            self._channel_options = GRPC_DEFAULT_OPTIONS
-        else:
-            self._channel_options = GRPC_DEFAULT_OPTIONS + channelOptions
-        # For Spark 4.0 support, but also backwards compatible.
-        self._params = self.params
-
-    def _extract_attributes(self) -> None:
-        """Extract attributes from parameters.
-
-        This method was copied from
-        https://github.com/apache/spark/blob/branch-3.5/python/pyspark/sql/connect/client/core.py
-
-        This is required for Spark 4.0 support, since it is dropped in favor of moving
-        the extraction logic into the constructor.
-        """
-        if len(self.url.params) > 0:
-            parts = self.url.params.split(";")
-            for p in parts:
-                kv = p.split("=")
-                if len(kv) != 2:
-                    raise PySparkValueError(
-                        error_class="INVALID_CONNECT_URL",
-                        message_parameters={
-                            "detail": f"Parameter '{p}' should be provided as a "
-                            f"key-value pair separated by an equal sign (=). Please update "
-                            f"the parameter to follow the correct format, e.g., 'key=value'.",
-                        },
-                    )
-                self.params[kv[0]] = urllib.parse.unquote(kv[1])
-
-        netloc = self.url.netloc.split(":")
-        if len(netloc) == 1:
-            self.host = netloc[0]
-            if version.parse(pyspark.__version__) >= version.parse("4.0.0"):
-                from pyspark.sql.connect.client.core import DefaultChannelBuilder
-
-                self.port = DefaultChannelBuilder.default_port()
-            else:
-                self.port = ChannelBuilder.default_port()
-        elif len(netloc) == 2:
-            self.host = netloc[0]
-            self.port = int(netloc[1])
-        else:
-            raise PySparkValueError(
-                error_class="INVALID_CONNECT_URL",
-                message_parameters={
-                    "detail": f"Target destination '{self.url.netloc}' should match the
|
|
938
|
-
f"'<host>:<port>' pattern. Please update the destination to follow "
|
|
939
|
-
f"the correct format, e.g., 'hostname:port'.",
|
|
940
|
-
},
|
|
941
|
-
)
|
|
942
|
-
|
|
943
|
-
# We override this to enable compatibility with Spark 4.0
|
|
944
|
-
host = None
|
|
945
|
-
|
|
946
|
-
@property
|
|
947
|
-
def endpoint(self) -> str:
|
|
948
|
-
return f"{self.url.scheme}:{self.url.path}"
|
|
949
|
-
|
|
950
|
-
def toChannel(self) -> grpc.Channel:
|
|
951
|
-
return grpc.insecure_channel(self.endpoint, options=self._channel_options)
|
|
1093
|
+
# End the root span when server shuts down completely
|
|
1094
|
+
otel_end_root_span()
|
|
952
1095
|
|
|
953
1096
|
|
|
954
1097
|
def config_snowpark() -> None:
|
|
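The block removed above (the URL-selection helpers plus `UnixDomainSocketChannelBuilder`) ultimately reduces to opening an insecure gRPC channel against a `unix:` target. A minimal, hedged sketch of that idea follows; the socket path and the message-size option are illustrative assumptions, not values taken from this package.

```python
# Illustrative sketch only: dial a Spark Connect-style gRPC endpoint over a
# Unix domain socket. The socket path and option value below are assumptions.
import grpc

SOCKET_PATH = "/tmp/snowflake_sas_grpc.sock"  # hypothetical path

channel = grpc.insecure_channel(
    f"unix:{SOCKET_PATH}",
    options=[("grpc.max_receive_message_length", 128 * 1024 * 1024)],
)
```

The removed class layered Spark Connect's `ChannelBuilder` URL and parameter parsing on top of essentially this call (see its `toChannel` method).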
@@ -977,12 +1120,24 @@ def start_jvm():
         if tcm.TCM_MODE:
             # No-op if JVM is already started in TCM mode
             return
-        raise RuntimeError(
+        exception = RuntimeError(
             "JVM must not be running when starting the Spark Connect server"
         )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

+    import pathlib
+    import zipfile
+
+    import snowflake.snowpark_connect
+
+    # Import both JAR dependency packages
+    import snowpark_connect_deps_1
+    import snowpark_connect_deps_2
+
+    # First, add JARs from includes/jars directory
     pyspark_jars = (
-        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
+        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes" / "jars"
     )

     if "dataframe_processor.zip" in str(pyspark_jars):
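The `start_jvm` change above swaps a bare `raise RuntimeError(...)` for an attach-then-raise pattern: build the exception, tag it with a package error code, then raise it. A small sketch of the pattern, with simplified stand-ins for `attach_custom_error_code` and `ErrorCodes` rather than the package's own implementations:

```python
# Sketch of the attach-then-raise pattern; ErrorCodes and
# attach_custom_error_code are simplified stand-ins, not the package's code.
class ErrorCodes:
    INTERNAL_ERROR = "INTERNAL_ERROR"


def attach_custom_error_code(exc: Exception, code: str) -> Exception:
    # Record the code on the exception so callers can report it uniformly.
    exc.custom_error_code = code
    return exc


def ensure_jvm_not_running(jvm_already_started: bool) -> None:
    if jvm_already_started:
        exception = RuntimeError(
            "JVM must not be running when starting the Spark Connect server"
        )
        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
        raise exception
```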
@@ -991,18 +1146,31 @@ def start_jvm():
             snowflake.snowpark_connect.__file__
         ).parent.parent.parent
         temp_dir = tempfile.gettempdir()
-
         extract_folder = "snowflake/snowpark_connect/includes/jars/"  # Folder to extract (must end with '/')

         with zipfile.ZipFile(zip_path, "r") as zip_ref:
             for member in zip_ref.namelist():
                 if member.startswith(extract_folder):
                     zip_ref.extract(member, path=temp_dir)
-
         pyspark_jars = pathlib.Path(temp_dir) / extract_folder

-
-
+    included_jar_names = set()
+
+    if pyspark_jars.exists():
+        for jar_path in pyspark_jars.glob(
+            "**/*.jar"
+        ):  # Use **/*.jar to handle nested paths in TCM
+            jpype.addClassPath(str(jar_path))
+            included_jar_names.add(jar_path.name)
+
+    # Load jar files from both packages, skipping those already loaded from includes/jars
+    jar_path_list = (
+        snowpark_connect_deps_1.list_jars() + snowpark_connect_deps_2.list_jars()
+    )
+    for jar_path in jar_path_list:
+        # Skip if this JAR was already loaded from includes/jars
+        if jar_path.name not in included_jar_names:
+            jpype.addClassPath(jar_path)

     # TODO: Should remove convertStrings, but it breaks the JDBC code.
     jvm_settings: list[str] = list(
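The classpath logic added above prefers the bundled JARs and only adds a dependency-package JAR when no bundled JAR with the same file name was already registered. A compact sketch of that de-duplication; `bundled_dir` and `deps_jars` are hypothetical stand-ins for the includes/jars directory and the `list_jars()` results:

```python
# De-duplicate JARs by file name: bundled JARs win, dependency JARs fill gaps.
# bundled_dir and deps_jars are hypothetical inputs used only for illustration.
import pathlib
from typing import Iterable

import jpype


def add_jars(bundled_dir: pathlib.Path, deps_jars: Iterable[pathlib.Path]) -> None:
    seen = set()
    if bundled_dir.exists():
        for jar in bundled_dir.glob("**/*.jar"):  # handles nested layouts too
            jpype.addClassPath(str(jar))
            seen.add(jar.name)
    for jar in deps_jars:
        if jar.name not in seen:  # skip names already on the classpath
            jpype.addClassPath(str(jar))
```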
@@ -1027,6 +1195,7 @@ def start_session(
     snowpark_session: Optional[snowpark.Session] = None,
     connection_parameters: Optional[Dict[str, str]] = None,
     max_grpc_message_size: int = _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE,
+    _add_signal_handler: bool = False,
 ) -> threading.Thread | None:
     """
     Starts Spark Connect server connected to Snowflake. No-op if the Server is already running.
@@ -1048,147 +1217,80 @@ def start_session(
         connection_parameters: A dictionary of connection parameters to use to create the Snowpark session. If this is
             provided, the `snowpark_session` parameter must be None.
     """
-
-
-
-    _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size
+    # Increase recursion limit to 1100 (1000 by default)
+    # introduced due to Scala OSS Test: org.apache.spark.sql.ClientE2ETestSuite.spark deep recursion
+    sys.setrecursionlimit(1100)

-
+    # Apply PySpark Connect client patching for enhanced debugging (only if telemetry is enabled)
+    from snowflake.snowpark_connect.utils.patch_spark_line_number import (
+        patch_pyspark_connect,
+    )

-
+    if is_telemetry_enabled():
+        patch_pyspark_connect()

-
-
-
-    )
-    if connection_parameters is not None:
-        if snowpark_session is not None:
-            raise ValueError(
-                "Only specify one of snowpark_session and connection_parameters"
-            )
-        snowpark_session = snowpark.Session.builder.configs(
-            connection_parameters
-        ).create()
+    try:
+        # Set max grpc message size if provided
+        if max_grpc_message_size is not None:
+            set_grpc_max_message_size(max_grpc_message_size)

-
-
+        # Validate startup parameters
+        snowpark_session = validate_startup_parameters(
+            snowpark_session, connection_parameters
+        )
+
+        server_running = get_server_running()
+        if server_running.is_set():
             url = get_client_url()
             logger.warning(f"Snowpark Connect session is already running at {url}")
             return

-
-            raise RuntimeError(
-                "Can only set at most one of remote_url, tcp_port, and unix_domain_socket"
-            )
-
-        url_from_env = os.environ.get("SPARK_REMOTE", None)
-        if remote_url:
-            _set_remote_url(remote_url)
-        elif tcp_port:
-            _set_server_tcp_port(tcp_port)
-        elif unix_domain_socket:
-            _set_server_unix_domain_socket(unix_domain_socket)
-        elif url_from_env:
-            # Spark clients use environment variable SPARK_REMOTE to figure out Spark Connect URL. If none of the
-            # connection properties (remote_url, tcp_port, unix_domain_socket) are explicitly passed in to this
-            # function then we should try and mimic clients' behavior
-            # i.e. read the server URL from the SPARK_REMOTE environment variable.
-            _set_remote_url(url_from_env)
-        else:
-            # No connection properties can be found at all - either as arguments to this function or int the environment
-            # variable. We use random, unique Unix Domain Socket as a last fallback. Client can connect to this randomly
-            # generated UDS port using snowpark_connect.get_session().
-            # Mostly used in stored procs and Notebooks to avoid port conflicts.
-            if os.name == "nt":
-                # Windows does not support unix domain sockets, so use default TCP port instead.
-                _set_server_tcp_port(DEFAULT_PORT)
-            else:
-                # Generate unique, random UDS port. Mostly useful in stored proc environment to avoid port conflicts.
-                unix_domain_socket = _make_unix_domain_socket()
-                _set_server_unix_domain_socket(unix_domain_socket)
+        configure_server_url(remote_url, tcp_port, unix_domain_socket)

         start_jvm()
         _disable_protobuf_recursion_limit()
+        otel_initialize()
+
+        if _add_signal_handler:
+            setup_signal_handlers(stop_event)

         if is_daemon:
             arguments = (stop_event, snowpark_session)
-            # `daemon=True` ensures the server thread exits when script finishes.
-            server_thread = threading.Thread(target=_serve, args=arguments, daemon=True)
-            server_thread.start()
-            _server_running.wait()
-            if _server_error:
-                raise RuntimeError("Snowpark Connect session failed to start")
-            return server_thread
-        else:
-            # Launch in the foreground.
-            _serve(session=snowpark_session)
-    except Exception as e:
-        _reset_server_run_state()
-        logger.error(e, exc_info=True)
-        raise e

+            target_func = otel_create_context_wrapper(_serve)

-
-
-
-
-
-
-
-
-
-
-
-        RuntimeError: If Spark Connect server is not started.
-    """
-    try:
-        if not url:
-            url = get_client_url()
+            server_thread = threading.Thread(
+                target=target_func, args=arguments, daemon=True
+            )
+            server_thread.start()
+            server_running.wait()
+            if get_server_error():
+                exception = RuntimeError("Snowpark Connect session failed to start")
+                attach_custom_error_code(
+                    exception, ErrorCodes.STARTUP_CONNECTION_FAILED
+                )
+                raise exception

-
-            b = SparkSession.builder.channelBuilder(UnixDomainSocketChannelBuilder())
+            return server_thread
         else:
-
-
-            if conf is not None:
-                for k, v in conf.getAll():
-                    b.config(k, v)
-
-            return b.getOrCreate()
+            # Launch in the foreground with stop_event
+            _serve(stop_event=stop_event, session=snowpark_session)
     except Exception as e:
         _reset_server_run_state()
         logger.error(e, exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e


 def init_spark_session(conf: SparkConf = None) -> SparkSession:
-
-
-        from jdk4py import JAVA_HOME
-
-        os.environ["JAVA_HOME"] = str(JAVA_HOME)
-    except ModuleNotFoundError:
-        # For notebooks on Warehouse
-        os.environ["JAVA_HOME"] = os.environ["CONDA_PREFIX"]
-        os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
-            os.environ["CONDA_PREFIX"], "lib", "server"
-        )
-    logger.info("JAVA_HOME=%s", os.environ["JAVA_HOME"])
+    _setup_spark_environment()
+    from snowflake.snowpark_connect.utils.session import _get_current_snowpark_session

-
-    os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
-
-    snowpark_session = snowpark.context.get_active_session()
+    snowpark_session = _get_current_snowpark_session()
     start_session(snowpark_session=snowpark_session)
     return get_session(conf=conf)


-def enable_debug_logging():
-    logger.setLevel(logging.DEBUG)
-    for handler in logger.handlers:
-        handler.setLevel(logging.DEBUG)
-
-
 def _get_files_metadata(data_source: relations_proto.Read.DataSource) -> List[str]:
     # TODO: Handle paths on the cloud
     paths = data_source.paths
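The daemon-mode path above is an event-based readiness handshake: `_serve` runs on a daemon thread, the caller blocks on a shared `threading.Event` until the server is up (or has recorded a failure), and only then returns the thread. A stripped-down sketch of that handshake; every name below is an illustrative stand-in, not the package's accessors:

```python
# Readiness handshake sketch: the worker signals an Event whether startup
# succeeded or failed, so the caller never blocks forever.
import threading

server_running = threading.Event()
server_error = False


def _serve_stub() -> None:
    global server_error
    try:
        pass  # the real code would build and start the gRPC server here
    except Exception:
        server_error = True
    finally:
        server_running.set()  # unblock the caller in both outcomes


server_thread = threading.Thread(target=_serve_stub, daemon=True)
server_thread.start()
server_running.wait()
if server_error:
    raise RuntimeError("Snowpark Connect session failed to start")
```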
@@ -1206,15 +1308,3 @@ def _get_files_metadata(data_source: relations_proto.Read.DataSource) -> List[str]:
         ]
     )
     return files
-
-
-def _disable_protobuf_recursion_limit():
-    # https://github.com/protocolbuffers/protobuf/blob/960e79087b332583c80537c949621108a85aa442/src/google/protobuf/io/coded_stream.h#L616
-    # Disable protobuf recursion limit (default 100) because Spark workloads often produce deeply nested execution plans. For example:
-    # - Queries with many unions
-    # - Complex expressions with multiple levels of nesting
-    # Without this, legitimate Spark queries would fail with `(DecodeError) Error parsing message with type 'spark.connect.Relation'` error.
-    # see test_sql_resulting_in_nested_protobuf
-    from google.protobuf.pyext import cpp_message
-
-    cpp_message._message.SetAllowOversizeProtos(True)