snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +717 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +309 -26
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +224 -15
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +86 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
- snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +171 -48
- snowflake/snowpark_connect/server.py +528 -473
- snowflake/snowpark_connect/server_common/__init__.py +503 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +195 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +192 -40
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Utility for patching PySpark Connect client to inject stack trace debugging information
|
|
7
|
+
into gRPC requests for better error reporting and debugging.
|
|
8
|
+
|
|
9
|
+
Compatible with PySpark 3.5.3 - uses StringValue and inspect module for clean stack traces.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import inspect
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from collections import namedtuple
|
|
16
|
+
from typing import Callable, List, Optional
|
|
17
|
+
|
|
18
|
+
import pyspark
|
|
19
|
+
from google.protobuf.any_pb2 import Any
|
|
20
|
+
from google.protobuf.wrappers_pb2 import StringValue
|
|
21
|
+
from pyspark.sql.connect.client import SparkConnectClient
|
|
22
|
+
|
|
23
|
+
# Lightweight immutable record for one stack frame: the name of the function
# being executed, the path of the source file, and the line number in it.
CallSite = namedtuple("CallSite", ["function", "file", "linenum"])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def retrieve_stack_frames() -> Optional[List[CallSite]]:
    """
    Return a list of CallSites representing the relevant user code stack frames.

    Walks the current call stack (innermost frame first) and keeps only frames
    that look like user code: frames whose file lives under the installed
    PySpark package, in any ``site-packages`` directory, or in this module
    itself are skipped. At most five frames are returned.

    Returns:
        List of CallSite objects representing user code frames, or None if no
        frames were found.
    """
    # Upper bound on how many user frames are reported.
    max_user_frames = 5
    # Number of innermost frames dropped before filtering; presumably this
    # function, the patched wrapper, and the wrapper's immediate caller.
    frames_to_skip = 3

    # context=0: only filename/function/lineno are used below, so there is no
    # need for inspect to read source lines from disk for every frame.
    stack = inspect.stack(context=0)
    try:
        if not stack:
            return None

        # Paths to filter out (internal code)
        pyspark_path = os.path.dirname(pyspark.__file__)
        current_file = __file__  # This patch file itself

        user_frames = []

        # Slicing past the end simply yields no frames, so a stack shallower
        # than ``frames_to_skip`` produces an empty result.
        for frame_info in stack[frames_to_skip:]:
            filename = frame_info.filename

            # Skip internal PySpark code, installed libraries and this file
            if (
                filename.startswith(pyspark_path)
                or filename == current_file
                or "site-packages" in filename
            ):
                continue

            # This looks like user code
            user_frames.append(
                CallSite(
                    function=frame_info.function,
                    file=filename,
                    linenum=frame_info.lineno,
                )
            )

            # Limit to a reasonable number of frames
            if len(user_frames) >= max_user_frames:
                break

        return user_frames if user_frames else None
    finally:
        # The frame records include the current frame, whose locals in turn
        # reference ``stack``; drop it explicitly to break that cycle.
        del stack
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def exec_with_debug_info(orig_fn: Callable) -> Callable:
    """
    A closure to inject debug information into gRPC requests made to the server.

    The returned function calls ``orig_fn`` to build the request, collects the
    user-code stack frames via ``retrieve_stack_frames()``, serialises them as
    a JSON list, and appends that JSON (wrapped in a ``StringValue`` packed
    into an ``Any``) to ``req.user_context.extensions``. When no user frames
    are found an empty JSON list is attached instead.

    Args:
        orig_fn: The original PySpark Connect function to wrap.

    Returns:
        The modified PySpark Connect function with debug information injection.
    """
    # Imported locally so this helper needs nothing added to the module's
    # import block.
    import functools

    @functools.wraps(orig_fn)  # keep the wrapped method's name and docstring
    def patched_fn(*args, **kwargs):
        req = orig_fn(*args, **kwargs)

        # NOTE: retrieve_stack_frames() must be called directly from this
        # function; it skips a fixed number of innermost frames.
        stack_frames = retrieve_stack_frames() or []

        # One JSON object per frame; an empty list when nothing was found.
        stack_trace_data = [
            {
                "method_name": call_site.function,
                "file_name": call_site.file,
                "line_number": call_site.linenum,
            }
            for call_site in stack_frames
        ]

        # Pack the JSON-encoded stack trace into an Any and attach it to the
        # request (JSON string form kept for Spark 3.5.3 compatibility).
        any_obj = Any()
        any_obj.Pack(StringValue(value=json.dumps(stack_trace_data)))
        req.user_context.extensions.append(any_obj)

        return req

    return patched_fn
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def extract_stack_trace_from_extensions(extensions) -> List[dict]:
    """
    Extract stack trace information from user_context.extensions on the server side.

    Scans the extensions in order and returns the payload of the first
    ``StringValue`` extension whose content decodes as a JSON list.
    Extensions of another type, with invalid JSON, or whose JSON is not a
    list are ignored.

    Args:
        extensions: The extensions field from request.user_context.extensions.
            ``None`` or an empty collection yields an empty list.

    Returns:
        List of dictionaries containing stack trace information, or empty list
        if none found. Each dictionary is expected to contain: method_name,
        file_name, line_number (the entries are not validated here).
    """
    # Nothing to scan: treat a missing field the same as an empty one.
    if not extensions:
        return []

    for extension in extensions:
        if not extension.Is(StringValue.DESCRIPTOR):
            continue

        string_value = StringValue()
        extension.Unpack(string_value)

        try:
            stack_trace_data = json.loads(string_value.value)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass; a StringValue
            # that is not JSON belongs to someone else, so keep looking.
            continue

        if isinstance(stack_trace_data, list):
            return stack_trace_data

    return []
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def patch_pyspark_connect() -> None:
    """
    Patch the PySpark Connect client functions to include debug information in gRPC requests.

    This monkey-patches key SparkConnectClient methods to automatically inject
    stack trace information into the requests they build.

    The patch is idempotent: each wrapper is tagged with a marker attribute and
    a method that already carries the marker is left alone, so calling this
    function more than once does not stack wrappers (which would append the
    stack trace extension several times per request).

    Compatible with PySpark 3.5.3 - uses JSON-encoded StringValue for stack trace data.
    """
    # Attribute set on every wrapper installed here, used to detect re-entry.
    patched_marker = "_snowpark_connect_debug_info_patched"

    request_builders = (
        # Core request methods
        "_execute_plan_request_with_metadata",
        "_analyze_plan_request_with_metadata",
        "_config_request_with_metadata",
        # Interrupt request as well (usage uncertain, but included for completeness)
        "_interrupt_request",
    )

    for method_name in request_builders:
        current = getattr(SparkConnectClient, method_name)
        if getattr(current, patched_marker, False):
            # Already wrapped by an earlier call.
            continue

        patched = exec_with_debug_info(current)
        setattr(patched, patched_marker, True)
        setattr(SparkConnectClient, method_name, patched)
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import cProfile
|
|
12
12
|
import functools
|
|
13
|
+
import inspect
|
|
13
14
|
import os
|
|
14
15
|
from datetime import datetime
|
|
15
16
|
from typing import Any, Callable
|
|
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
|
|
|
35
36
|
profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
|
|
36
37
|
|
|
37
38
|
profiler = cProfile.Profile()
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
39
|
+
|
|
40
|
+
if inspect.isgeneratorfunction(method):
|
|
41
|
+
|
|
42
|
+
def profiled_generator():
|
|
43
|
+
profiler.enable()
|
|
44
|
+
try:
|
|
45
|
+
generator = method(*args, **kwargs)
|
|
46
|
+
for item in generator:
|
|
47
|
+
profiler.disable()
|
|
48
|
+
yield item
|
|
49
|
+
profiler.enable()
|
|
50
|
+
finally:
|
|
51
|
+
profiler.disable()
|
|
52
|
+
profiler.dump_stats(profile_filename)
|
|
53
|
+
|
|
54
|
+
return profiled_generator()
|
|
55
|
+
else:
|
|
56
|
+
profiler.enable()
|
|
57
|
+
try:
|
|
58
|
+
result = method(*args, **kwargs)
|
|
59
|
+
return result
|
|
60
|
+
finally:
|
|
61
|
+
profiler.disable()
|
|
62
|
+
profiler.dump_stats(profile_filename)
|
|
46
63
|
|
|
47
64
|
return wrapper
|