snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to their public registries. It is provided for informational purposes only.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +717 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +309 -26
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +224 -15
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +86 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
- snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +171 -48
- snowflake/snowpark_connect/server.py +528 -473
- snowflake/snowpark_connect/server_common/__init__.py +503 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +195 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +192 -40
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
Detailed hunks follow for two of the changed files: `snowflake/snowpark_connect/utils/telemetry.py` (+192 -40) and the new `snowflake/snowpark_connect/utils/temporary_view_cache.py` (+67 -0). Removed lines whose content did not survive extraction are shown as `…`.

snowflake/snowpark_connect/utils/telemetry.py:

```diff
@@ -11,10 +11,11 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterable
 from contextvars import ContextVar
+from dataclasses import dataclass
 from enum import Enum, unique
-from typing import Dict
 
 import google.protobuf.message
+import pyspark.sql.connect.proto.base_pb2 as proto_base
 
 from snowflake.connector.cursor import SnowflakeCursor
 from snowflake.connector.telemetry import (
@@ -26,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -56,22 +58,16 @@ class TelemetryType(Enum):
 
 class EventType(Enum):
     SERVER_STARTED = "scos_server_started"
+    WARNING = "scos_warning"
 
 
 # global labels
-SOURCE = "SparkConnectForSnowpark"
+DEFAULT_SOURCE = "SparkConnectForSnowpark"
 SCOS_VERSION = ".".join([str(d) for d in sas_version if d is not None])
 SNOWPARK_VERSION = ".".join([str(d) for d in snowpark_version if d is not None])
 PYTHON_VERSION = get_python_version()
 OS = get_os_name()
 
-STATIC_TELEMETRY_DATA = {
-    TelemetryField.KEY_SOURCE.value: SOURCE,
-    TelemetryField.KEY_VERSION.value: SCOS_VERSION,
-    TelemetryField.KEY_SNOWPARK_VERSION.value: SNOWPARK_VERSION,
-    TelemetryField.KEY_PYTHON_VERSION.value: PYTHON_VERSION,
-    TelemetryField.KEY_OS.value: OS,
-}
 
 # list of config keys for which we record values, other config values are not recorded
 RECORDED_CONFIG_KEYS = {
@@ -88,6 +84,7 @@ RECORDED_CONFIG_KEYS = {
     "spark.sql.session.localRelationCacheThreshold",
     "spark.sql.mapKeyDedupPolicy",
     "snowpark.connect.sql.passthrough",
+    "snowpark.connect.cte.optimization_enabled",
     "snowpark.connect.iceberg.external_volume",
     "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
@@ -104,11 +101,13 @@ REDACTED_PLAN_SUFFIXES = [
 ]
 
 
-…
-…
-…
-…
-…
+@dataclass
+class TelemetryMessage:
+    """Container for telemetry messages in the processing queue."""
+
+    message: dict
+    timestamp: int
+    is_warning: bool
 
 
 def safe(func):
```
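Note: the `TelemetryMessage` dataclass introduced above replaces an untyped queue payload. A minimal standalone sketch of the pattern, with the surrounding `Telemetry` class omitted and the payload values invented for illustration:

```python
import queue
from dataclasses import dataclass


@dataclass
class TelemetryMessage:
    """Container for telemetry messages in the processing queue."""

    message: dict
    timestamp: int
    is_warning: bool


# Bounded queue, as in the diff: producers enqueue typed payloads, a worker drains them.
q: "queue.Queue[TelemetryMessage | None]" = queue.Queue(maxsize=10000)
q.put_nowait(TelemetryMessage(message={"event": "demo"}, timestamp=0, is_warning=False))

item = q.get()
assert item is not None and item.message["event"] == "demo"
```

Compared with a bare tuple, the dataclass makes the `is_warning` flag explicit; the worker loop later uses it to avoid recursively reporting failures of warning messages.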
(telemetry.py, continued)

```diff
@@ -121,9 +120,11 @@ def safe(func):
     def wrap(*args, **kwargs):
         try:
             func(*args, **kwargs)
-        except Exception:
-            # …
-            …
+        except Exception as e:
+            # report failed operation to telemetry
+            telemetry.send_warning_msg(
+                f"Telemetry operation {func} failed due to exception", e
+            )
 
     return wrap
 
```
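Note: `safe` now reports the swallowed exception through telemetry instead of dropping it. A self-contained sketch of the same shape, with a print stub standing in for the real `telemetry.send_warning_msg` (and `functools.wraps` added, which the diff itself does not show):

```python
import functools


def send_warning_msg(msg: str, e: Exception = None) -> None:
    # Stub: the real implementation enqueues a warning telemetry event.
    print(f"WARN: {msg}: {e!r}")


def safe(func):
    """Run func, reporting (but never propagating) any exception."""

    @functools.wraps(func)
    def wrap(*args, **kwargs):
        try:
            func(*args, **kwargs)
        except Exception as e:
            send_warning_msg(f"Telemetry operation {func} failed due to exception", e)

    return wrap


@safe
def flaky():
    raise ValueError("boom")


flaky()  # prints a warning instead of raising
```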
(telemetry.py, continued)

```diff
@@ -238,6 +239,7 @@ class Telemetry:
         self._is_enabled = is_enabled
         self._is_initialized = False
         self._lock = threading.Lock()
+        self._source = DEFAULT_SOURCE
 
         # Async processing setup
         self._message_queue = queue.Queue(maxsize=10000)
@@ -246,11 +248,32 @@ class Telemetry:
     def __del__(self):
         self.shutdown()
 
-    def initialize(self, session: Session):
+    def _get_static_telemetry_data(self) -> dict:
+        """Get static telemetry data with current configuration."""
+        return {
+            TelemetryField.KEY_SOURCE.value: self._source,
+            TelemetryField.KEY_VERSION.value: SCOS_VERSION,
+            TelemetryField.KEY_SNOWPARK_VERSION.value: SNOWPARK_VERSION,
+            TelemetryField.KEY_PYTHON_VERSION.value: PYTHON_VERSION,
+            TelemetryField.KEY_OS.value: OS,
+        }
+
+    def _basic_telemetry_data(self) -> dict:
+        return {
+            **self._get_static_telemetry_data(),
+            TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
+        }
+
+    def initialize(self, session: Session, source: str = None):
         """
         Must be called after the session is created to initialize telemetry.
         Gets the telemetry client from the session's connection and uses it
         to report telemetry data.
+
+        Args:
+            session: Snowpark Session to use for telemetry
+            source: Optional source identifier for telemetry (e.g., "SparkConnectThinClient").
+                Defaults to "SparkConnectForSnowpark".
         """
         if not self._is_enabled:
             return
@@ -261,12 +284,15 @@
             return
         self._is_initialized = True
 
-        …
-        …
+        if source is not None:
+            self._source = source
+
+        telemetry_client = getattr(session._conn._conn, "_telemetry", None)
+        if telemetry_client is None:
             # no telemetry client available, so we export with queries
             self._sink = QueryTelemetrySink(session)
         else:
-            self._sink = ClientTelemetrySink(…)
+            self._sink = ClientTelemetrySink(telemetry_client)
 
         self._start_worker_thread()
         logger.info(f"Telemetry initialized with {type(self._sink)}")
```
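Note: `initialize` now takes an optional `source` and selects a sink by probing the connector's private telemetry client with `getattr(..., "_telemetry", None)`. A sketch of that selection logic with stand-in classes; the real code reaches the client through `session._conn._conn`, which this flattens to one attribute:

```python
class QueryTelemetrySink:
    """Stand-in: the real sink exports telemetry via queries."""

    def __init__(self, session):
        self.session = session


class ClientTelemetrySink:
    """Stand-in: the real sink uses the connector's telemetry client."""

    def __init__(self, telemetry_client):
        self.client = telemetry_client


def choose_sink(session):
    # getattr with a default avoids AttributeError on private/absent attributes.
    telemetry_client = getattr(session, "_telemetry", None)
    if telemetry_client is None:
        return QueryTelemetrySink(session)
    return ClientTelemetrySink(telemetry_client)


class FakeSession:
    _telemetry = object()


print(type(choose_sink(FakeSession())).__name__)  # ClientTelemetrySink
print(type(choose_sink(object())).__name__)       # QueryTelemetrySink
```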
(telemetry.py, continued)

```diff
@@ -289,10 +315,7 @@
 
         self._request_summary.set(summary)
 
-        …
-        summary["query_plan"] = _protobuf_to_json_with_redaction(
-            request.plan, REDACTED_PLAN_SUFFIXES
-        )
+        _set_query_plan(request, summary)
 
     def _not_in_request(self):
         # we don't want to add things to the summary if it's not initialized
@@ -335,6 +358,11 @@
         summary["error_message"] = str(e)
         summary["error_type"] = type(e).__name__
 
+        if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+            summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+        else:
+            summary["error_code"] = e.custom_error_code
+
         error_location = _error_location(e)
         if error_location:
             summary["error_location"] = error_location
```
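Note: the error summary now records an `error_code` read from a `custom_error_code` attribute that the error utilities attach to exceptions. A minimal round-trip sketch; these `ErrorCodes` values are placeholders, not the package's real codes:

```python
from enum import IntEnum


class ErrorCodes(IntEnum):
    INTERNAL_ERROR = 1000      # placeholder value
    INVALID_OPERATION = 1001   # placeholder value


def attach_custom_error_code(e: Exception, code: ErrorCodes) -> None:
    # Exceptions are ordinary objects, so an extra attribute survives the raise.
    e.custom_error_code = code


def error_code_for(e: Exception) -> ErrorCodes:
    # Mirrors the diff: fall back to INTERNAL_ERROR when no code was attached.
    if not hasattr(e, "custom_error_code") or e.custom_error_code is None:
        return ErrorCodes.INTERNAL_ERROR
    return e.custom_error_code


exc = ValueError("bad input")
attach_custom_error_code(exc, ErrorCodes.INVALID_OPERATION)
assert error_code_for(exc) is ErrorCodes.INVALID_OPERATION
assert error_code_for(RuntimeError("x")) is ErrorCodes.INTERNAL_ERROR
```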
(telemetry.py, continued)

```diff
@@ -426,6 +454,63 @@
 
         summary["internal_queries"] += 1
 
+    @safe
+    def report_describe_query_cache_lookup(self):
+        """Report a describe query cache lookup."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_lookups" not in summary:
+            summary["describe_cache_lookups"] = 0
+
+        summary["describe_cache_lookups"] += 1
+
+    @safe
+    def report_describe_query_cache_hit(self):
+        """Report a describe query cache hit."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_hits" not in summary:
+            summary["describe_cache_hits"] = 0
+
+        summary["describe_cache_hits"] += 1
+
+    @safe
+    def report_describe_query_cache_expired(self, expired_by: float):
+        """Report a describe query cache expiration."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_expired" not in summary:
+            summary["describe_cache_expired"] = 0
+
+        summary["describe_cache_expired"] += 1
+
+        if "describe_cache_expired_by" not in summary:
+            summary["describe_cache_expired_by"] = []
+
+        summary["describe_cache_expired_by"].append(expired_by)
+
+    @safe
+    def report_describe_query_cache_clear(self):
+        """Report a describe query cache clear."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_cleared" not in summary:
+            summary["describe_cache_cleared"] = 0
+
+        summary["describe_cache_cleared"] += 1
+
     @safe
     def report_udf_usage(self, udf_name: str):
         if self._not_in_request():
```
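Note: each reporter above guards its counter with an explicit `not in` check before incrementing. `dict.setdefault` expresses the same accumulate-if-absent step, as this equivalent sketch shows:

```python
summary: dict = {}

# Pattern used in the diff:
if "describe_cache_hits" not in summary:
    summary["describe_cache_hits"] = 0
summary["describe_cache_hits"] += 1

# Equivalent, one line shorter:
summary.setdefault("describe_cache_lookups", 0)
summary["describe_cache_lookups"] += 1

# Works for list-valued entries too, e.g. the expired-by samples:
summary.setdefault("describe_cache_expired_by", []).append(1.5)
print(summary)
```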
(telemetry.py, continued)

```diff
@@ -460,7 +545,7 @@
     @safe
     def send_server_started_telemetry(self):
         message = {
-            **_basic_telemetry_data(),
+            **self._basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
             TelemetryType.EVENT_TYPE.value: EventType.SERVER_STARTED.value,
             TelemetryField.KEY_DATA.value: {
@@ -472,27 +557,69 @@
     @safe
     def send_request_summary_telemetry(self):
         if self._not_in_request():
-            …
-            "…"
+            self.send_warning_msg(
+                "Trying to send request summary telemetry without initializing it"
             )
             return
 
         summary = self._request_summary.get()
         message = {
-            **_basic_telemetry_data(),
+            **self._basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_REQUEST_SUMMARY.value,
             TelemetryField.KEY_DATA.value: summary,
         }
         self._send(message)
 
-    def _send(self, msg: dict) -> None:
+    def send_warning_msg(self, msg: str, e: Exception = None) -> None:
+        # using this within @safe decorator may result in recursive loop
+        try:
+            message = self._build_warning_message(msg, e)
+            if not message:
+                return
+
+            self._send(message, is_warning=True)
+        except Exception:
+            # if there's an exception here, there's nothing we can really do about it
+            pass
+
+    def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
+        try:
+            data = {"warning_message": warning_msg}
+            if e is not None:
+                data["exception"] = repr(e)
+
+            # add session and operation id if available
+            spark_session_id = self._request_summary.get().get("spark_session_id", None)
+            if spark_session_id is not None:
+                data["spark_session_id"] = spark_session_id
+
+            spark_operation_id = self._request_summary.get().get(
+                "spark_operation_id", None
+            )
+            if spark_operation_id is not None:
+                data["spark_operation_id"] = spark_operation_id
+
+            message = {
+                **self._basic_telemetry_data(),
+                TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
+                TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
+                TelemetryField.KEY_DATA.value: data,
+            }
+            return message
+        except Exception:
+            return {}
+
+    def _send(self, msg: dict, is_warning: bool = False) -> None:
         """Queue a telemetry message for asynchronous processing."""
         if not self._is_enabled:
             return
 
         timestamp = get_time_millis()
         try:
-            …
+            telemetry_msg = TelemetryMessage(
+                message=msg, timestamp=timestamp, is_warning=is_warning
+            )
+            self._message_queue.put_nowait(telemetry_msg)
         except queue.Full:
             # If queue is full, drop the message to avoid blocking
             logger.warning("Telemetry queue is full, dropping message")
@@ -510,13 +637,16 @@
         while True:
             try:
                 # block to allow the GIL to switch threads
-                …
-                if …:
-                    # shutdown
+                telemetry_msg = self._message_queue.get()
+                if telemetry_msg is None:
+                    # shutdown signal
                     break
-                self._sink.add_telemetry_data(
-                …
-                …
+                self._sink.add_telemetry_data(
+                    telemetry_msg.message, telemetry_msg.timestamp
+                )
+            except Exception as e:
+                if not telemetry_msg.is_warning:
+                    self.send_warning_msg("Failed to add telemetry message to sink", e)
             finally:
                 self._message_queue.task_done()
 
```
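Note: the worker thread drains the queue with a blocking `get()` and treats `None` as the shutdown sentinel, which `shutdown()` enqueues. A runnable sketch of that loop against a stub sink, using plain dicts for the payload:

```python
import queue
import threading

q: queue.Queue = queue.Queue(maxsize=10000)


def add_telemetry_data(message: dict, timestamp: int) -> None:
    print(f"sink <- {message} @ {timestamp}")  # stub sink


def worker() -> None:
    while True:
        try:
            item = q.get()  # blocks; lets the GIL switch threads
            if item is None:
                break       # shutdown sentinel
            add_telemetry_data(item["message"], item["timestamp"])
        finally:
            q.task_done()   # runs even on the sentinel/break path


t = threading.Thread(target=worker, daemon=True)
t.start()
q.put_nowait({"message": {"event": "demo"}, "timestamp": 0})
q.put_nowait(None)  # same sentinel shutdown() sends
t.join(timeout=3.0)
```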
(telemetry.py, continued)

```diff
@@ -529,7 +659,7 @@
             return
 
         try:
-            self._message_queue.put_nowait(…)
+            self._message_queue.put_nowait(None)
             # Wait for worker thread to finish
             self._worker_thread.join(timeout=3.0)
         except Exception:
@@ -538,7 +668,7 @@
             )
 
 
-def _error_location(e: Exception) -> …:
+def _error_location(e: Exception) -> dict | None:
     """
     Inspect the exception traceback and extract the file name, line number, and function name
     from the last frame (the one that raised the exception).
@@ -619,7 +749,7 @@ def _protobuf_to_json_with_redaction(
     """Recursively convert protobuf message to dict"""
 
     if not isinstance(msg, google.protobuf.message.Message):
-        …
+        telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
         return {}
 
     result = {}
@@ -644,6 +774,28 @@
         )
 
 
+def _set_query_plan(request: google.protobuf.message.Message, summary: dict) -> None:
+    if isinstance(request, proto_base.ExecutePlanRequest):
+        # ExecutePlanRequest has plan at top level
+        if hasattr(request, "plan"):
+            summary["query_plan"] = (
+                _protobuf_to_json_with_redaction(request.plan, REDACTED_PLAN_SUFFIXES),
+            )
+
+    elif isinstance(request, proto_base.AnalyzePlanRequest):
+        # AnalyzePlanRequest has plan under oneof analyze
+        analyze_type = request.WhichOneof("analyze")
+        if not analyze_type:
+            return
+
+        summary["analyze_type"] = analyze_type
+        analyze_field = getattr(request, analyze_type)
+        if hasattr(analyze_field, "plan"):
+            summary["query_plan"] = _protobuf_to_json_with_redaction(
+                analyze_field.plan, REDACTED_PLAN_SUFFIXES
+            )
+
+
 # global telemetry client
 telemetry = Telemetry(is_enabled="SNOWPARK_CONNECT_DISABLE_TELEMETRY" not in os.environ)
 
```
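Note: `_set_query_plan` uses protobuf's `WhichOneof` to discover which member of the `analyze` oneof is populated, then fetches it with `getattr`. The same API exists on any generated message; here demonstrated with the well-known `Value` type from the `protobuf` package:

```python
from google.protobuf.struct_pb2 import Value

v = Value(string_value="SELECT 1")

# Which member of the 'kind' oneof is populated?
which = v.WhichOneof("kind")     # -> "string_value"
print(which, getattr(v, which))  # dynamic field access, as in _set_query_plan

empty = Value()
print(empty.WhichOneof("kind"))  # -> None when no member is set
```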
snowflake/snowpark_connect/utils/temporary_view_cache.py (new file):

```diff
@@ -0,0 +1,67 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from typing import Optional, Tuple
+
+from pyspark.errors import AnalysisException
+
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+from snowflake.snowpark_connect.utils.context import get_spark_session_id
+
+_temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+    normalized_name = _normalize(name)
+    current_session_id = get_spark_session_id()
+    for key in list(_temp_views.keys()):
+        if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+            if replace:
+                _temp_views.remove(key)
+                break
+            else:
+                exception = AnalysisException(
+                    f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
+
+    _temp_views[(name, current_session_id)] = df
+
+
+def unregister_temp_view(name: str) -> bool:
+    normalized_name = _normalize(name)
+
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            pop_result = _temp_views.remove(key)
+            return pop_result is not None
+    return False
+
+
+def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+    normalized_name = _normalize(name)
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            return _temp_views.get(key)
+    return None
+
+
+def get_temp_view_normalized_names() -> list[str]:
+    return [
+        _normalize(key[0])
+        for key in _temp_views.keys()
+        if key[1] == get_spark_session_id()
+    ]
+
+
+def _normalize(name: str) -> str:
+    from snowflake.snowpark_connect.config import global_config
+
+    return name if global_config.spark_sql_caseSensitive else name.lower()
```