snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -4,13 +4,17 @@
 
 import logging
 import os
+import threading
 from collections.abc import Sequence
 from typing import Any
 
 from snowflake import snowpark
-from snowflake.
+from snowflake.connector.description import PLATFORM
+from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -21,6 +25,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache
 
+SKIP_SESSION_CONFIGURATION = False
+
+
+def skip_session_configuration(skip: bool):
+    global SKIP_SESSION_CONFIGURATION
+    SKIP_SESSION_CONFIGURATION = skip
+
 
 # Suppress experimental warnings from snowflake.snowpark logger
 def _filter_experimental_warnings(record):
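The new `SKIP_SESSION_CONFIGURATION` flag (the hunks here appear to belong to `snowflake/snowpark_connect/utils/session.py`, going by the file list above) lets callers opt out of the `ALTER SESSION` setup performed later in `configure_snowpark_session`. A minimal sketch of how a caller might use it; the import path and the surrounding scenario are assumptions, not shown in the diff:

```python
# Hypothetical caller, e.g. a Native App stored procedure that runs as
# Execute As Owner and therefore cannot issue ALTER SESSION (SNOW-2245971).
# The import path is inferred from the package file list above.
from snowflake.snowpark_connect.utils.session import skip_session_configuration

# Ask configure_snowpark_session() to only log the required session
# parameters instead of setting them itself.
skip_session_configuration(True)

# ... build the Snowpark Connect session here; the operator must ensure
# TIMEZONE, QUERY_TAG, etc. are already set correctly for the session ...

# Restore the default behavior if the process is reused.
skip_session_configuration(False)
```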
@@ -50,7 +61,12 @@ def _get_current_snowpark_session() -> snowpark.Session | None:
 
 def configure_snowpark_session(session: snowpark.Session):
     """Configure a snowpark session with required parameters and settings."""
-    from snowflake.snowpark_connect.config import
+    from snowflake.snowpark_connect.config import (
+        get_cte_optimization_enabled,
+        global_config,
+    )
+
+    global SKIP_SESSION_CONFIGURATION
 
     logger.info(f"Configuring session {session}")
 
@@ -63,11 +79,28 @@ def configure_snowpark_session(session: snowpark.Session):
     # custom udf imports
     session._python_files = set()
     session._import_files = set()
+    session._artifact_jars = set()
+
+    # custom artifact attributes
+    # track current chunk
+    # key: session_id, value: dict of (name, num_chunks, current_chunk_index)
+    session._current_chunk: dict[str, dict] = {}
+    # Use thread-safe access when modifying current chunk dictionary
+    session._current_chunk_lock = threading.RLock()
+
+    # track filenames to be uploaded to stage
+    # key: session_id, value: dict of (name, filename)
+    session._filenames: dict[str, dict[str, str]] = {}
+    # Use thread-safe access when modifying filenames dictionary
+    session._filenames_lock = threading.RLock()
 
     # built-in udf cache
     init_builtin_udf_cache(session)
     init_external_udxf_cache(session)
 
+    # file format cache
+    session._file_formats = set()
+
     # Set experimental parameters (warnings globally suppressed)
     session.ast_enabled = False
     session.eliminate_numeric_sql_value_cast_enabled = False
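The chunk and filename registries added above are plain dicts guarded by `RLock`s. A small self-contained sketch of the locking pattern the comments call for; the helper below is hypothetical, mirroring the attribute layout rather than any function in the diff:

```python
import threading

# Stand-ins for the attributes configure_snowpark_session() attaches:
# key: session_id, value: dict of (name, num_chunks, current_chunk_index)
current_chunk: dict[str, dict] = {}
current_chunk_lock = threading.RLock()


def record_chunk(session_id: str, name: str, num_chunks: int, index: int) -> None:
    # All mutation happens under the lock, as the comments above require,
    # so concurrent Spark Connect requests cannot interleave updates.
    with current_chunk_lock:
        current_chunk[session_id] = {
            "name": name,
            "num_chunks": num_chunks,
            "current_chunk_index": index,
        }
```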
@@ -77,6 +110,18 @@ def configure_snowpark_session(session: snowpark.Session):
     session.connection.arrow_number_to_decimal_setter = True
     session.custom_package_usage_config["enabled"] = True
 
+    # Scoped temp objects may not be accessible in stored procedure and cause "object does not exist" error. So disable
+    # _use_scoped_temp_objects here and use temp table instead.
+    session._use_scoped_temp_objects = False
+
+    # Configure CTE optimization based on session configuration
+    cte_optimization_enabled = get_cte_optimization_enabled()
+    session.cte_optimization_enabled = cte_optimization_enabled
+    logger.info(f"CTE optimization enabled: {cte_optimization_enabled}")
+
+    # Default query tag to be used unless overridden by user using AppName or spark.addTag()
+    query_tag = "SNOWPARK_CONNECT_QUERY"
+
     default_fallback_timezone = "UTC"
     if global_config.spark_sql_session_timeZone is None:
         try:
@@ -103,35 +148,20 @@
         "TIMEZONE": f"'{global_config.spark_sql_session_timeZone}'",
         "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
         "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
-        "
+        "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
+        "QUERY_TAG": f"'{query_tag}'",
     }
 
-    session.
-
-    ).collect()
-
-    # Rolling ahead in preparation of GS release 9.22 (ETA 8/5/2025). Once 9.22 is past rollback risk, merge this
-    # parameter with other in the session_params dictionary above
-    try:
+    # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
+    if not SKIP_SESSION_CONFIGURATION:
         session.sql(
-            "ALTER SESSION SET
+            f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
         ).collect()
-
-
-
+    else:
+        session_param_names = ", ".join(session_params.keys())
+        logger.info(
+            f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
         )
-    try:
-        session.sql(
-            "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT=true"
-        ).collect()
-    except SnowparkSQLException:
-        logger.debug("ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT is not defined")
-    try:
-        session.sql(
-            "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE=true"
-        ).collect()
-    except SnowparkSQLException:
-        logger.debug("ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE is not defined")
 
     # Instrument the snowpark session to use a cache for describe queries.
     instrument_session_for_describe_cache(session)
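The rewritten block folds every parameter, including the two that previously needed their own guarded `ALTER SESSION` calls, into a single statement. A standalone sketch of the string the f-string builds, using the values from the hunk and assuming the session timezone resolved to UTC:

```python
session_params = {
    "TIMEZONE": "'UTC'",
    "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
    "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
    "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
    "QUERY_TAG": "'SNOWPARK_CONNECT_QUERY'",
}

# The same join expression as in the hunk above.
stmt = f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
print(stmt)
# ALTER SESSION SET TIMEZONE = 'UTC', QUOTED_IDENTIFIERS_IGNORE_CASE = false,
# PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION = true, ..., QUERY_TAG = 'SNOWPARK_CONNECT_QUERY'
```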
@@ -145,6 +175,10 @@ def _is_running_in_SPCS():
     )
 
 
+def _is_running_in_stored_procedure_or_notebook():
+    return PLATFORM == "XP"
+
+
 def _get_session_configs_from_ENV() -> dict[str, Any]:
     session_configs = {
         "account": os.getenv("SNOWFLAKE_ACCOUNT"),
@@ -198,11 +232,13 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
     if any("," in tag for tag in spark_tags):
-
+        exception = ValueError("Tags cannot contain ','.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
     snowpark_session = get_or_create_snowpark_session()
     spark_tags_str = ",".join(sorted(spark_tags)) if spark_tags else None
 
-    if spark_tags_str != snowpark_session.query_tag:
+    if spark_tags_str and spark_tags_str != snowpark_session.query_tag:
         snowpark_session.query_tag = spark_tags_str
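Two behavioral changes land in `set_query_tags`: invalid tags now raise an error carrying a custom error code, and an empty tag list no longer clears an existing `query_tag`, since the joined string must be truthy before the session is touched. A contrived sketch of the new guard in isolation; `should_update` is a hypothetical helper, not part of the diff:

```python
def should_update(spark_tags: list[str], current_tag: str | None) -> bool:
    # Mirrors the updated condition: the joined string must be non-empty
    # AND differ from the session's current query_tag.
    spark_tags_str = ",".join(sorted(spark_tags)) if spark_tags else None
    return bool(spark_tags_str) and spark_tags_str != current_tag


print(should_update(["daily", "etl"], None))  # True  -> tag is (re)set
print(should_update([], "daily,etl"))         # False -> old code would have reset query_tag to None
```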
@@ -6,17 +6,59 @@ import logging
 
 from pyspark import StorageLevel
 
-logger = logging.getLogger("snowflake_connect_server")
-logger.setLevel(logging.WARN)
 
-
-
-
-    "
+def ensure_logger_has_handler(
+    logger_name: str, log_level: int = logging.INFO, force_level: bool = False
+):
+    """
+    Ensure a logger has a StreamHandler, add one if missing.
+    Checks both the specific logger and root logger for existing handlers.
+
+    Args:
+        logger_name: Name of the logger to configure
+        log_level: Log level to set on both logger and handler
+        force_level: If True, always set the log level. If False, only set if logger level is NOTSET
+
+    Returns:
+        The configured logger
+    """
+    target_logger = logging.getLogger(logger_name)
+
+    # Only set level if forced or if logger hasn't been configured yet
+    if force_level or target_logger.level == logging.NOTSET:
+        target_logger.setLevel(log_level)
+    else:
+        log_level = target_logger.level
+
+    # Check if the logger already has a StreamHandler
+    has_stream_handler = any(
+        isinstance(h, logging.StreamHandler) for h in target_logger.handlers
+    )
+
+    # Check if root logger has handlers (from basicConfig or manual setup)
+    root_logger = logging.getLogger()
+    has_root_handlers = len(root_logger.handlers) > 0
+
+    # Only add handler if:
+    # 1. Logger doesn't have its own StreamHandler AND
+    # 2. Root logger doesn't have handlers (to avoid duplication)
+    if not has_stream_handler and not has_root_handlers:
+        handler = logging.StreamHandler()
+        handler.setLevel(log_level)
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - [Thread %(thread)d] - %(message)s"
+        )
+        handler.setFormatter(formatter)
+        target_logger.addHandler(handler)
+
+    return target_logger
+
+
+# Initialize the main logger using the helper function
+# force_level=False means it will respect any existing log level configuration
+logger = ensure_logger_has_handler(
+    "snowflake_connect_server", logging.INFO, force_level=False
 )
-console_handler.setFormatter(formatter)
-# Display the logs to the console
-logger.addHandler(console_handler)
 
 
 def run_once_decorator(func):
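`ensure_logger_has_handler` replaces the old unconditional handler setup, so it cooperates with whatever logging configuration the host process already has. A short usage sketch; the import path is inferred from the file list (`snowflake/snowpark_connect/utils/snowpark_connect_logging.py`), not stated in the hunk:

```python
import logging

from snowflake.snowpark_connect.utils.snowpark_connect_logging import (
    ensure_logger_has_handler,
)

# Fresh process, no logging configured yet: the helper sets the level and
# attaches a StreamHandler with the thread-aware format from the hunk above.
log = ensure_logger_has_handler("my.module", logging.DEBUG)
log.debug("goes through the helper-attached handler")

# If the application already called basicConfig, the root logger has a
# handler, so the helper adds nothing and records propagate normally
# (no duplicate console lines).
logging.basicConfig(level=logging.INFO)
other = ensure_logger_has_handler("another.module")
other.info("delivered via the root handler only")
```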
@@ -0,0 +1,290 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+"""
+SPCS Logger - Adapted from ExecPlatform/src/coprocessor/python/telemetry/py/logger.py
+Outputs flat JSON format compatible with SPCS OpenTelemetry collector with proper trace context.
+"""
+
+import json
+import logging
+import sys
+import traceback
+from typing import Any, Mapping, Sequence
+
+
+class SPCSLoggerConfig:
+    """Configuration for SPCS logger."""
+
+    MESSAGE_SIZE_LIMIT_BYTES = 524288  # 512KB
+    ELLIPSIS = "..."
+
+    # Set to True if initialized
+    is_initialized = False
+
+
+def _encode_value_simple(value: Any) -> Any:
+    """
+    Encode a value to simple JSON format (not OpenTelemetry nested format).
+    SPCS expects flat JSON values, not the {stringValue: ...} format.
+    """
+    if isinstance(value, (bool, str, int, float)):
+        return value
+    if isinstance(value, Sequence) and not isinstance(value, str):
+        return [_encode_value_simple(v) for v in value]
+    if isinstance(value, Mapping):
+        return {str(k): _encode_value_simple(v) for k, v in value.items()}
+    # Stringify anything else
+    return str(value)
+
+
+# Skip Python's built-in LogRecord attributes
+_RESERVED_ATTRS = frozenset(
+    (
+        "asctime",
+        "args",
+        "created",
+        "exc_info",
+        "exc_text",
+        "filename",
+        "funcName",
+        "message",
+        "levelname",
+        "levelno",
+        "lineno",
+        "module",
+        "msecs",
+        "msg",
+        "name",
+        "pathname",
+        "process",
+        "processName",
+        "relativeCreated",
+        "stack_info",
+        "thread",
+        "threadName",
+        "taskName",
+    )
+)
+
+
+def _extract_attributes(record: logging.LogRecord) -> dict:
+    """Extract log record attributes to flat dict format for SPCS."""
+    attributes = {}
+
+    # Extract custom attributes from extra={}
+    for k, v in vars(record).items():
+        if k not in _RESERVED_ATTRS:
+            attributes[k] = _encode_value_simple(v)
+
+    # Add standard code location attributes
+    attributes["code.lineno"] = record.lineno
+    attributes["code.function"] = record.funcName
+    attributes["code.filepath"] = record.pathname
+
+    # Add exception info if present
+    if record.exc_info is not None:
+        exctype, value, tb = record.exc_info
+        if exctype is not None:
+            attributes["exception.type"] = exctype.__name__
+        if value is not None and value.args:
+            attributes["exception.message"] = str(value.args[0])
+        if tb is not None:
+            attributes["exception.stacktrace"] = "".join(
+                traceback.format_exception(*record.exc_info)
+            )
+
+    return attributes
+
+
+def get_snowflake_log_level_name(py_level_name: str) -> str:
+    """
+    Convert Python log level to Snowflake log level.
+    This matches the original UDF logger implementation.
+    """
+    level = py_level_name.upper()
+    if level == "WARNING":
+        return "WARN"
+    elif level == "CRITICAL":
+        return "FATAL"
+    elif level == "NOTSET":
+        return "TRACE"
+    else:
+        return level
+
+
+def get_severity_number(snowflake_level: str) -> int:
+    """
+    Get OTLP severity number (integer) for a Snowflake log level.
+
+    OTLP Spec: https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
+    This returns INTEGER values (not strings like the buggy UDF code).
+    """
+    if snowflake_level == "TRACE":
+        return 1  # SEVERITY_NUMBER_TRACE
+    elif snowflake_level == "DEBUG":
+        return 5  # SEVERITY_NUMBER_DEBUG
+    elif snowflake_level == "INFO":
+        return 9  # SEVERITY_NUMBER_INFO
+    elif snowflake_level == "WARN":
+        return 13  # SEVERITY_NUMBER_WARN
+    elif snowflake_level == "ERROR":
+        return 17  # SEVERITY_NUMBER_ERROR
+    elif snowflake_level == "FATAL":
+        return 21  # SEVERITY_NUMBER_FATAL
+    else:
+        return 0  # SEVERITY_NUMBER_UNSPECIFIED
+
+
+def _encode_spcs_log_record(record: logging.LogRecord) -> dict:
+    """
+    Encode a log record to the FLAT JSON format expected by SPCS.
+
+    SPCS OpenTelemetry collector expects:
+    {
+        "body": "message",
+        "severity_text": "INFO",
+        "severity_number": 9,  # INTEGER, not string!
+        "attributes": {...},
+        "scope": {"name": "logger_name"}
+    }
+    """
+    # Format the message
+    message = str(record.msg)
+    if record.args:
+        try:
+            message = message % record.args
+        except (TypeError, ValueError):
+            message = str(record.msg)
+
+    # Truncate message if it exceeds size limit
+    message_bytes = message.encode("utf-8", errors="replace")
+    if sys.getsizeof(message_bytes) > SPCSLoggerConfig.MESSAGE_SIZE_LIMIT_BYTES:
+        truncate_length = SPCSLoggerConfig.MESSAGE_SIZE_LIMIT_BYTES - len(
+            SPCSLoggerConfig.ELLIPSIS.encode()
+        )
+        # Ensure we don't cut in the middle of a UTF-8 multibyte sequence
+        while truncate_length > 0 and (message_bytes[truncate_length] & 0xC0) == 0x80:
+            truncate_length -= 1
+        message_bytes = message_bytes[0:truncate_length]
+        message = (
+            message_bytes.decode("utf-8", errors="replace") + SPCSLoggerConfig.ELLIPSIS
+        )
+
+    # Map to Snowflake log level
+    snowflake_level = get_snowflake_log_level_name(record.levelname)
+
+    # Construct the FLAT log record (NOT nested OpenTelemetry structure)
+    log_record = {
+        "body": message,
+        "severity_text": snowflake_level,
+        "severity_number": get_severity_number(snowflake_level),  # INTEGER!
+        "attributes": _extract_attributes(record),
+        "scope": {"name": record.name},
+    }
+
+    return log_record
+
+
+# =============================================================================
+# SPCS-SPECIFIC HANDLER
+# =============================================================================
+
+
+class SPCSStreamHandler(logging.StreamHandler):
+    """
+    Custom handler for SPCS that writes flat JSON format to stdout.
+
+    The SPCS OpenTelemetry collector will:
+    1. Capture stdout
+    2. Parse JSON if line matches ^{.*}$
+    3. Extract body, severity_text, severity_number, attributes, scope, trace_id, span_id fields
+    4. Map trace_id/span_id to LogRecord protobuf fields
+    5. Backend creates TRACE column from protobuf trace_id/span_id
+    6. Route to Event Table
+    """
+
+    def __init__(self, stream=None) -> None:
+        """
+        Initialize the handler.
+
+        Args:
+            stream: Output stream (default: sys.stdout)
+        """
+        super().__init__(stream or sys.stdout)
+
+    def emit(self, record: logging.LogRecord):
+        """
+        Emit a log record as single-line JSON to stdout.
+        """
+        try:
+            # Encode to SPCS-compatible flat JSON format
+            log_record = _encode_spcs_log_record(record)
+
+            # Convert to compact JSON string (single line, no spaces)
+            log_json = json.dumps(log_record, separators=(",", ":"))
+
+            # Write to stdout (SPCS captures this)
+            self.stream.write(log_json + "\n")
+            self.flush()
+
+        except Exception:
+            self.handleError(record)
+
+
+# =============================================================================
+# INITIALIZATION FUNCTIONS
+# =============================================================================
+
+
+def setup_spcs_logger(
+    log_level: int = logging.INFO,
+    logger_name: str = None,
+    enable_console_output: bool = False,
+) -> logging.Logger:
+    """
+    Set up the root logger for SPCS with flat JSON formatting.
+
+    Args:
+        log_level: Python logging level (e.g., logging.INFO)
+        logger_name: Optional logger name (None for root logger)
+        enable_console_output: If True, also adds a human-readable console handler to stderr
+
+    Returns:
+        Configured logger instance
+
+    Example:
+        >>> logger = setup_spcs_logger(logging.INFO, enable_console_output=True)
+        >>> logger.info("Hello from SPCS", extra={"user_id": 123, "action": "login"})
+
+        # Output to stdout (captured by SPCS):
+        {"body":"Hello from SPCS","severity_text":"INFO","severity_number":9,"attributes":{"user_id":123,"action":"login","code.lineno":42,"code.function":"main","code.filepath":"/app/main.py"},"scope":{"name":"root"}}
+
+        # Output to stderr (if enable_console_output=True):
+        2024-01-15 10:30:45,123 - root - INFO - Hello from SPCS
+    """
+    # Mark as initialized
+    SPCSLoggerConfig.is_initialized = True
+
+    # Get logger (root or named)
+    logger = logging.getLogger(logger_name)
+    logger.setLevel(log_level)
+    logger.handlers.clear()
+
+    # Add SPCS flat JSON handler (writes JSON to stdout)
+    spcs_handler = SPCSStreamHandler(sys.stdout)
+    spcs_handler.setLevel(log_level)
+    logger.addHandler(spcs_handler)
+
+    # Optionally add human-readable console handler (to stderr to avoid mixing with JSON logs)
+    if enable_console_output:
+        console_handler = logging.StreamHandler(sys.stderr)
+        console_handler.setLevel(log_level)
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        console_handler.setFormatter(formatter)
+        logger.addHandler(console_handler)
+
+    return logger
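To see the flat JSON records the new module emits, a usage sketch; the import path (`snowflake.snowpark_connect.utils.spcs_logger`) is inferred from the file list above, and the printed output is abbreviated:

```python
import logging

from snowflake.snowpark_connect.utils.spcs_logger import setup_spcs_logger

# JSON records go to stdout (for the SPCS collector); a human-readable
# copy goes to stderr because enable_console_output=True.
log = setup_spcs_logger(
    logging.INFO, logger_name="spcs_demo", enable_console_output=True
)

log.info("query finished", extra={"rows": 42, "warehouse": "WH_XS"})
# stdout -> {"body":"query finished","severity_text":"INFO","severity_number":9,
#            "attributes":{"rows":42,"warehouse":"WH_XS","code.lineno":...},
#            "scope":{"name":"spcs_demo"}}

try:
    1 / 0
except ZeroDivisionError:
    # exc_info=True adds exception.type / exception.message /
    # exception.stacktrace to the attributes, per _extract_attributes.
    log.error("division failed", exc_info=True)
```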