snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +717 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +309 -26
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/error_utils.py +28 -0
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +224 -15
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +86 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
- snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
- snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +171 -48
- snowflake/snowpark_connect/server.py +528 -473
- snowflake/snowpark_connect/server_common/__init__.py +503 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/type_support.py +130 -0
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +195 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +192 -40
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/column_qualifier.py (new file)

@@ -0,0 +1,43 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+)
+
+
+@dataclass(frozen=True)
+class ColumnQualifier:
+    parts: tuple[str, ...]
+
+    def __post_init__(self) -> None:
+        if not all(isinstance(x, str) for x in self.parts):
+            raise TypeError("ColumnQualifier.parts must be strings")
+
+    @property
+    def is_empty(self) -> bool:
+        return len(self.parts) == 0
+
+    def all_qualified_names(self, name: str) -> list[str]:
+        qualifier_parts = self.parts
+        qualifier_prefixes = [
+            ".".join(quote_name_without_upper_casing(x) for x in qualifier_parts[i:])
+            for i in range(len(qualifier_parts))
+        ]
+        return [f"{prefix}.{name}" for prefix in qualifier_prefixes]
+
+    def to_upper(self):
+        return ColumnQualifier(tuple(part.upper() for part in self.parts))
+
+    def matches(self, target: ColumnQualifier) -> bool:
+        if self.is_empty or target.is_empty:
+            return False
+        # If the column has fewer qualifiers than the target, it cannot match
+        if len(self.parts) < len(target.parts):
+            return False
+        return self.parts[-len(target.parts) :] == target.parts
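The new ColumnQualifier resolves qualified column references by suffix matching on the qualifier parts. A minimal sketch of that behavior, using only what the diff above shows (the import path is taken from the file list; the exact quoting in all_qualified_names comes from Snowpark's quote_name_without_upper_casing):

```python
from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

qualifier = ColumnQualifier(("db", "schema", "table"))

# matches() succeeds when the target qualifier is a suffix of this one.
assert qualifier.matches(ColumnQualifier(("schema", "table")))     # suffix match
assert qualifier.matches(ColumnQualifier(("table",)))              # shorter suffix
assert not qualifier.matches(ColumnQualifier(("other", "table")))  # mismatched part
assert not ColumnQualifier(()).matches(qualifier)                  # empty never matches

# all_qualified_names() expands every qualifier suffix, longest first,
# quoting each part without upper-casing and appending the column name as given.
print(qualifier.all_qualified_names('"ID"'))
```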
snowflake/snowpark_connect/config.py

@@ -8,7 +8,7 @@ import re
 import sys
 from collections import defaultdict
 from copy import copy, deepcopy
-from typing import Any
+from typing import Any, Dict, Optional

 import jpype
 import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -17,11 +17,18 @@ from tzlocal import get_localzone_name
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
+    unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import TimestampTimeZone, TimestampType
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.type_support import set_integral_types_conversion
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
-from snowflake.snowpark_connect.utils.context import
+from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
+    get_spark_session_id,
+)
 from snowflake.snowpark_connect.utils.external_udxf_cache import (
     clear_external_udxf_cache,
 )
@@ -139,9 +146,30 @@ class GlobalConfig:
         "spark.sql.parser.quotedRegexColumnNames": "false",
         # custom configs
         "snowpark.connect.version": ".".join(map(str, sas_version)),
+        "snowpark.connect.temporary.views.create_in_snowflake": "false",
         # Control whether repartition(n) on a DataFrame forces splitting into n files during writes
         # This matches spark behavior more closely, but introduces overhead.
         "snowflake.repartition.for.writes": "false",
+        "snowpark.connect.structured_types.fix": "true",
+        # Local relation optimization: Use List[Row] for small data, PyArrow for large data
+        # Enabled in production by default to improve performance for createDataFrame on small local relations.
+        # Disabled in tests by default unless explicitly enabled to stabilize flaky tests that are not applying row ordering.
+        # SNOW-2719980: Remove this flag after test fragility issues are resolved
+        "snowpark.connect.localRelation.optimizeSmallData": "true",
+        "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",  # TODO: no-op
+        # USE_VECTORIZED_SCANNER will become the default in a future BCR; Snowflake recommends setting it to TRUE for new workloads.
+        # This significantly reduces latency for loading Parquet files by downloading only relevant columnar sections into memory.
+        "snowpark.connect.parquet.useVectorizedScanner": "true",
+        # USE_LOGICAL_TYPE enables proper handling of Parquet logical types (TIMESTAMP, DATE, DECIMAL).
+        # Without useLogicalType set to "true", Parquet TIMESTAMP (INT64 physical) is incorrectly read as NUMBER(38,0).
+        "snowpark.connect.parquet.useLogicalType": "false",
+        "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue": "false",
+        "spark.sql.parquet.outputTimestampType": "TIMESTAMP_MILLIS",
+        "snowpark.connect.handleIntegralOverflow": "false",
+        "snowpark.connect.scala.version": "2.12",
+        # Control whether to convert decimal - to integral types and vice versa: DecimalType(p,0) <-> ByteType/ShortType/IntegerType/LongType
+        # Values: "client_default" (behavior based on client type), "enabled", "disabled"
+        "snowpark.connect.integralTypesEmulation": "client_default",
     }

     boolean_config_list = [
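These defaults surface to clients through the normal Spark Connect config round trip. A minimal sketch, assuming a snowpark-connect server reachable at an illustrative URL; the keys and default values are the ones added above, and runtime readability/settability is governed by the config handling later in this diff:

```python
from pyspark.sql import SparkSession

# Illustrative endpoint only; not prescribed by this package.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

print(spark.conf.get("snowpark.connect.scala.version"))           # "2.12" by default
print(spark.conf.get("snowpark.connect.integralTypesEmulation"))  # "client_default"

# Opting in to the new behaviors at runtime:
spark.conf.set("snowpark.connect.integralTypesEmulation", "enabled")
spark.conf.set("snowpark.connect.parquet.useLogicalType", "true")
```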
@@ -150,11 +178,16 @@ class GlobalConfig:
         "spark.sql.repl.eagerEval.enabled",
         "spark.sql.crossJoin.enabled",
         "spark.sql.caseSensitive",
+        "snowpark.connect.localRelation.optimizeSmallData",
+        "snowpark.connect.parquet.useVectorizedScanner",
+        "snowpark.connect.parquet.useLogicalType",
         "spark.sql.ansi.enabled",
         "spark.sql.legacy.allowHashOnMapType",
         "spark.Catalog.databaseFilterInformationSchema",
         "spark.sql.parser.quotedRegexColumnNames",
         "snowflake.repartition.for.writes",
+        "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue",
+        "snowpark.connect.handleIntegralOverflow",
     ]

     int_config_list = [
@@ -171,8 +204,15 @@ class GlobalConfig:
         "spark.app.name": lambda session, name: setattr(
             session, "query_tag", f"Spark-Connect-App-Name={name}"
         ),
+        # TODO SNOW-2896871: Remove with version 1.10.0
         "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
-            session, imports
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.python.imports": lambda session, imports: parse_imports(
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.java.imports": lambda session, imports: parse_imports(
+            session, imports, "java"
         ),
     }

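The UDF import config is now split per language, with the legacy snowpark.connect.udf.imports key mapped to Python. A hedged sketch of how a client might use it; the stage paths are made up, and the value format follows parse_imports() later in this diff (strip "[] ", split on commas, add each entry to the Snowpark session's imports):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # illustrative URL

# Comma-separated (optionally bracketed) lists; each entry is passed to
# Session.add_import() on the server side.
spark.conf.set("snowpark.connect.udf.python.imports", "@my_stage/helpers.py,@my_stage/utils.zip")
spark.conf.set("snowpark.connect.udf.java.imports", "[@my_stage/my-udfs.jar]")
```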
@@ -257,21 +297,34 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "spark.sql.execution.pythonUDTF.arrow.enabled",
     "spark.sql.tvf.allowMultipleTableArguments.enabled",
     "snowpark.connect.sql.passthrough",
+    "snowpark.connect.cte.optimization_enabled",
     "snowpark.connect.iceberg.external_volume",
     "snowpark.connect.sql.identifiers.auto-uppercase",
+    "snowpark.connect.sql.partition.external_table_location",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
-    "
+    "snowpark.connect.temporary.views.create_in_snowflake",
+    "snowpark.connect.enable_snowflake_extension_behavior",
+    "spark.hadoop.fs.s3a.server-side-encryption.key",
+    "spark.hadoop.fs.s3a.assumed.role.arn",
+    "snowpark.connect.describe_cache_ttl_seconds",
+    "mapreduce.fileoutputcommitter.marksuccessfuljobs",
+    "spark.sql.parquet.enable.summary-metadata",
+    "parquet.enable.summary-metadata",
 }
-
+AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
 )
+AZURE_SAS_KEY = re.compile(
+    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+)


 def valid_session_config_key(key: str):
     return (
         key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
         or AZURE_SAS_KEY.match(key)  # Azure session keys
+        or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
     )

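The whitelist check now accepts both Azure key shapes. A self-contained sketch of the two patterns (account and container names below are invented; only the key shape matters):

```python
import re

AZURE_ACCOUNT_KEY = re.compile(
    r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
)
AZURE_SAS_KEY = re.compile(
    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
)

assert AZURE_ACCOUNT_KEY.match("fs.azure.sas.mycontainer.myaccount.blob.core.windows.net")
assert AZURE_SAS_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")
assert not AZURE_SAS_KEY.match("fs.azure.sas.mycontainer.myaccount.blob.core.windows.net")
```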
@@ -279,17 +332,23 @@ class SessionConfig:
     """This class contains the session configuration for the Spark Server."""

     default_session_config = {
-        "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
+        "snowpark.connect.cte.optimization_enabled": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
-        "enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.describe_cache_ttl_seconds": "300",
+        "snowpark.connect.sql.partition.external_table_location": None,
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs": "false",
+        "spark.sql.parquet.enable.summary-metadata": "false",
+        "parquet.enable.summary-metadata": "false",
     }

     def __init__(self) -> None:
         self.config = deepcopy(self.default_session_config)
+        self.table_metadata: Dict[str, Dict[str, Any]] = {}

     def __getitem__(self, item: str) -> str:
         return self.get(item)
@@ -319,6 +378,11 @@ CONFIG_ALLOWED_VALUES: dict[str, tuple] = {
         "all",
         "none",
     ),
+    "snowpark.connect.integralTypesEmulation": (
+        "client_default",
+        "enabled",
+        "disabled",
+    ),
 }

 # Set some default configuration that are necessary for the driver.
@@ -344,9 +408,11 @@ def route_config_proto(
             if not pair.HasField("value"):
                 from pyspark.errors import IllegalArgumentException

-
+                exception = IllegalArgumentException(
                     f"Cannot set config '{pair.key}' to None"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+                raise exception

             set_config_param(
                 config.session_id, pair.key, pair.value, snowpark_session
@@ -429,7 +495,11 @@ def route_config_proto(
                 pair.value = str(global_config.is_modifiable(key)).lower()
             return res
         case _:
-
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected request {config}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def set_config_param(
@@ -469,19 +539,27 @@ def _verify_static_config_not_modified(key: str) -> None:
     # https://github.com/apache/spark/blob/v3.5.3/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala#L161
     # Spark does not allow to modify static configurations at runtime.
     if global_config.is_static_config(key) and global_config.is_set(key):
-
+        exception = ValueError(f"Cannot modify the value of a static config: {key}")
+        attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+        raise exception


 def _verify_is_valid_config_value(key: str, value: Any) -> None:
     if key in CONFIG_ALLOWED_VALUES and value not in CONFIG_ALLOWED_VALUES[key]:
-
+        exception = ValueError(
             f"Invalid value '{value}' for key '{key}'. Allowed values: {', '.join(CONFIG_ALLOWED_VALUES[key])}."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+        raise exception


 def _verify_is_not_readonly_config(key):
     if key in global_config.readonly_config_list:
-
+        exception = ValueError(
+            f"Config with key {key} is read-only and cannot be modified."
+        )
+        attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+        raise exception


 def set_jvm_timezone(timezone_id: str):
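The validation helpers above now raise tagged exceptions instead of plain ones. A standalone sketch that mirrors _verify_is_valid_config_value (re-implemented here for illustration, using the allowed values added elsewhere in this diff):

```python
CONFIG_ALLOWED_VALUES = {
    "snowpark.connect.integralTypesEmulation": ("client_default", "enabled", "disabled"),
}

def verify_is_valid_config_value(key: str, value: str) -> None:
    # Mirrors the check above: unknown keys pass, listed keys must use an allowed value.
    if key in CONFIG_ALLOWED_VALUES and value not in CONFIG_ALLOWED_VALUES[key]:
        raise ValueError(
            f"Invalid value '{value}' for key '{key}'. "
            f"Allowed values: {', '.join(CONFIG_ALLOWED_VALUES[key])}."
        )

verify_is_valid_config_value("snowpark.connect.integralTypesEmulation", "enabled")  # ok
try:
    verify_is_valid_config_value("snowpark.connect.integralTypesEmulation", "sometimes")
except ValueError as e:
    print(e)  # lists the three allowed values
```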
@@ -498,10 +576,13 @@ def set_jvm_timezone(timezone_id: str):
         RuntimeError: If JVM is not started
     """
     if not jpype.isJVMStarted():
-
+        exception = RuntimeError("JVM must be started before setting timezone")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

     try:
-
+        with get_jpype_jclass_lock():
+            TimeZone = jpype.JClass("java.util.TimeZone")
         new_timezone = TimeZone.getTimeZone(timezone_id)
         TimeZone.setDefault(new_timezone)

@@ -513,7 +594,9 @@ def set_jvm_timezone(timezone_id: str):
 def reset_jvm_timezone_to_system_default():
     """Reset JVM timezone to the system's default timezone"""
     if not jpype.isJVMStarted():
-
+        exception = RuntimeError("JVM must be started first")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

     try:
         TimeZone = jpype.JClass("java.util.TimeZone")
@@ -522,9 +605,13 @@ def reset_jvm_timezone_to_system_default():
             f"Reset JVM timezone to system default: {TimeZone.getDefault().getID()}"
         )
     except jpype.JException as e:
-
+        exception = RuntimeError(f"Java exception while resetting timezone: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     except Exception as e:
-
+        exception = RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception


 def set_snowflake_parameters(
@@ -569,38 +656,137 @@ def set_snowflake_parameters(
             snowpark_session.use_database(db)
         case (prev, curr) if prev != curr:
             snowpark_session.use_schema(prev)
+        case "snowpark.connect.cte.optimization_enabled":
+            # Set CTE optimization on the snowpark session
+            cte_enabled = str_to_bool(value)
+            snowpark_session.cte_optimization_enabled = cte_enabled
+            logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
+        case "snowpark.connect.structured_types.fix":
+            # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+            snowpark.context._enable_fix_2360274 = str_to_bool(value)
+            logger.info(f"Updated snowpark session structured types fix: {value}")
+        case "spark.sql.parquet.outputTimestampType":
+            if value == "TIMESTAMP_MICROS":
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = false"
+                ).collect()
+            else:
+                # Default: TIMESTAMP_MILLIS (or any other value)
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = true"
+                ).collect()
+            logger.info(f"Updated parquet timestamp output type to: {value}")
+        case "snowpark.connect.scala.version":
+            # force java udf helper recreation
+            set_java_udf_creator_initialized_state(False)
+        case "snowpark.connect.integralTypesEmulation":
+            # "client_default" - don't change, let set_spark_version handle it
+            # "enabled" / "disabled" - explicitly set
+            if value.lower() == "enabled":
+                set_integral_types_conversion(True)
+            elif value.lower() == "disabled":
+                set_integral_types_conversion(False)
         case _:
             pass


 def get_boolean_session_config_param(name: str) -> bool:
-    session_config = sessions_config[
+    session_config = sessions_config[get_spark_session_id()]
     return str_to_bool(session_config[name])


+def get_string_session_config_param(name: str) -> str:
+    session_config = sessions_config[get_spark_session_id()]
+    return str(session_config[name])
+
+
+def get_cte_optimization_enabled() -> bool:
+    """Get the CTE optimization configuration setting."""
+    return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")
+
+
+def get_success_file_generation_enabled() -> bool:
+    """Get the _SUCCESS file generation configuration setting."""
+    return get_boolean_session_config_param(
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs"
+    )
+
+
+def get_parquet_metadata_generation_enabled() -> bool:
+    """
+    Get the Parquet metadata file generation configuration setting.
+    """
+    return get_boolean_session_config_param(
+        "spark.sql.parquet.enable.summary-metadata"
+    ) or get_boolean_session_config_param("parquet.enable.summary-metadata")
+
+
+def get_describe_cache_ttl_seconds() -> int:
+    """Get the describe query cache TTL from session config, with a default fallback."""
+    session_config: SessionConfig = sessions_config[get_spark_session_id()]
+    default_ttl: str = SessionConfig.default_session_config[
+        "snowpark.connect.describe_cache_ttl_seconds"
+    ]
+    try:
+        ttl_str = session_config.get(
+            "snowpark.connect.describe_cache_ttl_seconds", default_ttl
+        )
+        return int(ttl_str)
+    except ValueError:  # fallback to default ttl
+        return int(default_ttl)
+
+
+def should_create_temporary_view_in_snowflake() -> bool:
+    return str_to_bool(
+        global_config["snowpark.connect.temporary.views.create_in_snowflake"]
+    )
+
+
 def auto_uppercase_column_identifiers() -> bool:
-    session_config = sessions_config[
-
+    session_config = sessions_config[get_spark_session_id()]
+    auto_upper_case_config = session_config[
         "snowpark.connect.sql.identifiers.auto-uppercase"
-    ]
+    ]
+    if auto_upper_case_config:
+        return auto_upper_case_config.lower() in ("all", "only_columns")
+
+    return not global_config.spark_sql_caseSensitive


 def auto_uppercase_non_column_identifiers() -> bool:
-    session_config = sessions_config[
-
+    session_config = sessions_config[get_spark_session_id()]
+    auto_upper_case_config = session_config[
         "snowpark.connect.sql.identifiers.auto-uppercase"
-    ]
+    ]
+    if auto_upper_case_config:
+        return auto_upper_case_config.lower() in ("all", "all_except_columns")
+
+    return not global_config.spark_sql_caseSensitive


-def
+def external_table_location() -> Optional[str]:
+    session_config = sessions_config[get_spark_session_id()]
+    return session_config.get(
+        "snowpark.connect.sql.partition.external_table_location", None
+    )
+
+
+def parse_imports(
+    session: snowpark.Session, imports: str | None, language: str
+) -> None:
     if not imports:
         return

     # UDF needs to be recreated to include new imports
     clear_external_udxf_cache(session)
+    if language == "java":
+
+        set_java_udf_creator_initialized_state(False)

     for udf_import in imports.strip("[] ").split(","):
-
+        udf_import = udf_import.strip()
+        if udf_import:
+            session.add_import(udf_import)


 def get_timestamp_type():
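The two auto_uppercase_* helpers above split the single "snowpark.connect.sql.identifiers.auto-uppercase" setting into column and non-column decisions, falling back to spark.sql.caseSensitive when the setting is unset. A standalone sketch of the mode mapping, re-implemented for illustration:

```python
def uppercase_flags(mode: str) -> tuple[bool, bool]:
    """Return (columns, non_columns) auto-uppercase decisions for a given mode."""
    columns = mode.lower() in ("all", "only_columns")
    non_columns = mode.lower() in ("all", "all_except_columns")
    return columns, non_columns

for mode in ("all", "only_columns", "all_except_columns", "none"):
    print(f"{mode:>20} -> {uppercase_flags(mode)}")
# all                -> (True, True)
# only_columns       -> (True, False)
# all_except_columns -> (False, True)
# none               -> (False, False)
```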
@@ -613,3 +799,100 @@ def get_timestamp_type():
     # shouldn't happen since `spark.sql.timestampType` is always defined, and `spark.conf.unset` sets it to default (TIMESTAMP_LTZ)
     timestamp_type = TimestampType(TimestampTimeZone.LTZ)
     return timestamp_type
+
+
+def record_table_metadata(
+    table_identifier: str,
+    table_type: str,
+    data_source: str,
+    supports_column_rename: bool = True,
+) -> None:
+    """
+    Record metadata about a table for Spark compatibility checks.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+        table_type: "v1" or "v2"
+        data_source: Source format (parquet, csv, iceberg, etc.)
+        supports_column_rename: Whether the table supports RENAME COLUMN
+    """
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+
+    # Normalize table identifier for consistent lookup
+    # Use the full catalog.database.table identifier to avoid conflicts
+    normalized_identifier = table_identifier.upper().strip('"')
+
+    session_config.table_metadata[normalized_identifier] = {
+        "table_type": table_type,
+        "data_source": data_source,
+        "supports_column_rename": supports_column_rename,
+    }
+
+
+def get_table_metadata(table_identifier: str) -> Dict[str, Any] | None:
+    """
+    Get stored metadata for a table.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+
+    Returns:
+        Table metadata dict or None if not found
+    """
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+
+    normalized_identifier = unquote_if_quoted(table_identifier).upper()
+
+    return session_config.table_metadata.get(normalized_identifier)
+
+
+def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
+    """
+    Check if a table supports a given operation based on metadata and config.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+        operation: Operation to check (e.g., "rename_column")
+
+    Returns:
+        True if operation is supported, False if should be blocked
+    """
+    table_metadata = get_table_metadata(table_identifier)
+
+    if not table_metadata:
+        return True
+
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+    enable_extensions = str_to_bool(
+        session_config.get(
+            "snowpark.connect.enable_snowflake_extension_behavior", "false"
+        )
+    )
+
+    if enable_extensions:
+        return True
+
+    if operation == "rename_column":
+        return table_metadata.get("supports_column_rename", True)
+
+    return True
+
+
+def get_scala_version() -> str:
+    return global_config.get("snowpark.connect.scala.version")
+
+
+_java_udf_creator_initialized = False
+
+
+def is_java_udf_creator_initialized() -> bool:
+    global _java_udf_creator_initialized
+    return _java_udf_creator_initialized
+
+
+def set_java_udf_creator_initialized_state(value: bool) -> None:
+    global _java_udf_creator_initialized
+    _java_udf_creator_initialized = value
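The table-metadata helpers let write paths record how a table was created so later DDL can be gated. A usage sketch; the import path is inferred from the file list, the table name is made up, and an active server-side session context is assumed so get_spark_session_id() resolves:

```python
from snowflake.snowpark_connect.config import (
    check_table_supports_operation,
    record_table_metadata,
)

record_table_metadata(
    "spark_catalog.db.sales_v1",
    table_type="v1",
    data_source="parquet",
    supports_column_rename=False,  # e.g. Spark v1 parquet tables reject RENAME COLUMN
)

# Blocked unless snowpark.connect.enable_snowflake_extension_behavior is "true"
# for the session; tables that were never recorded are always allowed.
print(check_table_supports_operation("spark_catalog.db.sales_v1", "rename_column"))     # False
print(check_table_supports_operation("spark_catalog.db.other_table", "rename_column"))  # True
```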
snowflake/snowpark_connect/constants.py

@@ -16,3 +16,5 @@ MAP_IN_ARROW_EVAL_TYPE = 207  # eval_type for mapInArrow operations
 COLUMN_METADATA_COLLISION_KEY = "{expr_id}_{key}"

 DUPLICATE_KEY_FOUND_ERROR_TEMPLATE = "Duplicate key found: {key}. You can set spark.sql.mapKeyDedupPolicy to LAST_WIN to deduplicate map keys with last wins policy."
+
+SPARK_VERSION = "3.5.3"