snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
from snowflake import snowpark
|
|
6
6
|
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
|
|
7
|
+
from snowflake.snowpark_connect.error.error_codes import ErrorCodes
|
|
8
|
+
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
|
|
7
9
|
from snowflake.snowpark_connect.relation.read.map_read_jdbc import (
|
|
8
10
|
close_connection,
|
|
9
11
|
create_connection,
|
|
@@ -35,7 +37,9 @@ def map_write_jdbc(
|
|
|
35
37
|
dbtable = None
|
|
36
38
|
|
|
37
39
|
if dbtable is None:
|
|
38
|
-
|
|
40
|
+
exception = ValueError("Include dbtable is required option")
|
|
41
|
+
attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
|
|
42
|
+
raise exception
|
|
39
43
|
|
|
40
44
|
try:
|
|
41
45
|
JdbcDataFrameWriter(session, jdbc_options).jdbc_write_dbapi(
|
|
@@ -46,4 +50,6 @@ def map_write_jdbc(
|
|
|
46
50
|
write_mode=write_mode,
|
|
47
51
|
)
|
|
48
52
|
except Exception as e:
|
|
49
|
-
|
|
53
|
+
exception = Exception(f"Error accessing JDBC datasource for write: {e}")
|
|
54
|
+
attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
|
|
55
|
+
raise exception
|
|
Binary file
|
|
@@ -1,18 +1,107 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
import pathlib
|
|
5
4
|
import threading
|
|
6
5
|
import time
|
|
7
6
|
|
|
8
7
|
from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
|
|
9
8
|
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
10
9
|
|
|
11
|
-
_resources_initialized = threading.Event()
|
|
12
|
-
_initializer_lock = threading.Lock()
|
|
13
10
|
SPARK_VERSION = "3.5.6"
|
|
14
11
|
RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
|
|
15
12
|
|
|
13
|
+
# On-demand Scala UDF jar upload state, kept separate from general resource initialization
|
|
14
|
+
_scala_jars_uploaded = threading.Event()
|
|
15
|
+
_scala_jars_lock = threading.Lock()
|
|
16
|
+
|
|
17
|
+
# Define Scala resource names
|
|
18
|
+
SPARK_SQL_JAR = f"spark-sql_2.12-{SPARK_VERSION}.jar"
|
|
19
|
+
SPARK_CONNECT_CLIENT_JAR = f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar"
|
|
20
|
+
SPARK_COMMON_UTILS_JAR = f"spark-common-utils_2.12-{SPARK_VERSION}.jar"
|
|
21
|
+
SAS_SCALA_UDF_JAR = "sas-scala-udf_2.12-0.2.0.jar"
|
|
22
|
+
JSON_4S_JAR = "json4s-ast_2.12-3.7.0-M11.jar"
|
|
23
|
+
SCALA_REFLECT_JAR = "scala-reflect-2.12.18.jar"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _upload_scala_udf_jars_impl() -> None:
    """Upload the jar files required for creating Scala UDFs.

    This is the internal implementation; use ensure_scala_udf_jars_uploaded()
    for thread-safe lazy loading.

    Each jar is looked up first in the package's ``includes/jars`` directory
    and then in ``snowpark_connect_deps_1`` followed by
    ``snowpark_connect_deps_2``. The located file is put, uncompressed and
    without overwriting, under ``RESOURCE_PATH`` on the session stage.

    Raises:
        FileNotFoundError: If a jar is found neither in ``includes/jars`` nor
            in either dependency package.
        RuntimeError: If putting a jar onto the stage fails; the underlying
            exception is chained as the cause.
    """
    from pathlib import Path

    session = get_or_create_snowpark_session()
    stage = session.get_session_stage()
    resource_path = stage + RESOURCE_PATH
    import snowpark_connect_deps_1
    import snowpark_connect_deps_2

    jar_files = [
        SPARK_SQL_JAR,
        SPARK_CONNECT_CLIENT_JAR,
        SPARK_COMMON_UTILS_JAR,
        SAS_SCALA_UDF_JAR,
        JSON_4S_JAR,
        SCALA_REFLECT_JAR,  # Required for deserializing Scala lambdas
    ]

    # Path to includes/jars directory
    includes_jars_dir = Path(__file__).parent / "includes" / "jars"

    # Fallback lookup order when a jar is not bundled in includes/jars.
    deps_packages = (snowpark_connect_deps_1, snowpark_connect_deps_2)

    for jar_name in jar_files:
        includes_jar_path = includes_jars_dir / jar_name
        if includes_jar_path.exists():
            jar_path = includes_jar_path
            logger.info(f"Found {jar_name} in includes/jars")
        else:
            for deps_package in deps_packages:
                try:
                    jar_path = deps_package.get_jar_path(jar_name)
                except FileNotFoundError:
                    continue
                break
            else:
                # No package could provide the jar.
                raise FileNotFoundError(
                    f"JAR {jar_name} not found in includes/jars or either package"
                )

        try:
            session.file.put(
                str(jar_path),
                resource_path,
                auto_compress=False,
                overwrite=False,
                source_compression="NONE",
            )
        except Exception as e:
            # Chain explicitly so the original failure is reported as the cause.
            raise RuntimeError(f"Failed to upload JAR {jar_name}: {e}") from e
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def ensure_scala_udf_jars_uploaded() -> None:
    """Upload the Scala UDF jars on first use; do nothing on later calls.

    The ``_scala_jars_uploaded`` event records a completed upload. It is
    checked once without the lock and again after acquiring
    ``_scala_jars_lock``, so a caller that waited on the lock while another
    thread finished the upload does not upload again. If the upload fails,
    the error is logged and re-raised, and the event stays unset.
    """
    # Cheap check first: skip the lock entirely once the jars are uploaded.
    if _scala_jars_uploaded.is_set():
        return

    with _scala_jars_lock:
        # Re-check under the lock: the upload may have completed while waiting.
        if not _scala_jars_uploaded.is_set():
            try:
                began = time.time()
                logger.info("Uploading Scala UDF jars on-demand...")
                _upload_scala_udf_jars_impl()
                _scala_jars_uploaded.set()
                logger.info(f"Scala UDF jars uploaded in {time.time() - began:.2f}s")
            except Exception as err:
                logger.error(f"Failed to upload Scala UDF jars: {err}")
                raise
|
|
104
|
+
|
|
16
105
|
|
|
17
106
|
def initialize_resources() -> None:
|
|
18
107
|
"""Initialize all expensive resources. We should initialize what we can here, so that actual rpc calls like
|
|
@@ -44,34 +133,6 @@ def initialize_resources() -> None:
|
|
|
44
133
|
|
|
45
134
|
session.sql("select 1 as sf_connection_warm_up").collect()
|
|
46
135
|
|
|
47
|
-
def upload_scala_udf_jars() -> None:
|
|
48
|
-
"""Upload Spark jar files required for creating Scala UDFs."""
|
|
49
|
-
stage = session.get_session_stage()
|
|
50
|
-
resource_path = stage + RESOURCE_PATH
|
|
51
|
-
import snowflake
|
|
52
|
-
|
|
53
|
-
pyspark_jars = (
|
|
54
|
-
pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
|
|
55
|
-
)
|
|
56
|
-
jar_files = [
|
|
57
|
-
f"spark-sql_2.12-{SPARK_VERSION}.jar",
|
|
58
|
-
f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
|
|
59
|
-
f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
|
|
60
|
-
"json4s-ast_2.12-3.7.0-M11.jar",
|
|
61
|
-
"json4s-native_2.12-3.7.0-M11.jar",
|
|
62
|
-
"json4s-core_2.12-3.7.0-M11.jar",
|
|
63
|
-
"paranamer-2.8.3.jar",
|
|
64
|
-
]
|
|
65
|
-
|
|
66
|
-
for jar in jar_files:
|
|
67
|
-
session.file.put(
|
|
68
|
-
str(pyspark_jars) + "/" + jar,
|
|
69
|
-
resource_path,
|
|
70
|
-
auto_compress=False,
|
|
71
|
-
overwrite=False,
|
|
72
|
-
source_compression="NONE",
|
|
73
|
-
)
|
|
74
|
-
|
|
75
136
|
start_time = time.time()
|
|
76
137
|
|
|
77
138
|
resources = [
|
|
@@ -80,7 +141,6 @@ def initialize_resources() -> None:
|
|
|
80
141
|
("Initialize Session Stage", initialize_session_stage), # Takes about 0.3s
|
|
81
142
|
("Initialize Session Catalog", initialize_catalog), # Takes about 1.2s
|
|
82
143
|
("Snowflake Connection Warm Up", warm_up_sf_connection), # Takes about 1s
|
|
83
|
-
("Upload Scala UDF Jars", upload_scala_udf_jars),
|
|
84
144
|
]
|
|
85
145
|
|
|
86
146
|
for name, resource_func in resources:
|
|
@@ -94,23 +154,25 @@ def initialize_resources() -> None:
|
|
|
94
154
|
if str(e).find("because the session has been closed") == -1:
|
|
95
155
|
logger.error(f"Failed to initialize {name}: {e}")
|
|
96
156
|
|
|
97
|
-
_resources_initialized.set()
|
|
98
157
|
logger.info(f"All resources initialized in {time.time() - start_time:.2f}s")
|
|
99
158
|
|
|
100
159
|
|
|
101
|
-
_resource_initializer = threading.Thread(
|
|
102
|
-
target=initialize_resources, name="ResourceInitializer"
|
|
103
|
-
)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def initialize_resources_async() -> threading.Thread:
|
|
107
|
-
"""Start resource initialization in background."""
|
|
108
|
-
with _initializer_lock:
|
|
109
|
-
if not _resource_initializer.is_alive() and _resource_initializer.ident is None:
|
|
110
|
-
_resource_initializer.start()
|
|
111
|
-
return _resource_initializer
|
|
112
|
-
|
|
113
|
-
|
|
114
160
|
def wait_for_resource_initialization() -> None:
    """No-op function retained for backward compatibility.

    This function is kept to maintain backward compatibility with external
    client code that may call it. Previously, this function waited for
    asynchronous resource initialization to complete. Now that resource
    initialization is synchronous, this function does nothing. External
    callers can safely call this function without any effect.
    """
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def set_upload_jars(upload: bool) -> None:
    """No-op function retained for backward compatibility.

    This function is kept to maintain backward compatibility with external
    client code that may call it. Previously, this function was used to set
    whether to upload jars required for Scala UDFs. Now that Scala UDF jar
    upload has been moved to lazy on-demand loading via
    ensure_scala_udf_jars_uploaded(), this function does nothing. External
    callers can safely call this function without any effect.

    Args:
        upload: Ignored; accepted only so existing call sites keep working.
    """
|