snowpark-connect 0.28.1__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/config.py +11 -2
- snowflake/snowpark_connect/expression/map_unresolved_function.py +172 -210
- snowflake/snowpark_connect/relation/io_utils.py +21 -1
- snowflake/snowpark_connect/relation/map_extension.py +21 -4
- snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
- snowflake/snowpark_connect/relation/map_relation.py +1 -3
- snowflake/snowpark_connect/relation/read/map_read.py +22 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +105 -26
- snowflake/snowpark_connect/relation/read/map_read_json.py +45 -34
- snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
- snowflake/snowpark_connect/relation/stage_locator.py +85 -53
- snowflake/snowpark_connect/relation/write/map_write.py +38 -4
- snowflake/snowpark_connect/server.py +18 -13
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/io_utils.py +36 -0
- snowflake/snowpark_connect/utils/session.py +3 -0
- snowflake/snowpark_connect/utils/udf_cache.py +37 -7
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/RECORD +28 -28
- {snowpark_connect-0.28.1.data → snowpark_connect-0.29.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.29.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.28.1.data → snowpark_connect-0.29.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.28.1.dist-info → snowpark_connect-0.29.0.dist-info}/top_level.txt +0 -0
--- snowflake/snowpark_connect/relation/stage_locator.py (0.28.1)
+++ snowflake/snowpark_connect/relation/stage_locator.py (0.29.0)
@@ -5,6 +5,7 @@
 import os

 from fsspec.core import url_to_fs
+from pyspark.errors.exceptions.base import AnalysisException
 from s3fs.core import S3FileSystem

 from snowflake import snowpark
@@ -33,37 +34,42 @@ def get_paths_from_stage(

     # TODO : What if GCP?
     # TODO: What if already stage path?
-
-
-
-            _, bucket_name, path = parse_azure_url(p)
-            rewrite_paths.append(f"{stage_name}/{path}")
-        paths = rewrite_paths
-    else:
-        filesystem, parsed_path = url_to_fs(paths[0])
-        if isinstance(filesystem, S3FileSystem):  # aws
-            # Remove bucket name from the path since the stage name will replace
-            # the bucket name in the path.
-            paths = [
-                f"{stage_name}/{'/'.join(url_to_fs(p)[1].split('/')[1:])}"
-                for p in paths
-            ]
-        else:  # local
-            # For local files, we need to preserve directory structure for partitioned data
-            # Instead of just using basename, we'll use the last few path components
-            new_paths = []
+    match get_cloud_from_url(paths[0]):
+        case "azure":
+            rewrite_paths = []
             for p in paths:
-
-
-
-
-
-
-
-
-
-
-
+                _, bucket_name, path = parse_azure_url(p)
+                rewrite_paths.append(f"{stage_name}/{path}")
+            paths = rewrite_paths
+        case "gcp":
+            raise AnalysisException(
+                "You must configure an integration for Google Cloud Storage to perform I/O operations rather than accessing the URL directly. Reference: https://docs.snowflake.com/en/user-guide/data-load-gcs-config"
+            )
+        case _:
+            filesystem, parsed_path = url_to_fs(paths[0])
+            if isinstance(filesystem, S3FileSystem):  # aws
+                # Remove bucket name from the path since the stage name will replace
+                # the bucket name in the path.
+                paths = [
+                    f"{stage_name}/{'/'.join(url_to_fs(p)[1].split('/')[1:])}"
+                    for p in paths
+                ]
+            else:  # local
+                # For local files, we need to preserve directory structure for partitioned data
+                # Instead of just using basename, we'll use the last few path components
+                new_paths = []
+                for p in paths:
+                    # Split the path and take the last 2-3 components to preserve structure
+                    # but avoid very long paths
+                    path_parts = p.split(os.sep)
+                    if len(path_parts) >= 2:
+                        # Take last 2 components (e.g., "base_case/x=abc")
+                        relative_path = "/".join(path_parts[-2:])
+                    else:
+                        # Single component, use basename
+                        relative_path = os.path.basename(p)
+                    new_paths.append(f"{stage_name}/{relative_path}")
+                paths = new_paths

     return paths

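The rewritten get_paths_from_stage dispatches on the cloud provider up front and rejects Google Cloud Storage URLs with an AnalysisException instead of letting them fall through to the S3/local branch. Below is a standalone sketch of just the "gcp" and local arms of that dispatch, using a hypothetical cloud_of helper and a plain ValueError in place of the package's get_cloud_from_url and AnalysisException:

from urllib.parse import urlparse


def cloud_of(url: str) -> str:
    # Hypothetical stand-in for the package's get_cloud_from_url helper:
    # classify purely by URL scheme.
    scheme = urlparse(url).scheme
    if scheme in ("wasb", "wasbs", "abfs", "abfss"):
        return "azure"
    if scheme in ("gs", "gcs"):
        return "gcp"
    return "other"  # s3/s3a URLs and plain local paths fall through


def rewrite_onto_stage(paths: list[str], stage_name: str) -> list[str]:
    match cloud_of(paths[0]):
        case "gcp":
            # Mirrors the new AnalysisException: GCS requires a storage integration.
            raise ValueError(
                "Configure a Snowflake storage integration for GCS instead of "
                "reading the gs:// URL directly."
            )
        case _:
            # Local branch from the diff: keep the last two path components so
            # partition directories such as x=abc survive the move to the stage.
            out = []
            for p in paths:
                parts = p.split("/")
                rel = "/".join(parts[-2:]) if len(parts) >= 2 else parts[-1]
                out.append(f"{stage_name}/{rel}")
            return out


print(rewrite_onto_stage(["/tmp/base_case/x=abc/part-0.parquet"], "@my_stage"))
# -> ['@my_stage/x=abc/part-0.parquet']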
@@ -102,15 +108,21 @@ class StageLocator:
         sql_query = f"CREATE OR REPLACE TEMP STAGE {stage_name[1:]} URL='azure://{account}.blob.core.windows.net/{bucket_name}'"

         credential_session_key = (
-            f"fs.azure.sas.
+            f"fs.azure.sas.fixed.token.{account}.dfs.core.windows.net",
+            f"fs.azure.sas.{bucket_name}.{account}.blob.core.windows.net",
         )
         credential = sessions_config.get(spark_session_id, None)
-
-
-
-
-
-
+        sas_token = None
+        for session_key in credential_session_key:
+            if (
+                credential is not None
+                and credential.get(session_key) is not None
+                and credential.get(session_key).strip() != ""
+            ):
+                sas_token = credential.get(session_key)
+                break
+        if sas_token is not None:
+            sql_query += f" CREDENTIALS = (AZURE_SAS_TOKEN = '{sas_token}')"

         logger.info(self.session.sql(sql_query).collect())
         self.stages_for_azure[bucket_name] = stage_name
@@ -128,24 +140,44 @@ class StageLocator:
         # but the rest of the time it's used, it does. We just drop it here.
         sql_query = f"CREATE OR REPLACE TEMP STAGE {stage_name[1:]} URL='s3://{parsed_path.split('/')[0]}'"
         credential = sessions_config.get(spark_session_id, None)
-        if
-
-
-
-
-
-
-
-
-
-
-
-
+        if credential is not None:
+            if (  # USE AWS KEYS to connect
+                credential.get("spark.hadoop.fs.s3a.access.key") is not None
+                and credential.get("spark.hadoop.fs.s3a.secret.key")
+                is not None
+                and credential.get("spark.hadoop.fs.s3a.access.key").strip()
+                != ""
+                and credential.get("spark.hadoop.fs.s3a.secret.key").strip()
+                != ""
+            ):
+                aws_keys = f" AWS_KEY_ID = '{credential.get('spark.hadoop.fs.s3a.access.key')}'"
+                aws_keys += f" AWS_SECRET_KEY = '{credential.get('spark.hadoop.fs.s3a.secret.key')}'"
+                if (
+                    credential.get("spark.hadoop.fs.s3a.session.token")
+                    is not None
+                ):
+                    aws_keys += f" AWS_TOKEN = '{credential.get('spark.hadoop.fs.s3a.session.token')}'"
+                sql_query += f" CREDENTIALS = ({aws_keys})"
+                sql_query += " ENCRYPTION = ( TYPE = 'AWS_SSE_S3' )"
+            elif (  # USE AWS ROLE and KMS KEY to connect
+                credential.get(
+                    "spark.hadoop.fs.s3a.server-side-encryption.key"
+                )
+                is not None
+                and credential.get(
+                    "spark.hadoop.fs.s3a.server-side-encryption.key"
+                ).strip()
+                != ""
+                and credential.get("spark.hadoop.fs.s3a.assumed.role.arn")
                 is not None
+                and credential.get(
+                    "spark.hadoop.fs.s3a.assumed.role.arn"
+                ).strip()
+                != ""
             ):
-
-
-
+                aws_role = f" AWS_ROLE = '{credential.get('spark.hadoop.fs.s3a.assumed.role.arn')}'"
+                sql_query += f" CREDENTIALS = ({aws_role})"
+                sql_query += f" ENCRYPTION = ( TYPE='AWS_SSE_KMS' KMS_KEY_ID = '{credential.get('spark.hadoop.fs.s3a.server-side-encryption.key')}' )"

         logger.info(self.session.sql(sql_query).collect())
         self.stages_for_aws[bucket_name] = stage_name
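On the S3 side, the temp-stage DDL now picks up credentials from the Spark session configuration: static fs.s3a keys (plus an optional session token) take precedence, otherwise an assumed role with a KMS key is used. A rough standalone sketch of that precedence, with a hypothetical s3_stage_clauses helper rather than the StageLocator method itself:

def s3_stage_clauses(conf: dict[str, str]) -> str:
    """Illustrative helper (not part of the package): build the CREDENTIALS /
    ENCRYPTION suffix for CREATE TEMP STAGE from Spark-style fs.s3a options,
    mirroring the precedence shown in the diff."""

    def has(key: str) -> bool:
        value = conf.get(key)
        return value is not None and value.strip() != ""

    if has("spark.hadoop.fs.s3a.access.key") and has("spark.hadoop.fs.s3a.secret.key"):
        # Static keys win; a session token is appended when present.
        creds = (
            f" AWS_KEY_ID = '{conf['spark.hadoop.fs.s3a.access.key']}'"
            f" AWS_SECRET_KEY = '{conf['spark.hadoop.fs.s3a.secret.key']}'"
        )
        if conf.get("spark.hadoop.fs.s3a.session.token") is not None:
            creds += f" AWS_TOKEN = '{conf['spark.hadoop.fs.s3a.session.token']}'"
        return f" CREDENTIALS = ({creds}) ENCRYPTION = ( TYPE = 'AWS_SSE_S3' )"

    if has("spark.hadoop.fs.s3a.server-side-encryption.key") and has(
        "spark.hadoop.fs.s3a.assumed.role.arn"
    ):
        # Otherwise fall back to an assumed role plus a KMS key.
        role = conf["spark.hadoop.fs.s3a.assumed.role.arn"]
        kms = conf["spark.hadoop.fs.s3a.server-side-encryption.key"]
        return (
            f" CREDENTIALS = ( AWS_ROLE = '{role}' )"
            f" ENCRYPTION = ( TYPE='AWS_SSE_KMS' KMS_KEY_ID = '{kms}' )"
        )

    return ""  # nothing usable in the Spark config; create the stage without credentials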
--- snowflake/snowpark_connect/relation/write/map_write.py (0.28.1)
+++ snowflake/snowpark_connect/relation/write/map_write.py (0.29.0)
@@ -36,6 +36,8 @@ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     is_cloud_path,
+    is_supported_compression,
+    supported_compressions_for_format,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.relation.read.reader_config import CsvWriterConfig
@@ -179,7 +181,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     f"Skipping REMOVE for root path {write_path} - too broad scope"
                 )
             else:
-                remove_command = f"REMOVE {write_path}/"
+                remove_command = f"REMOVE '{write_path}/'"
                 session.sql(remove_command).collect()
                 logger.info(f"Successfully cleared directory: {write_path}")
         except Exception as e:
@@ -208,6 +210,20 @@ def map_write(request: proto_base.ExecutePlanRequest):
         compression = write_op.options.get(
             "compression", default_compression
         ).upper()
+
+        if not is_supported_compression(write_op.source, compression):
+            supported_compressions = supported_compressions_for_format(
+                write_op.source
+            )
+            raise AnalysisException(
+                f"Compression {compression} is not supported for {write_op.source} format. "
+                + (
+                    f"Supported compressions: {sorted(supported_compressions)}"
+                    if supported_compressions
+                    else "No compression supported for this format."
+                )
+            )
+
         parameters = {
             "location": temp_file_prefix_on_stage,
             "file_format_type": write_op.source
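The new pre-flight check turns an unsupported codec choice into an AnalysisException before any data is copied. The helpers come from relation/io_utils.py, which this diff does not show; the sketch below only illustrates their shape, with an invented format-to-codec table that should not be read as the package's actual mapping:

# Illustrative only: the real tables live in
# snowflake/snowpark_connect/relation/io_utils.py and may differ.
_SUPPORTED_COMPRESSIONS = {
    "csv": {"NONE", "GZIP", "BZIP2", "BROTLI", "ZSTD", "DEFLATE", "RAW_DEFLATE"},
    "json": {"NONE", "GZIP", "BZIP2", "BROTLI", "ZSTD", "DEFLATE", "RAW_DEFLATE"},
    "parquet": {"NONE", "SNAPPY", "LZO"},
}


def supported_compressions_for_format(fmt: str) -> set[str]:
    return _SUPPORTED_COMPRESSIONS.get(fmt.lower(), set())


def is_supported_compression(fmt: str, compression: str) -> bool:
    return compression.upper() in supported_compressions_for_format(fmt)


# With the illustrative table above, a parquet write with gzip would be rejected.
assert is_supported_compression("parquet", "snappy")
assert not is_supported_compression("parquet", "gzip")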
@@ -417,9 +433,27 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     )
                 case _:
                     snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
+                    save_method = write_op.table.save_method
+
+                    if (
+                        write_op.source == "snowflake"
+                        and write_op.table.save_method
+                        == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_UNSPECIFIED
+                    ):
+                        save_method = (
+                            commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
+                        )
+                        if len(write_op.table.table_name) == 0:
+                            dbtable_name = write_op.options.get("dbtable", "")
+                            if len(dbtable_name) == 0:
+                                raise SnowparkConnectNotImplementedError(
+                                    "Save command is not supported without a table name"
+                                )
+                            else:
+                                snowpark_table_name = _spark_to_snowflake(dbtable_name)

                     if (
-
+                        save_method
                         == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
                     ):
                         match write_mode:
@@ -481,7 +515,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                             column_order=_column_order_for_write,
                         )
                     elif (
-
+                        save_method
                         == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
                     ):
                         _validate_schema_and_get_writer(
@@ -493,7 +527,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                         )
                     else:
                         raise SnowparkConnectNotImplementedError(
-                            f"Save command not supported: {
+                            f"Save command not supported: {save_method}"
                         )


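For format("snowflake") writes that arrive without an explicit save method, the new branch promotes the unspecified method to save-as-table and falls back to the dbtable option for the target table name. A condensed sketch of that decision, with plain strings standing in for the Spark Connect proto enum values:

def resolve_snowflake_target(
    source: str, save_method: str, table_name: str, options: dict[str, str]
) -> tuple[str, str]:
    """Sketch of the new defaulting logic; strings replace the proto enum."""
    if source == "snowflake" and save_method == "UNSPECIFIED":
        save_method = "SAVE_AS_TABLE"
        if not table_name:
            # No table name on the proto: try the Snowflake connector-style option.
            table_name = options.get("dbtable", "")
            if not table_name:
                raise NotImplementedError(
                    "Save command is not supported without a table name"
                )
    return save_method, table_name


print(resolve_snowflake_target("snowflake", "UNSPECIFIED", "", {"dbtable": "MY_DB.MY_SCHEMA.T"}))
# -> ('SAVE_AS_TABLE', 'MY_DB.MY_SCHEMA.T')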
--- snowflake/snowpark_connect/server.py (0.28.1)
+++ snowflake/snowpark_connect/server.py (0.29.0)
@@ -1161,23 +1161,28 @@ def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSession:


 def init_spark_session(conf: SparkConf = None) -> SparkSession:
-
-
-
-
-
-
-
-
-
-
-
-
+    if os.environ.get("JAVA_HOME") is None:
+        try:
+            # For Notebooks on SPCS
+            from jdk4py import JAVA_HOME
+
+            os.environ["JAVA_HOME"] = str(JAVA_HOME)
+        except ModuleNotFoundError:
+            # For notebooks on Warehouse
+            conda_prefix = os.environ.get("CONDA_PREFIX")
+            if conda_prefix is not None:
+                os.environ["JAVA_HOME"] = conda_prefix
+                os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
+                    conda_prefix, "lib", "server"
+                )
+    logger.info("JAVA_HOME=%s", os.environ.get("JAVA_HOME", "Not defined"))

     os.environ["SPARK_LOCAL_HOSTNAME"] = "127.0.0.1"
     os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"

-
+    from snowflake.snowpark_connect.utils.session import _get_current_snowpark_session
+
+    snowpark_session = _get_current_snowpark_session()
     start_session(snowpark_session=snowpark_session)
     return get_session(conf=conf)

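init_spark_session now bootstraps JAVA_HOME itself rather than assuming the environment provides it: an existing value is respected, then the jdk4py wheel is tried (SPCS notebooks), then CONDA_PREFIX (warehouse notebooks). A condensed sketch of that resolution order:

import os


def ensure_java_home() -> str | None:
    """Sketch of the fallback order added to init_spark_session."""
    if os.environ.get("JAVA_HOME") is not None:
        return os.environ["JAVA_HOME"]
    try:
        from jdk4py import JAVA_HOME  # bundled JRE, if the wheel is installed

        os.environ["JAVA_HOME"] = str(JAVA_HOME)
    except ModuleNotFoundError:
        conda_prefix = os.environ.get("CONDA_PREFIX")
        if conda_prefix is not None:
            os.environ["JAVA_HOME"] = conda_prefix
            os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(conda_prefix, "lib", "server")
    return os.environ.get("JAVA_HOME")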
--- snowflake/snowpark_connect/utils/context.py (0.28.1)
+++ snowflake/snowpark_connect/utils/context.py (0.29.0)
@@ -30,9 +30,6 @@ _sql_aggregate_function_count = ContextVar[int](
     "_contains_aggregate_function", default=0
 )

-# Context for parsing map_partitions
-_map_partitions_stack = ContextVar[int]("_map_partitions_stack", default=0)
-
 # We have to generate our own plan IDs that are different from Spark's.
 # Spark plan IDs start at 0, so pick a "big enough" number to avoid overlaps.
 _STARTING_SQL_PLAN_ID = 0x80000000
@@ -230,16 +227,6 @@ def push_evaluating_join_condition(join_type, left_keys, right_keys):
         _is_evaluating_join_condition.set(prev)


-@contextmanager
-def push_map_partitions():
-    _map_partitions_stack.set(_map_partitions_stack.get() + 1)
-    yield
-
-
-def map_partitions_depth() -> int:
-    return _map_partitions_stack.get()
-
-
 @contextmanager
 def push_sql_scope():
     """
@@ -410,7 +397,6 @@ def clear_context_data() -> None:
     _view_process_context.set([])
     _next_sql_plan_id.set(_STARTING_SQL_PLAN_ID)
     _sql_plan_name_map.set({})
-    _map_partitions_stack.set(0)
     _sql_aggregate_function_count.set(0)
     _sql_named_args.set({})
     _sql_pos_args.set({})
--- snowflake/snowpark_connect/utils/io_utils.py (0.28.1)
+++ snowflake/snowpark_connect/utils/io_utils.py (0.29.0)
@@ -3,10 +3,46 @@
 #
 import contextlib
 import functools
+import re

 from snowflake.snowpark import Session
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    create_file_format_statement,
+)
 from snowflake.snowpark_connect.utils.identifiers import FQN

+_MINUS_AT_THE_BEGINNING_REGEX = re.compile(r"^-")
+
+
+def cached_file_format(
+    session: Session, file_format: str, format_type_options: dict[str, str]
+) -> str:
+    """
+    Cache and return a file format name based on the given options.
+    """
+
+    function_name = _MINUS_AT_THE_BEGINNING_REGEX.sub(
+        "1", str(hash(frozenset(format_type_options.items())))
+    )
+    file_format_name = f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{function_name}"
+    if file_format_name in session._file_formats:
+        return file_format_name
+
+    session.sql(
+        create_file_format_statement(
+            file_format_name,
+            file_format,
+            format_type_options,
+            temp=True,
+            if_not_exist=True,
+            use_scoped_temp_objects=False,
+            is_generated=True,
+        )
+    ).collect()
+
+    session._file_formats.add(file_format_name)
+    return file_format_name
+

 @functools.cache
 def file_format(
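The new cached_file_format keeps one TEMP FILE FORMAT per distinct option set and remembers it in session._file_formats (initialised in configure_snowpark_session, shown next), so repeated reads with identical options skip the CREATE statement. The naming scheme can be exercised without a connection; a small sketch reproducing just the key construction from the diff:

import re

_MINUS_AT_THE_BEGINNING_REGEX = re.compile(r"^-")


def file_format_cache_key(file_format: str, options: dict[str, str]) -> str:
    # Same naming scheme as cached_file_format: hash the option set and flip a
    # leading minus sign to "1" so the suffix stays identifier-friendly.
    digest = _MINUS_AT_THE_BEGINNING_REGEX.sub(
        "1", str(hash(frozenset(options.items())))
    )
    return f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{digest}"


# Identical options map to the same name, so the CREATE runs once per session;
# note that hash() is salted per process, so the names differ across runs.
a = file_format_cache_key("CSV", {"FIELD_DELIMITER": ",", "SKIP_HEADER": "1"})
b = file_format_cache_key("CSV", {"SKIP_HEADER": "1", "FIELD_DELIMITER": ","})
assert a == b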
--- snowflake/snowpark_connect/utils/session.py (0.28.1)
+++ snowflake/snowpark_connect/utils/session.py (0.29.0)
@@ -71,6 +71,9 @@ def configure_snowpark_session(session: snowpark.Session):
     init_builtin_udf_cache(session)
     init_external_udxf_cache(session)

+    # file format cache
+    session._file_formats = set()
+
     # Set experimental parameters (warnings globally suppressed)
     session.ast_enabled = False
     session.eliminate_numeric_sql_value_cast_enabled = False
--- snowflake/snowpark_connect/utils/udf_cache.py (0.28.1)
+++ snowflake/snowpark_connect/utils/udf_cache.py (0.29.0)
@@ -98,7 +98,11 @@ def cached_udaf(
     # Register the function outside the lock to avoid contention
     wrapped_func = udaf(
         udaf_type,
-        name=
+        name=[
+            Session.get_active_session().get_current_database(),
+            Session.get_active_session().get_current_schema(),
+            name,
+        ],
         return_type=return_type,
         input_types=input_types,
         imports=imports,
@@ -155,7 +159,11 @@ def cached_udf(
     # but this will not cause any issues.
     wrapped_func = udf(
         _null_safe_wrapper,
-        name=
+        name=[
+            Session.get_active_session().get_current_database(),
+            Session.get_active_session().get_current_schema(),
+            name,
+        ],
         return_type=return_type,
         input_types=input_types,
         imports=imports,
@@ -205,7 +213,11 @@ def cached_udtf(
     # Register the function outside the lock to avoid contention
     wrapped_func = udtf(
         func,
-        name=
+        name=[
+            Session.get_active_session().get_current_database(),
+            Session.get_active_session().get_current_schema(),
+            name,
+        ],
         output_schema=output_schema,
         input_types=input_types,
         imports=imports,
@@ -306,11 +318,20 @@ def register_cached_sql_udf(
         )

         with _lock:
-
+            function_identifier = ".".join(
+                [
+                    Session.get_active_session().get_current_database(),
+                    Session.get_active_session().get_current_schema(),
+                    function_name,
+                ]
+            )
+            cache[function_name] = function_identifier
+    else:
+        function_identifier = cache[function_name]

     return functools.partial(
         call_udf,
-
+        function_identifier,
     )


@@ -384,9 +405,18 @@ def register_cached_java_udf(
         )

         with _lock:
-
+            function_identifier = ".".join(
+                [
+                    Session.get_active_session().get_current_database(),
+                    Session.get_active_session().get_current_schema(),
+                    function_name,
+                ]
+            )
+            cache[function_name] = function_identifier
+    else:
+        function_identifier = cache[function_name]

     return functools.partial(
         call_udf,
-
+        function_identifier,
     )
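All of the cached UDx registration paths now pass a fully qualified [database, schema, name] identifier instead of a bare name, presumably so cached functions still resolve if the session's current schema changes between registration and call. A minimal sketch of that convention, assuming an active Snowpark session with a current database and schema set:

from snowflake.snowpark import Session


def fully_qualified(name: str) -> str:
    # Mirrors the triple the cache now hands to udf()/udtf()/udaf() and call_udf();
    # assumes Session.get_active_session() returns a session with db/schema set.
    session = Session.get_active_session()
    return ".".join(
        [session.get_current_database(), session.get_current_schema(), name]
    )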
--- snowpark_connect-0.28.1.dist-info/METADATA
+++ snowpark_connect-0.29.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.28.1
+Version: 0.29.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,7 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.40.0,==1.39.
+Requires-Dist: snowflake-snowpark-python[pandas]<1.40.0,==1.39.1
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0
@@ -27,6 +27,7 @@ Requires-Dist: grpcio<1.63,>=1.56.0
 Requires-Dist: grpcio-status<1.63,>=1.56.0
 Requires-Dist: googleapis-common-protos>=1.56.4
 Requires-Dist: numpy<2,>=1.15
+Requires-Dist: gcsfs>=2025.9.0
 Dynamic: author
 Dynamic: description
 Dynamic: description-content-type