snowpark-connect 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/config.py +12 -3
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +31 -68
- snowflake/snowpark_connect/expression/map_unresolved_function.py +172 -210
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +207 -20
- snowflake/snowpark_connect/relation/io_utils.py +21 -1
- snowflake/snowpark_connect/relation/map_extension.py +21 -4
- snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
- snowflake/snowpark_connect/relation/map_relation.py +1 -3
- snowflake/snowpark_connect/relation/map_sql.py +112 -53
- snowflake/snowpark_connect/relation/read/map_read.py +22 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +105 -26
- snowflake/snowpark_connect/relation/read/map_read_json.py +45 -34
- snowflake/snowpark_connect/relation/read/map_read_table.py +58 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
- snowflake/snowpark_connect/relation/stage_locator.py +85 -53
- snowflake/snowpark_connect/relation/write/map_write.py +95 -14
- snowflake/snowpark_connect/server.py +18 -13
- snowflake/snowpark_connect/utils/context.py +21 -14
- snowflake/snowpark_connect/utils/identifiers.py +8 -2
- snowflake/snowpark_connect/utils/io_utils.py +36 -0
- snowflake/snowpark_connect/utils/session.py +3 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +61 -0
- snowflake/snowpark_connect/utils/udf_cache.py +37 -7
- snowflake/snowpark_connect/utils/udf_utils.py +9 -8
- snowflake/snowpark_connect/utils/udtf_utils.py +3 -2
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/RECORD +36 -35
- {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/config.py

@@ -264,16 +264,22 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
     "enable_snowflake_extension_behavior",
+    "spark.hadoop.fs.s3a.server-side-encryption.key",
+    "spark.hadoop.fs.s3a.assumed.role.arn",
 }
-AZURE_SAS_KEY = re.compile(
+AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
 )
+AZURE_SAS_KEY = re.compile(
+    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+)
 
 
 def valid_session_config_key(key: str):
     return (
         key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
         or AZURE_SAS_KEY.match(key)  # Azure session keys
+        or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
     )
 
 
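To illustrate the broadened validation: the first pattern admits account-scoped blob SAS keys, the second fixed-token ADLS SAS keys. A standalone sketch of the check (the sample key names below are hypothetical, not from the package):

    import re

    # Same patterns as in config.py above, restated for a self-contained check.
    AZURE_ACCOUNT_KEY = re.compile(
        r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
    )
    AZURE_SAS_KEY = re.compile(
        r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
    )

    # Hypothetical container/account names, for illustration only:
    assert AZURE_ACCOUNT_KEY.match("fs.azure.sas.mycontainer.myaccount.blob.core.windows.net")
    assert AZURE_SAS_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")
    assert not AZURE_ACCOUNT_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")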
@@ -283,7 +289,7 @@ class SessionConfig:
     default_session_config = {
         "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
-        "snowpark.connect.cte.optimization_enabled": "true",
+        "snowpark.connect.cte.optimization_enabled": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
@@ -578,7 +584,10 @@ def set_snowflake_parameters(
             cte_enabled = str_to_bool(value)
             snowpark_session.cte_optimization_enabled = cte_enabled
             logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
-
+        case "snowpark.connect.structured_types.fix":
+            # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+            snowpark.context._enable_fix_2360274 = str_to_bool(value)
+            logger.info(f"Updated snowpark session structured types fix: {value}")
         case _:
             pass
 
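For context, these session keys are set from the Spark Connect client and dispatched by the match statement above. A minimal sketch, assuming a snowpark-connect server is already listening at an illustrative endpoint:

    from pyspark.sql import SparkSession

    # Illustrative URL; any reachable Spark Connect endpoint works.
    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    # Each conf.set reaches set_snowflake_parameters on the server side,
    # which toggles the corresponding Snowpark session flag.
    spark.conf.set("snowpark.connect.cte.optimization_enabled", "true")
    spark.conf.set("snowpark.connect.structured_types.fix", "true")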
snowflake/snowpark_connect/execute_plan/map_execution_command.py

@@ -1,18 +1,16 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import re
-import uuid
 from collections import Counter
 
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
-from snowflake.snowpark import DataFrame, Session
-from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNames
 from snowflake.snowpark_connect.config import global_config, sessions_config
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
 from snowflake.snowpark_connect.expression import map_udf
 from snowflake.snowpark_connect.relation import map_udtf
@@ -28,10 +26,7 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
-
-_INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
-
-_CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+from snowflake.snowpark_connect.utils.temporary_view_cache import register_temp_view
 
 
 def _create_column_rename_map(
@@ -98,32 +93,35 @@ def map_execution_command(
             input_df = input_df_container.dataframe
             column_map = input_df_container.column_map
 
+            # TODO: Remove code handling deduplication. When view are not materialized we don't have to care about it.
             session_config = sessions_config[get_session_id()]
             duplicate_column_names_handling_mode = session_config[
                 "snowpark.connect.views.duplicate_column_names_handling_mode"
             ]
 
+            spark_columns = input_df_container.column_map.get_spark_columns()
             # rename columns to match spark names
             if duplicate_column_names_handling_mode == "rename":
                 # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, True)
-                )
+                rename_map = _create_column_rename_map(column_map.columns, True)
+                snowpark_columns = list(rename_map.values())
+                input_df = input_df.rename(rename_map)
             elif duplicate_column_names_handling_mode == "drop":
                 # Drop duplicate column names by removing all but the first occurrence.
                 duplicated_columns, remaining_columns = _find_duplicated_columns(
                     column_map.columns
                 )
+                rename_map = _create_column_rename_map(remaining_columns, False)
+                snowpark_columns = list(rename_map.values())
+                spark_columns = list(dict.fromkeys(spark_columns))
                 if len(duplicated_columns) > 0:
                     input_df = input_df.drop(*duplicated_columns)
-                input_df = input_df.rename(
-                    _create_column_rename_map(remaining_columns, False)
-                )
+                input_df = input_df.rename(rename_map)
             else:
                 # rename columns without deduplication
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, True)
-                )
+                rename_map = _create_column_rename_map(column_map.columns, True)
+                snowpark_columns = list(rename_map.values())
+                input_df = input_df.rename(rename_map)
 
             if req.is_global:
                 view_name = [global_config.spark_sql_globalTempDatabase, req.name]
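`_create_column_rename_map` itself is outside this hunk; a minimal stand-in showing the renaming policy the comments describe (first occurrence kept, later duplicates suffixed with _DEDUP_<n>), not the package's actual helper:

    from collections import Counter

    def dedup_column_names(names: list[str]) -> list[str]:
        # Hypothetical stand-in: suffix the second and later occurrences
        # of a duplicated name with _DEDUP_<n>.
        seen = Counter()
        result = []
        for name in names:
            seen[name] += 1
            result.append(name if seen[name] == 1 else f"{name}_DEDUP_{seen[name] - 1}")
        return result

    assert dedup_column_names(["ID", "NAME", "ID"]) == ["ID", "NAME", "ID_DEDUP_1"]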
@@ -132,18 +130,23 @@ def map_execution_command(
                 view_name = [
                     spark_to_sf_single_id_with_unquoting(part) for part in view_name
                 ]
+                joined_view_name = ".".join(view_name)
 
-                [previous view-materialization logic (11 lines) not preserved in this diff view]
+                schema = StructType(
+                    [
+                        StructField(field.name, field.datatype)
+                        for field in input_df.schema.fields
+                    ]
+                )
+                input_df_container = DataFrameContainer.create_with_column_mapping(
+                    dataframe=input_df,
+                    spark_column_names=spark_columns,
+                    snowpark_column_names=snowpark_columns,
+                    parent_column_name_map=input_df_container.column_map,
+                    cached_schema_getter=lambda: schema,
+                )
+
+                register_temp_view(joined_view_name, input_df_container, req.replace)
         case "write_stream_operation_start":
             match request.plan.command.write_stream_operation_start.format:
                 case "console":
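The `register_temp_view` call replaces the old path that materialized a Snowflake view. The new utils/temporary_view_cache.py module (+61 lines) is not shown in this section; conceptually it amounts to an in-memory registry along these lines (a sketch under that assumption; real signatures and error types may differ):

    # Conceptual sketch only, not the actual module.
    _views: dict[str, object] = {}

    def register_temp_view(name: str, container: object, replace: bool) -> None:
        # Cache the DataFrame container under the view name instead of
        # creating a Snowflake view; refuse duplicates unless replace=True.
        if name in _views and not replace:
            raise ValueError(f"Temporary view {name} already exists")
        _views[name] = container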
@@ -207,43 +210,3 @@ def map_execution_command(
             raise SnowparkConnectNotImplementedError(
                 f"Command type {other} not implemented"
             )
-
-
-def _generate_random_builtin_view_name() -> str:
-    return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
-def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
-    return "view definition refers to view being defined" in str(exc).lower()
-
-
-def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
-    """
-    In order to create a view, which references itself, Spark would here take the previous
-    definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
-    as a workaround, we create a chain of internal views instead. This function:
-    1. Renames previous definition of A to some internal name (instead of deleting).
-    2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
-    """
-
-    session = Session.get_active_session()
-
-    view_name = ".".join(view_name)
-
-    tmp_name = _generate_random_builtin_view_name()
-    old_name_replacement = _generate_random_builtin_view_name()
-
-    input_df.create_or_replace_temp_view(tmp_name)
-
-    session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
-    ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
-    ddl = ddl.replace(view_name, old_name_replacement)
-
-    # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
-    ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
-    session.sql(ddl).collect()
-
-    session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()