snowpark-connect 0.28.0__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of snowpark-connect has been flagged as a potentially problematic release.

Files changed (36)
  1. snowflake/snowpark_connect/config.py +12 -3
  2. snowflake/snowpark_connect/execute_plan/map_execution_command.py +31 -68
  3. snowflake/snowpark_connect/expression/map_unresolved_function.py +172 -210
  4. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +207 -20
  5. snowflake/snowpark_connect/relation/io_utils.py +21 -1
  6. snowflake/snowpark_connect/relation/map_extension.py +21 -4
  7. snowflake/snowpark_connect/relation/map_map_partitions.py +7 -8
  8. snowflake/snowpark_connect/relation/map_relation.py +1 -3
  9. snowflake/snowpark_connect/relation/map_sql.py +112 -53
  10. snowflake/snowpark_connect/relation/read/map_read.py +22 -3
  11. snowflake/snowpark_connect/relation/read/map_read_csv.py +105 -26
  12. snowflake/snowpark_connect/relation/read/map_read_json.py +45 -34
  13. snowflake/snowpark_connect/relation/read/map_read_table.py +58 -0
  14. snowflake/snowpark_connect/relation/read/map_read_text.py +6 -1
  15. snowflake/snowpark_connect/relation/stage_locator.py +85 -53
  16. snowflake/snowpark_connect/relation/write/map_write.py +95 -14
  17. snowflake/snowpark_connect/server.py +18 -13
  18. snowflake/snowpark_connect/utils/context.py +21 -14
  19. snowflake/snowpark_connect/utils/identifiers.py +8 -2
  20. snowflake/snowpark_connect/utils/io_utils.py +36 -0
  21. snowflake/snowpark_connect/utils/session.py +3 -0
  22. snowflake/snowpark_connect/utils/temporary_view_cache.py +61 -0
  23. snowflake/snowpark_connect/utils/udf_cache.py +37 -7
  24. snowflake/snowpark_connect/utils/udf_utils.py +9 -8
  25. snowflake/snowpark_connect/utils/udtf_utils.py +3 -2
  26. snowflake/snowpark_connect/version.py +1 -1
  27. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/METADATA +3 -2
  28. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/RECORD +36 -35
  29. {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-connect +0 -0
  30. {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-session +0 -0
  31. {snowpark_connect-0.28.0.data → snowpark_connect-0.29.0.data}/scripts/snowpark-submit +0 -0
  32. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/WHEEL +0 -0
  33. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE-binary +0 -0
  34. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/LICENSE.txt +0 -0
  35. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/licenses/NOTICE-binary +0 -0
  36. {snowpark_connect-0.28.0.dist-info → snowpark_connect-0.29.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/config.py
@@ -264,16 +264,22 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
     "enable_snowflake_extension_behavior",
+    "spark.hadoop.fs.s3a.server-side-encryption.key",
+    "spark.hadoop.fs.s3a.assumed.role.arn",
 }
-AZURE_SAS_KEY = re.compile(
+AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
 )
+AZURE_SAS_KEY = re.compile(
+    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+)
 
 
 def valid_session_config_key(key: str):
     return (
         key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
         or AZURE_SAS_KEY.match(key)  # Azure session keys
+        or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
     )
 
 
@@ -283,7 +289,7 @@ class SessionConfig:
     default_session_config = {
         "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
-        "snowpark.connect.cte.optimization_enabled": "true",
+        "snowpark.connect.cte.optimization_enabled": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
@@ -578,7 +584,10 @@ def set_snowflake_parameters(
             cte_enabled = str_to_bool(value)
             snowpark_session.cte_optimization_enabled = cte_enabled
             logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
-
+        case "snowpark.connect.structured_types.fix":
+            # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+            snowpark.context._enable_fix_2360274 = str_to_bool(value)
+            logger.info(f"Updated snowpark session structured types fix: {value}")
         case _:
             pass
 
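Both the CTE flag and the new snowpark.connect.structured_types.fix toggle arrive as strings and are parsed with str_to_bool before being applied; the internal _enable_fix_2360274 switch is a temporary escape hatch until SNOW-2367714 enables the fix by default. str_to_bool itself is not shown in this diff; an illustrative stand-in:

# Illustrative stand-in only: the real str_to_bool lives elsewhere in
# snowpark-connect and its exact accepted spellings are not shown here.
def str_to_bool(value: str) -> bool:
    return value.strip().lower() in ("true", "1", "yes")


assert str_to_bool("true") is True
assert str_to_bool("False") is False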
 
snowflake/snowpark_connect/execute_plan/map_execution_command.py
@@ -1,18 +1,16 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import re
-import uuid
 from collections import Counter
 
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
-from snowflake.snowpark import DataFrame, Session
-from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
 from snowflake.snowpark_connect.column_name_handler import ColumnNames
 from snowflake.snowpark_connect.config import global_config, sessions_config
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
 from snowflake.snowpark_connect.expression import map_udf
 from snowflake.snowpark_connect.relation import map_udtf

@@ -28,10 +26,7 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
-
-_INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
-
-_CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+from snowflake.snowpark_connect.utils.temporary_view_cache import register_temp_view
 
 
 def _create_column_rename_map(

@@ -98,32 +93,35 @@ def map_execution_command(
             input_df = input_df_container.dataframe
             column_map = input_df_container.column_map
 
+            # TODO: Remove code handling deduplication. When view are not materialized we don't have to care about it.
             session_config = sessions_config[get_session_id()]
             duplicate_column_names_handling_mode = session_config[
                 "snowpark.connect.views.duplicate_column_names_handling_mode"
             ]
 
+            spark_columns = input_df_container.column_map.get_spark_columns()
             # rename columns to match spark names
             if duplicate_column_names_handling_mode == "rename":
                 # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, True)
-                )
+                rename_map = _create_column_rename_map(column_map.columns, True)
+                snowpark_columns = list(rename_map.values())
+                input_df = input_df.rename(rename_map)
             elif duplicate_column_names_handling_mode == "drop":
                 # Drop duplicate column names by removing all but the first occurrence.
                 duplicated_columns, remaining_columns = _find_duplicated_columns(
                     column_map.columns
                 )
+                rename_map = _create_column_rename_map(remaining_columns, False)
+                snowpark_columns = list(rename_map.values())
+                spark_columns = list(dict.fromkeys(spark_columns))
                 if len(duplicated_columns) > 0:
                     input_df = input_df.drop(*duplicated_columns)
-                input_df = input_df.rename(
-                    _create_column_rename_map(remaining_columns, False)
-                )
+                input_df = input_df.rename(rename_map)
             else:
                 # rename columns without deduplication
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, False)
-                )
+                rename_map = _create_column_rename_map(column_map.columns, True)
+                snowpark_columns = list(rename_map.values())
+                input_df = input_df.rename(rename_map)
 
             if req.is_global:
                 view_name = [global_config.spark_sql_globalTempDatabase, req.name]
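
The rename branch's comment describes the deduplication scheme: repeated Spark column names get _DEDUP_1, _DEDUP_2, ... suffixes. The actual helpers (_create_column_rename_map, _find_duplicated_columns) are defined elsewhere in this module; a rough, self-contained illustration of the naming scheme only:

from collections import Counter


def deduplicate_column_names(names: list[str]) -> list[str]:
    # Illustration of the scheme from the comment above, not the real helper:
    # the first occurrence keeps its name, later ones get _DEDUP_<n> suffixes.
    seen = Counter()
    result = []
    for name in names:
        seen[name] += 1
        result.append(name if seen[name] == 1 else f"{name}_DEDUP_{seen[name] - 1}")
    return result


# e.g. ["ID", "NAME", "ID"] -> ["ID", "NAME", "ID_DEDUP_1"]
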
@@ -132,18 +130,23 @@ def map_execution_command(
             view_name = [
                 spark_to_sf_single_id_with_unquoting(part) for part in view_name
             ]
+            joined_view_name = ".".join(view_name)
 
-            if req.replace:
-                try:
-                    input_df.create_or_replace_temp_view(view_name)
-                except SnowparkSQLException as exc:
-                    if _is_error_caused_by_view_referencing_itself(exc):
-                        # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
-                        _create_chained_view(input_df, view_name)
-                    else:
-                        raise
-            else:
-                input_df.create_temp_view(view_name)
+            schema = StructType(
+                [
+                    StructField(field.name, field.datatype)
+                    for field in input_df.schema.fields
+                ]
+            )
+            input_df_container = DataFrameContainer.create_with_column_mapping(
+                dataframe=input_df,
+                spark_column_names=spark_columns,
+                snowpark_column_names=snowpark_columns,
+                parent_column_name_map=input_df_container.column_map,
+                cached_schema_getter=lambda: schema,
+            )
+
+            register_temp_view(joined_view_name, input_df_container, req.replace)
         case "write_stream_operation_start":
             match request.plan.command.write_stream_operation_start.format:
                 case "console":
@@ -207,43 +210,3 @@ def map_execution_command(
             raise SnowparkConnectNotImplementedError(
                 f"Command type {other} not implemented"
             )
-
-
-def _generate_random_builtin_view_name() -> str:
-    return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
-def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
-    return "view definition refers to view being defined" in str(exc).lower()
-
-
-def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
-    """
-    In order to create a view, which references itself, Spark would here take the previous
-    definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
-    as a workaround, we create a chain of internal views instead. This function:
-    1. Renames previous definition of A to some internal name (instead of deleting).
-    2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
-    """
-
-    session = Session.get_active_session()
-
-    view_name = ".".join(view_name)
-
-    tmp_name = _generate_random_builtin_view_name()
-    old_name_replacement = _generate_random_builtin_view_name()
-
-    input_df.create_or_replace_temp_view(tmp_name)
-
-    session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
-    ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
-    ddl = ddl.replace(view_name, old_name_replacement)
-
-    # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
-    ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
-    session.sql(ddl).collect()
-
-    session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()