snowpark-connect 0.20.2__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (67)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +28 -14
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  6. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  7. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  8. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  9. snowflake/snowpark_connect/expression/map_unresolved_function.py +279 -43
  10. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  11. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  12. snowflake/snowpark_connect/expression/typer.py +6 -6
  13. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  14. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  15. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  16. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  17. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  18. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  19. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  20. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  21. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  22. snowflake/snowpark_connect/relation/map_aggregate.py +72 -47
  23. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  24. snowflake/snowpark_connect/relation/map_column_ops.py +207 -144
  25. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  26. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  27. snowflake/snowpark_connect/relation/map_join.py +72 -63
  28. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  29. snowflake/snowpark_connect/relation/map_map_partitions.py +21 -16
  30. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  31. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  32. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  33. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  34. snowflake/snowpark_connect/relation/map_sql.py +155 -78
  35. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  36. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  37. snowflake/snowpark_connect/relation/map_udtf.py +6 -9
  38. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  39. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  40. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  41. snowflake/snowpark_connect/relation/read/map_read_json.py +7 -7
  42. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  43. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  44. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  45. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  46. snowflake/snowpark_connect/relation/utils.py +11 -5
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  48. snowflake/snowpark_connect/relation/write/map_write.py +199 -40
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  50. snowflake/snowpark_connect/server.py +34 -4
  51. snowflake/snowpark_connect/type_mapping.py +2 -23
  52. snowflake/snowpark_connect/utils/cache.py +27 -22
  53. snowflake/snowpark_connect/utils/context.py +33 -17
  54. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  55. snowflake/snowpark_connect/utils/session.py +41 -34
  56. snowflake/snowpark_connect/utils/telemetry.py +1 -2
  57. snowflake/snowpark_connect/version.py +1 -1
  58. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/METADATA +5 -3
  59. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/RECORD +67 -64
  60. snowpark_connect-0.21.0.dist-info/licenses/LICENSE-binary +568 -0
  61. snowpark_connect-0.21.0.dist-info/licenses/NOTICE-binary +1533 -0
  62. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-connect +0 -0
  63. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-session +0 -0
  64. {snowpark_connect-0.20.2.data → snowpark_connect-0.21.0.data}/scripts/snowpark-submit +0 -0
  65. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/WHEEL +0 -0
  66. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
  67. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.21.0.dist-info}/top_level.txt +0 -0
--- a/snowflake/snowpark_connect/relation/read/map_read_text.py
+++ b/snowflake/snowpark_connect/relation/read/map_read_text.py
@@ -7,7 +7,7 @@ import typing
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
-from snowflake.snowpark_connect.column_name_handler import with_column_map
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
@@ -71,7 +71,7 @@ def map_read_text(
     schema: snowpark.types.StructType | None,
     session: snowpark.Session,
     paths: list[str],
-) -> snowpark.DataFrame:
+) -> DataFrameContainer:
     """
     Read a TEXT file into a Snowpark DataFrame.
     """
@@ -98,9 +98,9 @@ def map_read_text(
     renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
         df, rel.common.plan_id
     )
-    return with_column_map(
-        renamed_df,
-        spark_column_names,
-        snowpark_column_names,
-        [f.datatype for f in df.schema.fields],
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=renamed_df,
+        spark_column_names=spark_column_names,
+        snowpark_column_names=snowpark_column_names,
+        snowpark_column_types=[f.datatype for f in df.schema.fields],
     )
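
Note: readers that previously returned a bare snowpark.DataFrame carrying a _column_map attribute now return a DataFrameContainer. A minimal sketch of the new pattern, with the constructor call and keyword names taken from the hunk above (the build_container wrapper is hypothetical, for illustration only):

    from snowflake.snowpark_connect.dataframe_container import DataFrameContainer

    def build_container(renamed_df, spark_column_names, snowpark_column_names):
        # Bundle the renamed Snowpark DataFrame with its Spark<->Snowpark column mapping;
        # downstream code unpacks container.dataframe and container.column_map.
        return DataFrameContainer.create_with_column_mapping(
            dataframe=renamed_df,
            spark_column_names=spark_column_names,
            snowpark_column_names=snowpark_column_names,
            snowpark_column_types=[f.datatype for f in renamed_df.schema.fields],
        )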
--- a/snowflake/snowpark_connect/relation/utils.py
+++ b/snowflake/snowpark_connect/relation/utils.py
@@ -32,6 +32,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 
 TYPE_MAP_FOR_TO_SCHEMA = {
@@ -91,7 +92,9 @@ TYPE_MAP_FOR_TO_SCHEMA = {
 
 
 def get_df_with_partition_row_number(
-    df: snowpark.DataFrame, plan_id: int | None, row_number_column_name: str
+    container: DataFrameContainer,
+    plan_id: int | None,
+    row_number_column_name: str,
 ) -> snowpark.DataFrame:
     """
     Add a row number for each row in each partition for the given df, where
@@ -106,21 +109,24 @@ def get_df_with_partition_row_number(
     | c| 4| | c| 4| 0 |
     +---+---+ +---+---+------------+
     """
+    df = container.dataframe
+    column_map = container.column_map
+
     row_number_snowpark_column_name = make_column_names_snowpark_compatible(
-        [row_number_column_name], plan_id, len(df._column_map.get_spark_columns())
+        [row_number_column_name], plan_id, len(column_map.get_spark_columns())
     )[0]
     row_number_snowpark_column = (
         snowpark_fn.row_number()
         .over(
             snowpark.window.Window.partition_by(
-                *df._column_map.get_snowpark_columns()
+                *column_map.get_snowpark_columns()
             ).order_by(snowpark_fn.lit(1))
         )
         .alias(row_number_snowpark_column_name)
     )
 
     df_with_partition_number = df.select(
-        *df._column_map.get_snowpark_columns(), row_number_snowpark_column
+        *column_map.get_snowpark_columns(), row_number_snowpark_column
     )
     return df_with_partition_number
 
@@ -197,7 +203,7 @@ def get_semantic_string(rel: relation_proto.Relation) -> str:
     """
     queries = [
        query
-        for query_list in map_relation(rel)._plan.execution_queries.values()
+        for query_list in map_relation(rel).dataframe._plan.execution_queries.values()
        for query in query_list
     ]
 
--- a/snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py
+++ b/snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py
@@ -10,6 +10,7 @@ import snowflake.snowpark
 from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -36,7 +37,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
     def jdbc_write_dbapi(
         self,
-        input_df: DataFrame,
+        container: DataFrameContainer,
         create_connection: Callable[[dict[str, str]], "Connection"],
         close_connection: Callable[[Connection], None],
         table: str,
@@ -46,6 +47,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         Write a Snowpark Dataframe data into table of a JDBC datasource.
         """
 
+        input_df = container.dataframe
         conn = create_connection(self.jdbc_options)
         try:
             url = self.jdbc_options.get("url", None)
@@ -53,32 +55,32 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
         table_exist = self._does_table_exist(conn, table)
         insert_query = self._generate_insert_query(
-            input_df,
+            container,
             table,
         )
 
         match write_mode:
             case "append":
                 if not table_exist:
-                    self._create_table(conn, table, input_df, jdbc_dialect)
+                    self._create_table(conn, table, container, jdbc_dialect)
             case "errorifexists":
                 if table_exist:
                     raise ValueError(
                         "table is already exist and write mode is ERROR_IF_EXISTS"
                     )
                 else:
-                    self._create_table(conn, table, input_df, jdbc_dialect)
+                    self._create_table(conn, table, container, jdbc_dialect)
             case "overwrite":
                 if table_exist:
                     self._drop_table(conn, table)
-                self._create_table(conn, table, input_df, jdbc_dialect)
+                self._create_table(conn, table, container, jdbc_dialect)
             case "ignore":
                 if table_exist:
                     # With Ignore write mode, if table already exists, the save operation is expected
                     # to not save the contents of the DataFrame and to not change the existing data.
                     return
                 else:
-                    self._create_table(conn, table, input_df, jdbc_dialect)
+                    self._create_table(conn, table, container, jdbc_dialect)
             case _:
                 raise ValueError(f"Invalid write mode value{write_mode}")
 
@@ -92,14 +94,14 @@ class JdbcDataFrameWriter(DataFrameWriter):
         finally:
             close_connection(conn)
 
-    def _generate_insert_query(self, input_df: DataFrame, table: str) -> str:
+    def _generate_insert_query(self, container: DataFrameContainer, table: str) -> str:
         """
         Generates INSERT statement with placeholders.
-        :param input_df: Snowpark dataframe to save
+        :param container: Snowpark dataframe container
         :param table: JDBC datasource table name
         :return: INSERT SQL statement
         """
-        true_names = input_df._column_map.get_spark_columns()
+        true_names = container.column_map.get_spark_columns()
         # quote each column name to match PySpark's case-sensitive column naming behavior.
         quoted_column_names = ",".join([f'"{col}"' for col in true_names])
         place_holders = ",".join(["?"] * len(true_names))
@@ -145,7 +147,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
         self,
         conn: Connection,
         table: str,
-        input_df: DataFrame,
+        container,
         jdbc_dialect: JdbcDialect,
     ) -> None:
         """
@@ -154,14 +156,15 @@ class JdbcDataFrameWriter(DataFrameWriter):
 
         :param conn: A Python DBAPI connection over JDBC connection
         :param table: DBC datasource table name
-        :param input_df: Snowpark dataframe to save
+        :param container: Snowpark dataframe container
         :param jdbc_dialect: JDBC specific dialect
         :return: None
         """
+        input_df = container.dataframe
         columns_str = ""
         fields = input_df.schema.fields
         total_columns = len(fields)
-        column_map = input_df._column_map
+        column_map = container.column_map
 
         column_index = 0
         for field in fields:
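
The INSERT statement is still built from the Spark-facing column names, now read from container.column_map instead of the DataFrame's _column_map. A rough illustration of the quoting and placeholder lines shown above (the sample column names and the final statement shape are assumptions, since the INSERT template itself is outside this hunk):

    true_names = ["id", "firstName"]  # e.g. container.column_map.get_spark_columns()
    quoted_column_names = ",".join([f'"{col}"' for col in true_names])  # '"id","firstName"'
    place_holders = ",".join(["?"] * len(true_names))                   # '?,?'
    # Presumably rendered into something like:
    #   INSERT INTO <table> ("id","firstName") VALUES (?,?)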
--- a/snowflake/snowpark_connect/relation/write/map_write.py
+++ b/snowflake/snowpark_connect/relation/write/map_write.py
@@ -9,20 +9,30 @@ from pathlib import Path
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.commands_pb2 as commands_proto
 from pyspark.errors.exceptions.base import AnalysisException
-from pyspark.sql.connect.types import StructType
 
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
     unquote_if_quoted,
 )
+from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.functions import col, lit, object_construct
+from snowflake.snowpark.types import (
+    ArrayType,
+    DataType,
+    DateType,
+    MapType,
+    StringType,
+    StructType,
+    TimestampType,
+    _NumericType,
+)
 from snowflake.snowpark_connect.config import (
-    auto_uppercase_ddl,
     global_config,
     sessions_config,
     str_to_bool,
 )
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     is_cloud_path,
@@ -32,16 +42,19 @@ from snowflake.snowpark_connect.relation.read.reader_config import CsvWriterConf
 from snowflake.snowpark_connect.relation.stage_locator import get_paths_from_stage
 from snowflake.snowpark_connect.relation.utils import random_string
 from snowflake.snowpark_connect.type_mapping import snowpark_to_iceberg_type
-from snowflake.snowpark_connect.utils.attribute_handling import (
+from snowflake.snowpark_connect.utils.context import get_session_id
+from snowflake.snowpark_connect.utils.identifiers import (
+    spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
-from snowflake.snowpark_connect.utils.context import get_session_id
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
     telemetry,
 )
 
+_column_order_for_write = "name"
+
 
 
 # TODO: We will revise/refactor this after changes for all formats are finalized.
@@ -85,14 +98,9 @@ def get_param_from_options(params, options, source):
         params["format_type_options"]["NULL_IF"] = options["nullValue"]
 
 
-def _spark_to_snowflake_single_id(name: str) -> str:
-    name = quote_name_without_upper_casing(name)
-    return name.upper() if auto_uppercase_ddl() else name
-
-
 def _spark_to_snowflake(multipart_id: str) -> str:
     return ".".join(
-        _spark_to_snowflake_single_id(part)
+        spark_to_sf_single_id(part)
         for part in split_fully_qualified_spark_name(multipart_id)
     )
 
@@ -115,9 +123,8 @@ def map_write(request: proto_base.ExecutePlanRequest):
         case commands_proto.WriteOperation.SaveMode.SAVE_MODE_IGNORE:
             write_mode = "ignore"
 
-    input_df: snowpark.DataFrame = handle_column_names(
-        map_relation(write_op.input), write_op.source
-    )
+    result = map_relation(write_op.input)
+    input_df: snowpark.DataFrame = handle_column_names(result, write_op.source)
     session: snowpark.Session = get_or_create_snowpark_session()
 
     # Snowflake saveAsTable doesn't support format
@@ -198,7 +205,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
             options = dict(write_op.options)
             if write_mode is None:
                 write_mode = "errorifexists"
-            map_write_jdbc(input_df, session, options, write_mode)
+            map_write_jdbc(result, session, options, write_mode)
         case "iceberg":
             table_name = (
                 write_op.path
@@ -220,7 +227,14 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
             write_mode = "append"
-            input_df.write.saveAsTable(table_name=snowpark_table_name, mode=write_mode)
+
+            _validate_schema_and_get_writer(
+                input_df, write_mode, snowpark_table_name
+            ).saveAsTable(
+                table_name=snowpark_table_name,
+                mode=write_mode,
+                column_order=_column_order_for_write,
+            )
         case _:
             snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
 
@@ -228,17 +242,23 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 write_op.table.save_method
                 == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
             ):
-                input_df.write.saveAsTable(
+                _validate_schema_and_get_writer(
+                    input_df, write_mode, snowpark_table_name
+                ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
+                    column_order=_column_order_for_write,
                 )
             elif (
                 write_op.table.save_method
                 == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
             ):
-                input_df.write.saveAsTable(
+                _validate_schema_and_get_writer(
+                    input_df, write_mode, snowpark_table_name
+                ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode or "append",
+                    column_order=_column_order_for_write,
                 )
             else:
                 raise SnowparkConnectNotImplementedError(
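
Every saveAsTable call above now goes through _validate_schema_and_get_writer and passes column_order=_column_order_for_write, i.e. "name". In Snowpark, saveAsTable with column_order="name" matches DataFrame columns to the existing table's columns by name rather than by position. A hedged usage sketch (the table, its columns, and the session below are assumptions):

    # Assumes an existing table PEOPLE(ID INT, NAME STRING) and a Snowpark session.
    df = session.create_dataframe([("alice", 1)], schema=["NAME", "ID"])
    # With column_order="name", NAME and ID land in the matching table columns
    # even though the DataFrame lists them in a different order.
    df.write.saveAsTable(table_name="PEOPLE", mode="append", column_order="name")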
@@ -265,10 +285,8 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     )
 
     snowpark_table_name = _spark_to_snowflake(write_op.table_name)
-
-    input_df: snowpark.DataFrame = handle_column_names(
-        map_relation(write_op.input), "table"
-    )
+    result = map_relation(write_op.input)
+    input_df: snowpark.DataFrame = handle_column_names(result, "table")
     session: snowpark.Session = get_or_create_snowpark_session()
 
     if write_op.table_name is None or write_op.table_name == "":
@@ -304,18 +322,163 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
             schema=input_df.schema,
             snowpark_session=session,
         )
-
-        input_df.write.saveAsTable(
+        _validate_schema_and_get_writer(
+            input_df, write_mode, snowpark_table_name
+        ).saveAsTable(
             table_name=snowpark_table_name,
             mode="append",
+            column_order=_column_order_for_write,
         )
     else:
-        input_df.write.saveAsTable(
+        _validate_schema_and_get_writer(
+            input_df, write_mode, snowpark_table_name
+        ).saveAsTable(
             table_name=snowpark_table_name,
             mode=write_mode,
+            column_order=_column_order_for_write,
         )
 
 
+def _validate_schema_and_get_writer(
+    input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
+) -> snowpark.DataFrameWriter:
+    if write_mode == "overwrite":
+        return input_df.write
+
+    table_schema = None
+    try:
+        table_schema = (
+            get_or_create_snowpark_session().table(snowpark_table_name).schema
+        )
+    except SnowparkSQLException as e:
+        msg = e.message
+        if "SQL compilation error" in msg and "does not exist" in msg:
+            pass
+        else:
+            raise e
+
+    if table_schema is None:
+        # If table does not exist, we can skip the schema validation
+        return input_df.write
+
+    _validate_schema_for_append(table_schema, input_df.schema, snowpark_table_name)
+
+    # if table exists and case sensitivity is not enabled, we need to rename the columns to match existing table schema
+    if not global_config.spark_sql_caseSensitive:
+
+        for field in input_df.schema.fields:
+            # Find the matching field in the table schema (case-insensitive)
+            col_name = field.name
+            renamed = col_name
+            matching_field = next(
+                (f for f in table_schema.fields if f.name.lower() == col_name.lower()),
+                None,
+            )
+            if matching_field is not None and matching_field != col_name:
+                renamed = matching_field.name
+                input_df = input_df.withColumnRenamed(col_name, renamed)
+            # Cast column if type does not match
+
+            if field.datatype != matching_field.datatype:
+                if isinstance(matching_field.datatype, StructType):
+                    input_df = input_df.withColumn(
+                        renamed,
+                        col(renamed).cast(matching_field.datatype, rename_fields=True),
+                    )
+                else:
+                    input_df = input_df.withColumn(
+                        renamed, col(renamed).cast(matching_field.datatype)
+                    )
+    return input_df.write
+
+
+def _validate_schema_for_append(
+    table_schema: DataType, data_schema: DataType, snowpark_table_name: str
+):
+    match (table_schema, data_schema):
+        case (_, _) if table_schema == data_schema:
+            return
+
+        case (StructType() as table_struct, StructType() as data_struct):
+
+            def _comparable_col_name(col: str) -> str:
+                return col if global_config.spark_sql_caseSensitive else col.lower()
+
+            def invalid_struct_schema():
+                raise AnalysisException(
+                    f"Cannot resolve columns for the existing table {snowpark_table_name} ({table_schema.simple_string()}) with the data schema ({data_schema.simple_string()})."
+                )
+
+            if len(table_struct.fields) != len(data_struct.fields):
+                raise AnalysisException(
+                    f"The column number of the existing table {snowpark_table_name} ({table_schema.simple_string()}) doesn't match the data schema ({data_schema.simple_string()}).)"
+                )
+
+            table_field_names = {
+                _comparable_col_name(field.name) for field in table_struct.fields
+            }
+            data_field_names = {
+                _comparable_col_name(field.name) for field in data_struct.fields
+            }
+
+            if table_field_names != data_field_names:
+                invalid_struct_schema()
+
+            for data_field in data_struct.fields:
+                matching_table_field = next(
+                    (
+                        f
+                        for f in table_struct.fields
+                        if _comparable_col_name(f.name)
+                        == _comparable_col_name(data_field.name)
+                    ),
+                    None,
+                )
+
+                if matching_table_field is None:
+                    invalid_struct_schema()
+                else:
+                    _validate_schema_for_append(
+                        matching_table_field.datatype,
+                        data_field.datatype,
+                        snowpark_table_name,
+                    )
+
+            return
+
+        case (StringType(), _) if not isinstance(
+            data_schema, (StructType, ArrayType, MapType, TimestampType, DateType)
+        ):
+            return
+
+        case (_, _) if isinstance(table_schema, _NumericType) and isinstance(
+            data_schema, _NumericType
+        ):
+            return
+
+        case (ArrayType() as table_array, ArrayType() as data_array):
+            _validate_schema_for_append(
+                table_array.element_type, data_array.element_type, snowpark_table_name
+            )
+
+        case (MapType() as table_map, MapType() as data_map):
+            _validate_schema_for_append(
+                table_map.key_type, data_map.key_type, snowpark_table_name
+            )
+            _validate_schema_for_append(
+                table_map.value_type, data_map.value_type, snowpark_table_name
+            )
+
+        case (TimestampType(), _) if isinstance(data_schema, (DateType, TimestampType)):
+            return
+        case (DateType(), _) if isinstance(data_schema, (DateType, TimestampType)):
+            return
+        case (_, _):
+            raise AnalysisException(
+                f"[INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST] Cannot write incompatible data for the table {snowpark_table_name}: Cannot safely cast {data_schema.simple_string()} to {table_schema.simple_string()}"
+            )
+
+
 def create_iceberg_table(
     snowpark_table_name: str,
     location: str,
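
The new _validate_schema_for_append helper recursively checks that the incoming data schema can be appended to the existing table schema: identical types, numeric into numeric, most scalars into StringType, Date/Timestamp interchange, and element-wise checks for arrays, maps, and structs; everything else raises AnalysisException. A small illustration, assuming the package is importable (the helper is module-private and the table name is made up, so this only demonstrates the rules, not a supported API):

    from snowflake.snowpark.types import IntegerType, LongType, StringType, TimestampType
    from snowflake.snowpark_connect.relation.write.map_write import _validate_schema_for_append

    # Numeric data into a numeric table column is accepted (returns None).
    _validate_schema_for_append(LongType(), IntegerType(), "MY_TABLE")

    # A timestamp column cannot be appended into a STRING table column:
    # raises AnalysisException with [INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST].
    _validate_schema_for_append(StringType(), TimestampType(), "MY_TABLE")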
@@ -323,7 +486,7 @@ def create_iceberg_table(
     snowpark_session: snowpark.Session,
 ):
     table_schema = [
-        f"{_spark_to_snowflake_single_id(field.name)} {snowpark_to_iceberg_type(field.datatype)}"
+        f"{spark_to_sf_single_id(unquote_if_quoted(field.name), is_column = True)} {snowpark_to_iceberg_type(field.datatype)}"
         for field in schema.fields
     ]
 
@@ -374,26 +537,22 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
     return rewritten_df.select(object_construct(*construct_key_values))
 
 
-def handle_column_names(df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
+def handle_column_names(
+    container: DataFrameContainer, source: str
+) -> snowpark.DataFrame:
     """
-    Handle column names.
-
-    Quote column name in these scenarios:
-    0. Not write to table
-    1. Customer enabled case sensitivity in config
+    Handle column names before write so they match spark schema.
     """
-    if not hasattr(df, "_column_map") or source == "jdbc":
+    df = container.dataframe
+    if source == "jdbc":
         # don't change column names for jdbc sources as we directly use spark column names for writing to the destination tables.
         return df
-    column_map = df._column_map
-    case_sensitive = global_config.spark_sql_caseSensitive
-    for column in df.columns:
-        spark_column_name = unquote_if_quoted(
-            column_map.get_spark_column_name_from_snowpark_column_name(column)
+    column_map = container.column_map
+
+    for column in column_map.columns:
+        df = df.withColumnRenamed(
+            column.snowpark_name, quote_name_without_upper_casing(column.spark_name)
         )
-        if source in ("csv", "parquet", "json") or case_sensitive:
-            spark_column_name = f'"{spark_column_name}"'
-        df = df.withColumnRenamed(column, spark_column_name)
 
     return df
 
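
handle_column_names no longer applies per-source quoting rules; it simply renames every Snowpark column back to its quoted Spark name via quote_name_without_upper_casing, which quotes an identifier without changing its case. For example (the column names here are made up):

    from snowflake.snowpark._internal.analyzer.analyzer_utils import quote_name_without_upper_casing

    quote_name_without_upper_casing("firstName")    # -> '"firstName"'   (case preserved)
    quote_name_without_upper_casing("order count")  # -> '"order count"'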
--- a/snowflake/snowpark_connect/relation/write/map_write_jdbc.py
+++ b/snowflake/snowpark_connect/relation/write/map_write_jdbc.py
@@ -3,6 +3,7 @@
 #
 
 from snowflake import snowpark
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.map_read_jdbc import (
     close_connection,
     create_connection,
@@ -14,7 +15,7 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 
 
 def map_write_jdbc(
-    input_df: snowpark.DataFrame,
+    container: DataFrameContainer,
     session: snowpark.Session,
     options: dict[str, str],
     write_mode: str,
@@ -38,7 +39,7 @@
 
     try:
         JdbcDataFrameWriter(session, jdbc_options).jdbc_write_dbapi(
-            input_df,
+            container,
             create_connection,
             close_connection,
             table=dbtable,
--- a/snowflake/snowpark_connect/server.py
+++ b/snowflake/snowpark_connect/server.py
@@ -112,10 +112,38 @@ _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = 128 * 1024 * 1024
 _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE = 64 * 1024  # 64kb
 
 
+def _sanitize_file_paths(text: str) -> str:
+    """
+    Sanitize file paths in error messages by replacing them with placeholders.
+    Only matches actual file paths, not module names or class names.
+    """
+    import re
+
+    # Pattern to match file paths in traceback "File" lines only
+    # This targets the specific format: File "/path/to/file.py", line XX
+    file_line_pattern = r'(File\s+["\'])([^"\']+)(["\'],\s+line\s+\d+)'
+
+    def replace_file_path(match):
+        return f"{match.group(1)}<redacted_file_path>{match.group(3)}"
+
+    return re.sub(file_line_pattern, replace_file_path, text)
+
+
 def _handle_exception(context, e: Exception):
     import traceback
 
-    traceback.print_exc()
+    # traceback.print_exc()
+    # SNOWFLAKE_SHOW_ERROR_TRACE controls sanitized traceback printing (default: false)
+    show_traceback = os.getenv("SNOWFLAKE_SHOW_ERROR_TRACE", "false").lower() == "true"
+
+    if show_traceback:
+        # Show detailed traceback (includes error info naturally)
+        error_traceback = traceback.format_exc()
+        sanitized_traceback = _sanitize_file_paths(error_traceback)
+        logger.error(sanitized_traceback)
+    else:
+        # Show only basic error information, no traceback
+        logger.error("Error: %s - %s", type(e).__name__, str(e))
 
     telemetry.report_request_failure(e)
 
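
With this change the server no longer prints raw tracebacks. When SNOWFLAKE_SHOW_ERROR_TRACE=true it logs a traceback whose File entries have the path replaced by a placeholder; otherwise only the exception type and message are logged. A quick check of the regex used above (the path in the sample line is made up):

    import re

    file_line_pattern = r'(File\s+["\'])([^"\']+)(["\'],\s+line\s+\d+)'
    line = 'File "/home/user/project/job.py", line 42, in run'
    redacted = re.sub(
        file_line_pattern,
        lambda m: f"{m.group(1)}<redacted_file_path>{m.group(3)}",
        line,
    )
    # redacted == 'File "<redacted_file_path>", line 42, in run'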
@@ -195,12 +223,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
         telemetry.initialize_request_summary(request)
         match request.WhichOneof("analyze"):
             case "schema":
-                snowpark_df = map_relation(request.schema.plan.root)
+                result = map_relation(request.schema.plan.root)
+                snowpark_df = result.dataframe
                 snowpark_schema: snowpark.types.StructType = snowpark_df.schema
                 schema = proto_base.AnalyzePlanResponse.Schema(
                     schema=types_proto.DataType(
                         **snowpark_to_proto_type(
-                            snowpark_schema, snowpark_df._column_map, snowpark_df
+                            snowpark_schema, result.column_map, snowpark_df
                         )
                     )
                 )
@@ -262,7 +291,8 @@
                 # Snowflake only exposes simplified execution plans, similar to Spark's optimized logical plans.
                 # Snowpark provides the execution plan IFF the dataframe maps to a single query.
                 # TODO: Do we need to return a Spark-like plan?
-                snowpark_df = map_relation(request.explain.plan.root)
+                result = map_relation(request.explain.plan.root)
+                snowpark_df = result.dataframe
                 return proto_base.AnalyzePlanResponse(
                     session_id=request.session_id,
                     explain=proto_base.AnalyzePlanResponse.Explain(