snowpark-connect 0.21.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (56)
  1. snowflake/snowpark_connect/config.py +19 -14
  2. snowflake/snowpark_connect/error/error_utils.py +32 -0
  3. snowflake/snowpark_connect/error/exceptions.py +4 -0
  4. snowflake/snowpark_connect/expression/hybrid_column_map.py +192 -0
  5. snowflake/snowpark_connect/expression/literal.py +9 -12
  6. snowflake/snowpark_connect/expression/map_cast.py +20 -4
  7. snowflake/snowpark_connect/expression/map_expression.py +8 -1
  8. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  9. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +32 -5
  10. snowflake/snowpark_connect/expression/map_unresolved_function.py +269 -134
  11. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +8 -8
  12. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +4 -2
  13. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +127 -21
  14. snowflake/snowpark_connect/relation/map_aggregate.py +154 -18
  15. snowflake/snowpark_connect/relation/map_column_ops.py +59 -8
  16. snowflake/snowpark_connect/relation/map_extension.py +58 -24
  17. snowflake/snowpark_connect/relation/map_local_relation.py +8 -1
  18. snowflake/snowpark_connect/relation/map_map_partitions.py +3 -1
  19. snowflake/snowpark_connect/relation/map_row_ops.py +30 -1
  20. snowflake/snowpark_connect/relation/map_sql.py +40 -196
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -4
  22. snowflake/snowpark_connect/relation/read/map_read.py +2 -1
  23. snowflake/snowpark_connect/relation/read/map_read_json.py +12 -1
  24. snowflake/snowpark_connect/relation/read/map_read_parquet.py +8 -1
  25. snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
  26. snowflake/snowpark_connect/relation/read/utils.py +7 -6
  27. snowflake/snowpark_connect/relation/utils.py +170 -1
  28. snowflake/snowpark_connect/relation/write/map_write.py +306 -87
  29. snowflake/snowpark_connect/server.py +34 -5
  30. snowflake/snowpark_connect/type_mapping.py +6 -2
  31. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  32. snowflake/snowpark_connect/utils/env_utils.py +55 -0
  33. snowflake/snowpark_connect/utils/session.py +21 -4
  34. snowflake/snowpark_connect/utils/telemetry.py +213 -61
  35. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  36. snowflake/snowpark_connect/version.py +1 -1
  37. snowflake/snowpark_decoder/__init__.py +0 -0
  38. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  39. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  40. snowflake/snowpark_decoder/dp_session.py +111 -0
  41. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  42. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/METADATA +2 -2
  43. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/RECORD +55 -44
  44. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/top_level.txt +1 -0
  45. spark/__init__.py +0 -0
  46. spark/connect/__init__.py +0 -0
  47. spark/connect/envelope_pb2.py +31 -0
  48. spark/connect/envelope_pb2.pyi +46 -0
  49. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  50. {snowpark_connect-0.21.0.data → snowpark_connect-0.23.0.data}/scripts/snowpark-connect +0 -0
  51. {snowpark_connect-0.21.0.data → snowpark_connect-0.23.0.data}/scripts/snowpark-session +0 -0
  52. {snowpark_connect-0.21.0.data → snowpark_connect-0.23.0.data}/scripts/snowpark-submit +0 -0
  53. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/WHEEL +0 -0
  54. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/licenses/LICENSE-binary +0 -0
  55. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/licenses/LICENSE.txt +0 -0
  56. {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.23.0.dist-info}/licenses/NOTICE-binary +0 -0
@@ -73,12 +73,13 @@ def rename_columns_as_snowflake_standard(
  return df, []

  new_columns = make_column_names_snowpark_compatible(df.columns, plan_id)
- return (
- df.select(
- *(df.col(orig).alias(alias) for orig, alias in zip(df.columns, new_columns))
- ),
- new_columns,
- )
+ result = df.toDF(*new_columns)
+ if result._select_statement is not None:
+ # do not allow snowpark to flatten the to_df result
+ # TODO: remove after SNOW-2203706 is fixed
+ result._select_statement.flatten_disabled = True
+
+ return (result, new_columns)


  class Connection(Protocol):
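
For context, the replacement rename path relies on Snowpark's DataFrame.toDF, which renames columns positionally, instead of a per-column select/alias chain; the _select_statement.flatten_disabled assignment is an internal Snowpark workaround tied to SNOW-2203706. A minimal sketch of the toDF behavior, assuming an existing snowflake.snowpark.Session named `session` and made-up column names:

    # Minimal sketch (not part of the diff). `session` is assumed to be an
    # existing snowflake.snowpark.Session created elsewhere.
    def rename_demo(session) -> list:
        df = session.create_dataframe([(1, "a"), (2, "b")], schema=["id", "val"])
        renamed = df.toDF("ID_0", "VAL_0")  # renames by position, like the new code path
        return renamed.columns  # ['ID_0', 'VAL_0']
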
@@ -6,12 +6,28 @@ import random
  import re
  import string
  import time
- from typing import Sequence
+ from collections.abc import Callable
+ from typing import AbstractSet, List, Optional, Sequence

  import pyspark.sql.connect.proto.relations_pb2 as relation_proto

  import snowflake.snowpark.functions as snowpark_fn
  from snowflake import snowpark
+ from snowflake.snowpark._internal.analyzer.expression import (
+ COLUMN_DEPENDENCY_ALL,
+ COLUMN_DEPENDENCY_DOLLAR,
+ Expression,
+ FunctionExpression,
+ derive_dependent_columns,
+ )
+ from snowflake.snowpark._internal.analyzer.select_statement import (
+ SEQUENCE_DEPENDENT_DATA_GENERATION,
+ ColumnChangeState,
+ ColumnStateDict,
+ SelectStatement,
+ )
+ from snowflake.snowpark._internal.analyzer.unary_expression import Alias
+ from snowflake.snowpark._internal.analyzer.window_expression import WindowExpression
  from snowflake.snowpark.types import (
  BinaryType,
  BooleanType,
@@ -223,3 +239,156 @@ def snowpark_functions_col(name: str, column_map: ColumnNameMap) -> snowpark.Col
  """
  is_qualified_name = name not in column_map.get_snowpark_columns()
  return snowpark_fn.col(name, _is_qualified_name=is_qualified_name)
+
+
+ def can_sort_be_flattened(
+ select_statement: Optional[SelectStatement], *sort_expressions: Optional[Expression]
+ ) -> bool:
+ """
+ Checks if the given SelectStatement can be "flattened" when sorting with regard to the given sort expressions.
+ Flattening means that the given SelectStatement can be enhanced and reused instead of being treated
+ as a subquery in the FROM clause after a "sort" or "filter" operation. Flattening allows accessing dropped columns
+ for sort and filter expressions.
+ """
+ if not select_statement or select_statement.flatten_disabled:
+ return False
+
+ # In some cases, flattening sort can lead to leaving the "order by" clause in a subquery,
+ # which can cause incorrect ordering. We want to avoid flattening sort when all its dependent columns
+ # are available in the current projection.
+ dependent_columns_in_sort = derive_dependent_columns(*sort_expressions)
+ columns_in_projection = _get_columns_in_projection(select_statement.projection)
+ if len(dependent_columns_in_sort - columns_in_projection) == 0:
+ return False
+
+ return _can_clause_dependent_columns_flatten(
+ dependent_columns_in_sort, select_statement.column_states
+ ) and not _has_data_generator_exp(select_statement.projection)
+
+
+ def can_filter_be_flattened(
+ select_statement: Optional[SelectStatement], condition: Expression
+ ) -> bool:
+ """
+ Checks if the given SelectStatement can be "flattened" when filtering with regard to the given condition.
+ Flattening means that the given SelectStatement can be enhanced and reused instead of being treated
+ as a subquery in the FROM clause after a "sort" or "filter" operation. Flattening allows accessing dropped columns
+ for sort and filter expressions.
+ """
+ if not select_statement or select_statement.flatten_disabled:
+ return False
+
+ return all(
+ [
+ _can_clause_dependent_columns_flatten(
+ derive_dependent_columns(condition), select_statement.column_states
+ ),
+ not _has_data_generator_or_window_exp(select_statement.projection),
+ select_statement.order_by is None,
+ select_statement.limit_ is None,
+ ]
+ )
+
+
+ def _get_columns_in_projection(
+ projection: Optional[List[Expression]],
+ ) -> AbstractSet[str]:
+ if projection is None:
+ return set()
+
+ columns = set()
+ for expression in projection:
+ if hasattr(expression, "name") and expression.name:
+ columns.add(expression.name)
+ elif hasattr(expression, "children"):
+ columns.update(_get_columns_in_projection(expression.children))
+
+ return columns
+
+
+ def _is_self_alias(expression):
+ """
+ Check if the expression is a self-alias, meaning it has an alias that is the same as its name.
+ A self-alias can be flattened, even if Snowpark treats it as a CHANGED_EXP.
+ """
+ if not isinstance(expression, Alias):
+ return False
+
+ first_child_with_name = expression.child
+ while (
+ first_child_with_name
+ and hasattr(first_child_with_name, "child")
+ and not hasattr(first_child_with_name, "name")
+ ):
+ first_child_with_name = first_child_with_name.child
+
+ return (
+ first_child_with_name
+ and hasattr(first_child_with_name, "name")
+ and first_child_with_name.name == expression.name
+ )
+
+
+ def _can_clause_dependent_columns_flatten(
+ dependent_columns: Optional[AbstractSet[str]],
+ subquery_column_states: ColumnStateDict,
+ ) -> bool:
+ if dependent_columns == COLUMN_DEPENDENCY_DOLLAR:
+ return False
+ elif (
+ subquery_column_states.has_changed_columns
+ or subquery_column_states.has_new_columns
+ ):
+ if dependent_columns == COLUMN_DEPENDENCY_ALL:
+ return False
+
+ assert dependent_columns is not None
+ for dc in dependent_columns:
+ dc_state = subquery_column_states.get(dc)
+ if dc_state:
+ if (
+ dc_state.change_state == ColumnChangeState.CHANGED_EXP
+ and not _is_self_alias(dc_state.expression)
+ ):
+ return False
+ return True
+
+
+ def _has_data_generator_exp(expressions: List[Expression]) -> bool:
+ return _has_expression(expressions, [_is_generator_expression])
+
+
+ def _has_data_generator_or_window_exp(expressions: List[Expression]) -> bool:
+ return _has_expression(
+ expressions, [_is_generator_expression, _is_window_expression]
+ )
+
+
+ def _has_expression(
+ expressions: Optional[List[Expression]], checks: List[Callable[[Expression], bool]]
+ ) -> bool:
+ if expressions is None:
+ return False
+
+ for exp in expressions:
+ if not exp:
+ continue
+
+ if any([check(exp) for check in checks]):
+ return True
+
+ if _has_expression(exp.children, checks):
+ return True
+
+ return False
+
+
+ def _is_window_expression(exp: Expression) -> bool:
+ return isinstance(exp, WindowExpression)
+
+
+ def _is_generator_expression(exp: Expression) -> bool:
+ # https://docs.snowflake.com/en/sql-reference/functions-data-generation
+ return isinstance(exp, FunctionExpression) and (
+ exp.is_data_generator or exp.name.lower() in SEQUENCE_DEPENDENT_DATA_GENERATION
+ )
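
For intuition, the recursive scan performed by _has_expression (and mirrored by _get_columns_in_projection) can be illustrated with a self-contained toy tree. The Node class below is a hypothetical stand-in for Snowpark's internal Expression and is not part of the package:

    # Toy sketch of the depth-first, short-circuiting scan used above.
    from dataclasses import dataclass
    from typing import Callable, List, Optional

    @dataclass
    class Node:
        kind: str
        children: Optional[List["Node"]] = None

    def has_node(nodes: Optional[List[Node]], checks: List[Callable[[Node], bool]]) -> bool:
        if nodes is None:
            return False
        for node in nodes:
            if node is None:
                continue
            if any(check(node) for check in checks):
                return True
            if has_node(node.children, checks):
                return True
        return False

    projection = [Node("alias", [Node("window"), Node("column")])]
    print(has_node(projection, [lambda n: n.kind == "window"]))  # True -> do not flatten
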
@@ -214,27 +214,78 @@ def map_write(request: proto_base.ExecutePlanRequest):
  )
  snowpark_table_name = _spark_to_snowflake(table_name)

- if write_mode == "overwrite":
- if check_snowflake_table_existance(snowpark_table_name, session):
- session.sql(f"DELETE FROM {snowpark_table_name}").collect()
- write_mode = "append"
-
- if write_mode in (None, "", "overwrite"):
- create_iceberg_table(
- snowpark_table_name=snowpark_table_name,
- location=write_op.options.get("location", None),
- schema=input_df.schema,
- snowpark_session=session,
- )
- write_mode = "append"
-
- _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode=write_mode,
- column_order=_column_order_for_write,
- )
+ match write_mode:
+ case None | "error" | "errorifexists":
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} already exists"
+ )
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case "append":
+ if check_table_type(snowpark_table_name, session) != "ICEBERG":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case "ignore":
+ if not check_snowflake_table_existence(
+ snowpark_table_name, session
+ ):
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case "overwrite":
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ if check_table_type(snowpark_table_name, session) != "ICEBERG":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ else:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
+ _validate_schema_and_get_writer(
+ input_df, "truncate", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="truncate",
+ column_order=_column_order_for_write,
+ )
+ case _:
+ raise SnowparkConnectNotImplementedError(
+ f"Write mode {write_mode} is not supported"
+ )
  case _:
  snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)

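
For reference, the match arms above correspond to the standard Spark save modes a client passes to the V1 writer. A hedged sketch of the client-side calls that would reach them; `spark` is assumed to be a SparkSession routed through snowpark-connect, and "demo_table" is a made-up name:

    # Hypothetical PySpark client calls exercising the save modes handled above.
    df = spark.createDataFrame([(1, "a")], ["id", "val"])

    df.write.format("iceberg").saveAsTable("demo_table")                    # default error/errorifexists: create, fail if present
    df.write.format("iceberg").mode("append").saveAsTable("demo_table")     # append: target must already be an iceberg table
    df.write.format("iceberg").mode("ignore").saveAsTable("demo_table")     # ignore: create only if missing
    df.write.format("iceberg").mode("overwrite").saveAsTable("demo_table")  # overwrite: truncate then append
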
@@ -242,13 +293,46 @@ def map_write(request: proto_base.ExecutePlanRequest):
  write_op.table.save_method
  == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
  ):
- _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode=write_mode,
- column_order=_column_order_for_write,
- )
+ match write_mode:
+ case "overwrite":
+ if check_snowflake_table_existence(
+ snowpark_table_name, session
+ ):
+ if (
+ check_table_type(snowpark_table_name, session)
+ != "TABLE"
+ ):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+ write_mode = "truncate"
+ _validate_schema_and_get_writer(
+ input_df, write_mode, snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode=write_mode,
+ column_order=_column_order_for_write,
+ )
+ case "append":
+ if check_table_type(snowpark_table_name, session) != "TABLE":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+ _validate_schema_and_get_writer(
+ input_df, write_mode, snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode=write_mode,
+ column_order=_column_order_for_write,
+ )
+ case _:
+ _validate_schema_and_get_writer(
+ input_df, write_mode, snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode=write_mode,
+ column_order=_column_order_for_write,
+ )
  elif (
  write_op.table.save_method
  == commands_proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO
@@ -268,21 +352,6 @@ def map_write(request: proto_base.ExecutePlanRequest):

  def map_write_v2(request: proto_base.ExecutePlanRequest):
  write_op = request.plan.command.write_operation_v2
- match write_op.mode:
- case commands_proto.WriteOperationV2.MODE_APPEND:
- write_mode = "append"
- case commands_proto.WriteOperationV2.MODE_CREATE:
- write_mode = "errorifexists"
- case commands_proto.WriteOperationV2.MODE_OVERWRITE:
- write_mode = "overwrite"
- case commands_proto.WriteOperationV2.MODE_REPLACE:
- write_mode = "overwrite"
- case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
- write_mode = "overwrite"
- case _:
- raise SnowparkConnectNotImplementedError(
- f"Write operation {write_op.mode} not implemented."
- )

  snowpark_table_name = _spark_to_snowflake(write_op.table_name)
  result = map_relation(write_op.input)
@@ -294,55 +363,176 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  "Write operation V2 only support table writing now"
  )

- # For OVERWRITE and APPEND modes, check if table exists first - Spark requires table to exist for these operations
- if write_op.mode in (
- commands_proto.WriteOperationV2.MODE_OVERWRITE,
- commands_proto.WriteOperationV2.MODE_APPEND,
- ):
- if not check_snowflake_table_existance(snowpark_table_name, session):
- raise AnalysisException(
- f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found. "
- f"Verify the spelling and correctness of the schema and catalog.\n"
- )
-
  if write_op.provider.lower() == "iceberg":
- if write_mode == "overwrite" and check_snowflake_table_existance(
- snowpark_table_name, session
- ):
- session.sql(f"DELETE FROM {snowpark_table_name}").collect()
- write_mode = "append"
-
- if write_mode in (
- "errorifexists",
- "overwrite",
- ):
- create_iceberg_table(
- snowpark_table_name=snowpark_table_name,
- location=write_op.table_properties.get("location"),
- schema=input_df.schema,
- snowpark_session=session,
- )
- _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
- )
+ match write_op.mode:
+ case commands_proto.WriteOperationV2.MODE_CREATE:
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} already exists"
+ )
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.table_properties.get("location"),
+ schema=input_df.schema,
+ snowpark_session=session,
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_APPEND:
+ if not check_snowflake_table_existence(snowpark_table_name, session):
+ raise AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
+ )
+ if check_table_type(snowpark_table_name, session) != "ICEBERG":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+ # TODO: handle the filter condition for MODE_OVERWRITE
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ if check_table_type(snowpark_table_name, session) != "ICEBERG":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ else:
+ raise AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "truncate", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="truncate",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_REPLACE:
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.table_properties.get("location"),
+ schema=input_df.schema,
+ snowpark_session=session,
+ mode="replace",
+ )
+ else:
+ raise AnalysisException(
+ f"Table {snowpark_table_name} does not exist"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "replace", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.table_properties.get("location"),
+ schema=input_df.schema,
+ snowpark_session=session,
+ mode="create_or_replace",
+ )
+ _validate_schema_and_get_writer(
+ input_df, "create_or_replace", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case _:
+ raise SnowparkConnectNotImplementedError(
+ f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+ )
  else:
- _validate_schema_and_get_writer(
- input_df, write_mode, snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode=write_mode,
- column_order=_column_order_for_write,
- )
+ match write_op.mode:
+ case commands_proto.WriteOperationV2.MODE_CREATE:
+ _validate_schema_and_get_writer(
+ input_df, "errorifexists", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="errorifexists",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_APPEND:
+ if not check_snowflake_table_existence(snowpark_table_name, session):
+ raise AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
+ )
+ if check_table_type(snowpark_table_name, session) != "TABLE":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+ # TODO: handle the filter condition for MODE_OVERWRITE
+ if check_snowflake_table_existence(snowpark_table_name, session):
+ if check_table_type(snowpark_table_name, session) != "TABLE":
+ raise AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+ else:
+ raise AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "truncate", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="truncate",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_REPLACE:
+ if not check_snowflake_table_existence(snowpark_table_name, session):
+ raise AnalysisException(
+ f"Table {snowpark_table_name} does not exist"
+ )
+ _validate_schema_and_get_writer(
+ input_df, "replace", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="overwrite",
+ column_order=_column_order_for_write,
+ )
+ case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+ _validate_schema_and_get_writer(
+ input_df, "create_or_replace", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="overwrite",
+ column_order=_column_order_for_write,
+ )
+ case _:
+ raise SnowparkConnectNotImplementedError(
+ f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+ )


  def _validate_schema_and_get_writer(
  input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
  ) -> snowpark.DataFrameWriter:
- if write_mode == "overwrite":
+ if write_mode is not None and write_mode.lower() in (
+ "replace",
+ "create_or_replace",
+ ):
  return input_df.write

  table_schema = None
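
For reference, the WriteOperationV2 modes handled above are what PySpark's DataFrameWriterV2 emits. A hedged sketch of the client-side entry points; `spark` is assumed to be a snowpark-connect backed SparkSession and "demo_table" is a made-up name:

    # Hypothetical DataFrameWriterV2 calls and the V2 modes they map to.
    from pyspark.sql.functions import lit

    df = spark.createDataFrame([(1, "a")], ["id", "val"])

    df.writeTo("demo_table").using("iceberg").create()           # MODE_CREATE
    df.writeTo("demo_table").append()                            # MODE_APPEND
    df.writeTo("demo_table").overwrite(lit(True))                # MODE_OVERWRITE
    df.writeTo("demo_table").overwritePartitions()               # MODE_OVERWRITE_PARTITIONS
    df.writeTo("demo_table").using("iceberg").replace()          # MODE_REPLACE
    df.writeTo("demo_table").using("iceberg").createOrReplace()  # MODE_CREATE_OR_REPLACE
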
@@ -484,6 +674,7 @@ def create_iceberg_table(
  location: str,
  schema: StructType,
  snowpark_session: snowpark.Session,
+ mode: str = "create",
  ):
  table_schema = [
  f"{spark_to_sf_single_id(unquote_if_quoted(field.name), is_column = True)} {snowpark_to_iceberg_type(field.datatype)}"
@@ -506,8 +697,20 @@ def create_iceberg_table(
  else f"EXTERNAL_VOLUME = '{config_external_volume}'"
  )

+ match mode:
+ case "create":
+ create_sql = "CREATE"
+ case "replace":
+ # There's no replace for iceberg table, so we use create or replace
+ create_sql = "CREATE OR REPLACE"
+ case "create_or_replace":
+ create_sql = "CREATE OR REPLACE"
+ case _:
+ raise SnowparkConnectNotImplementedError(
+ f"Write mode {mode} is not supported for iceberg table"
+ )
  sql = f"""
- CREATE ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
+ {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
  CATALOG = 'SNOWFLAKE'
  {external_volume}
  {base_location};
@@ -584,7 +787,7 @@ def _truncate_directory(directory_path: Path) -> None:
  shutil.rmtree(file)


- def check_snowflake_table_existance(
+ def check_snowflake_table_existence(
  snowpark_table_name: str,
  snowpark_session: snowpark.Session,
  ):
@@ -593,3 +796,19 @@ def check_snowflake_table_existance(
  return True
  except Exception:
  return False
+
+
+ def check_table_type(
+ snowpark_table_name: str,
+ snowpark_session: snowpark.Session,
+ ) -> str:
+ # currently we only support iceberg table and FDN table
+ metadata = snowpark_session.sql(
+ f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
+ ).collect()
+ if metadata is None or len(metadata) == 0:
+ raise AnalysisException(f"Table {snowpark_table_name} does not exist")
+ metadata = metadata[0]
+ if metadata.as_dict().get("is_iceberg") == "Y":
+ return "ICEBERG"
+ return "TABLE"
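
A short usage sketch for the new helper, assuming an existing snowflake.snowpark.Session named `session` and a made-up table name; the write paths above branch on its result:

    # Hypothetical usage; "DEMO_TABLE" and `session` are assumptions.
    table_type = check_table_type("DEMO_TABLE", session)
    if table_type == "ICEBERG":
        ...  # routed through the iceberg branches (create_iceberg_table, truncate-and-append)
    else:  # "TABLE", i.e. a regular FDN table
        ...  # routed through the plain saveAsTable branches
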