snowpark-connect 0.32.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.


Files changed (106)
  1. snowflake/snowpark_connect/column_name_handler.py +91 -40
  2. snowflake/snowpark_connect/column_qualifier.py +0 -4
  3. snowflake/snowpark_connect/config.py +9 -0
  4. snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
  5. snowflake/snowpark_connect/expression/literal.py +12 -12
  6. snowflake/snowpark_connect/expression/map_sql_expression.py +18 -4
  7. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +150 -29
  8. snowflake/snowpark_connect/expression/map_unresolved_function.py +93 -55
  9. snowflake/snowpark_connect/relation/map_aggregate.py +156 -257
  10. snowflake/snowpark_connect/relation/map_column_ops.py +19 -0
  11. snowflake/snowpark_connect/relation/map_join.py +454 -252
  12. snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
  13. snowflake/snowpark_connect/relation/map_sql.py +335 -90
  14. snowflake/snowpark_connect/relation/read/map_read.py +9 -1
  15. snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
  16. snowflake/snowpark_connect/relation/read/map_read_json.py +90 -2
  17. snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
  18. snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
  19. snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
  20. snowflake/snowpark_connect/relation/read/utils.py +41 -0
  21. snowflake/snowpark_connect/relation/utils.py +50 -2
  22. snowflake/snowpark_connect/relation/write/map_write.py +251 -292
  23. snowflake/snowpark_connect/resources_initializer.py +25 -13
  24. snowflake/snowpark_connect/server.py +9 -24
  25. snowflake/snowpark_connect/type_mapping.py +2 -0
  26. snowflake/snowpark_connect/typed_column.py +2 -2
  27. snowflake/snowpark_connect/utils/context.py +0 -14
  28. snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
  29. snowflake/snowpark_connect/utils/sequence.py +21 -0
  30. snowflake/snowpark_connect/utils/session.py +4 -1
  31. snowflake/snowpark_connect/utils/udf_helper.py +1 -0
  32. snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
  33. snowflake/snowpark_connect/version.py +1 -1
  34. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +4 -2
  35. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +43 -104
  36. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  99. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
  100. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
  101. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
  102. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
  103. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
  104. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  105. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
  106. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,10 @@
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
  #

+ import copy
  import os
  import shutil
+ import uuid
  from pathlib import Path

  import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -29,6 +31,7 @@ from snowflake.snowpark.types import (
  )
  from snowflake.snowpark_connect.config import (
  auto_uppercase_column_identifiers,
+ get_success_file_generation_enabled,
  global_config,
  sessions_config,
  str_to_bool,
@@ -117,6 +120,57 @@ def _spark_to_snowflake(multipart_id: str) -> str:
  )


+ def _validate_table_exist_and_of_type(
+ snowpark_table_name: str,
+ session: snowpark.Session,
+ table_type: str,
+ table_schema_or_error: DataType | SnowparkSQLException,
+ ) -> None:
+ if not isinstance(table_schema_or_error, DataType):
+ exception = AnalysisException(
+ f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{snowpark_table_name}` cannot be found."
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+ _validate_table_type(snowpark_table_name, session, table_type)
+
+
+ def _validate_table_type(
+ snowpark_table_name: str,
+ session: snowpark.Session,
+ table_type: str,
+ ) -> None:
+ actual_type = get_table_type(snowpark_table_name, session)
+ if table_type == "iceberg":
+ if actual_type not in ("ICEBERG", "TABLE"):
+ exception = AnalysisException(
+ f"Table {snowpark_table_name} is not an iceberg table"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+ elif table_type == "fdn":
+ if actual_type not in ("NORMAL", "TABLE"):
+ exception = AnalysisException(
+ f"Table {snowpark_table_name} is not a FDN table"
+ )
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+ else:
+ raise ValueError(
+ f"Invalid table_type: {table_type}. Must be 'iceberg' or 'fdn'"
+ )
+
+
+ def _validate_table_does_not_exist(
+ snowpark_table_name: str,
+ table_schema_or_error: DataType | SnowparkSQLException,
+ ) -> None:
+ if isinstance(table_schema_or_error, DataType):
+ exception = AnalysisException(f"Table {snowpark_table_name} already exists")
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+ raise exception
+
+
  def map_write(request: proto_base.ExecutePlanRequest):
  write_op = request.plan.command.write_operation
  telemetry.report_io_write(write_op.source)
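Note: the three helpers above centralize the existence and table-type checks that were previously inlined at every call site. As a rough illustration of the expected behavior, here is a hypothetical pytest sketch (not part of the package) that stubs out get_table_type on the module and assumes the AnalysisException used here is pyspark's:

import pytest
from pyspark.errors import AnalysisException

import snowflake.snowpark_connect.relation.write.map_write as map_write

def test_rejects_non_iceberg_table(monkeypatch):
    # Pretend the catalog reports a plain FDN table for this name.
    monkeypatch.setattr(map_write, "get_table_type", lambda name, session: "NORMAL")
    with pytest.raises(AnalysisException):
        map_write._validate_table_type("DB.SCHEMA.T", session=None, table_type="iceberg")

def test_rejects_unknown_table_type(monkeypatch):
    # An unsupported table_type argument should fail fast with ValueError.
    monkeypatch.setattr(map_write, "get_table_type", lambda name, session: "TABLE")
    with pytest.raises(ValueError):
        map_write._validate_table_type("DB.SCHEMA.T", session=None, table_type="view")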
@@ -198,20 +252,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
  )

  if overwrite:
- try:
- path_after_stage = (
- write_path.split("/", 1)[1] if "/" in write_path else ""
- )
- if not path_after_stage or path_after_stage == "/":
- logger.warning(
- f"Skipping REMOVE for root path {write_path} - too broad scope"
- )
- else:
- remove_command = f"REMOVE '{write_path}/'"
- session.sql(remove_command).collect()
- logger.info(f"Successfully cleared directory: {write_path}")
- except Exception as e:
- logger.warning(f"Could not clear directory {write_path}: {e}")
+ remove_command = f"REMOVE '{write_path}'"
+ session.sql(remove_command).collect()
+ logger.info(f"Successfully cleared directory: {write_path}")

  if should_write_to_single_file and partition_hint is None:
  # Single file: generate complete filename with extension
@@ -240,7 +283,6 @@
  "format_type_options": {
  "COMPRESSION": compression,
  },
- "overwrite": overwrite,
  }
  # Download from the base write path to ensure we fetch whatever Snowflake produced.
  # Using the base avoids coupling to exact filenames/prefixes.
@@ -300,15 +342,20 @@
  # Execute multiple COPY INTO operations, one per target file.
  # Since we write per-partition with distinct prefixes, download from the base write path.
  download_stage_path = write_path
+
+ # We need to create a new set of parameters with single=True
+ shared_uuid = str(uuid.uuid4())
+ part_params = copy.deepcopy(dict(parameters))
+ part_params["single"] = True
  for part_idx in range(partition_hint):
- part_params = dict(parameters)
  # Preserve Spark-like filename prefix per partition so downloaded basenames
  # match the expected Spark pattern (with possible Snowflake counters appended).
  per_part_prefix = generate_spark_compatible_filename(
  task_id=part_idx,
  attempt_number=0,
- compression=None,
- format_ext="", # prefix only; Snowflake appends extension/counters
+ compression=compression,
+ format_ext=extension,
+ shared_uuid=shared_uuid,
  )
  part_params["location"] = f"{write_path}/{per_part_prefix}"
  (
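Note: the per-partition prefixes are meant to mimic Spark's part-file naming, where every task of a write job shares one job UUID. A minimal illustration of that naming convention (generate_spark_compatible_filename itself is not shown in this diff, so the exact pattern below is only an approximation):

import uuid

# One UUID per write job, shared across all partitions, as in Spark's output naming.
shared_uuid = str(uuid.uuid4())
filenames = [
    f"part-{task_id:05d}-{shared_uuid}-c000.csv.gz" for task_id in range(3)
]
# e.g. ['part-00000-<uuid>-c000.csv.gz', 'part-00001-<uuid>-c000.csv.gz', ...]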
@@ -318,6 +365,9 @@
  )
  else:
  rewritten_df.write.copy_into_location(**parameters)
+
+ generate_success = get_success_file_generation_enabled()
+
  if not is_cloud_path(write_op.path):
  store_files_locally(
  download_stage_path,
@@ -325,6 +375,13 @@
  overwrite,
  session,
  )
+ if generate_success:
+ _write_success_file_locally(write_op.path)
+ else:
+ if generate_success:
+ _write_success_file_to_stage(
+ download_stage_path, session, parameters
+ )
  case "jdbc":
  from snowflake.snowpark_connect.relation.write.map_write_jdbc import (
  map_write_jdbc,
@@ -347,14 +404,9 @@
  table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
  )
- if isinstance(table_schema_or_error, DataType): # Table exists
- exception = AnalysisException(
- f"Table {snowpark_table_name} already exists"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
+ _validate_table_does_not_exist(
+ snowpark_table_name, table_schema_or_error
+ )
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.options.get("location", None),
@@ -373,17 +425,7 @@
  snowpark_table_name, session
  )
  if isinstance(table_schema_or_error, DataType): # Table exists
- if get_table_type(snowpark_table_name, session) not in (
- "ICEBERG",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not an iceberg table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
+ _validate_table_type(snowpark_table_name, session, "iceberg")
  else:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
@@ -423,29 +465,25 @@
  snowpark_table_name, session
  )
  if isinstance(table_schema_or_error, DataType): # Table exists
- if get_table_type(snowpark_table_name, session) not in (
- "ICEBERG",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not an iceberg table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
+ _validate_table_type(snowpark_table_name, session, "iceberg")
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ mode="replace",
+ )
  else:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.options.get("location", None),
  schema=input_df.schema,
  snowpark_session=session,
+ mode="create",
  )
- _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
+ _get_writer_for_table_creation(input_df).saveAsTable(
  table_name=snowpark_table_name,
- mode="truncate",
+ mode="append",
  column_order=_column_order_for_write,
  )
  case _:
@@ -491,18 +529,9 @@
  snowpark_table_name, session
  )
  if isinstance(table_schema_or_error, DataType): # Table exists
- if get_table_type(snowpark_table_name, session) not in (
- "NORMAL",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not a FDN table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
- write_mode = "truncate"
+ _validate_table_type(snowpark_table_name, session, "fdn")
+
+ write_mode = "overwrite"
  _validate_schema_and_get_writer(
  input_df,
  write_mode,
@@ -511,27 +540,15 @@
  ).saveAsTable(
  table_name=snowpark_table_name,
  mode=write_mode,
+ copy_grants=True,
  column_order=_column_order_for_write,
  )
  case "append":
  table_schema_or_error = _get_table_schema_or_error(
  snowpark_table_name, session
  )
- if isinstance(
- table_schema_or_error, DataType
- ) and get_table_type( # Table exists
- snowpark_table_name, session
- ) not in (
- "NORMAL",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not a FDN table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
+ if isinstance(table_schema_or_error, DataType): # Table exists
+ _validate_table_type(snowpark_table_name, session, "fdn")

  _validate_schema_and_get_writer(
  input_df,
@@ -598,113 +615,105 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
  raise exception

- if write_op.provider.lower() == "iceberg":
- match write_op.mode:
- case commands_proto.WriteOperationV2.MODE_CREATE:
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if isinstance(table_schema_or_error, DataType): # Table exists
- exception = AnalysisException(
- f"Table {snowpark_table_name} already exists"
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
+ is_iceberg = write_op.provider.lower() == "iceberg"
+ table_type = "iceberg" if is_iceberg else "fdn"
+
+ match write_op.mode:
+ case commands_proto.WriteOperationV2.MODE_CREATE:
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ _validate_table_does_not_exist(snowpark_table_name, table_schema_or_error)
+
+ if is_iceberg:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.table_properties.get("location"),
  schema=input_df.schema,
  snowpark_session=session,
  )
- _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_APPEND:
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if not isinstance(table_schema_or_error, DataType): # Table not exists
- exception = AnalysisException(
- f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
- )
- attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
- raise exception
- if get_table_type(snowpark_table_name, session) not in (
- "ICEBERG",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not an iceberg table"
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
- # TODO: handle the filter condition for MODE_OVERWRITE
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if isinstance(table_schema_or_error, DataType): # Table exists
- if get_table_type(snowpark_table_name, session) not in (
- "ICEBERG",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not an iceberg table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
- else:
- exception = AnalysisException(
- f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="truncate",
- column_order=_column_order_for_write,
+ _get_writer_for_table_creation(input_df).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append" if is_iceberg else "errorifexists",
+ column_order=_column_order_for_write,
+ )
+
+ case commands_proto.WriteOperationV2.MODE_APPEND:
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ _validate_table_exist_and_of_type(
+ snowpark_table_name, session, table_type, table_schema_or_error
+ )
+ _validate_schema_and_get_writer(
+ input_df, "append", snowpark_table_name, table_schema_or_error
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode="append",
+ column_order=_column_order_for_write,
+ )
+
+ case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+ # TODO: handle the filter condition for MODE_OVERWRITE
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ _validate_table_exist_and_of_type(
+ snowpark_table_name, session, table_type, table_schema_or_error
+ )
+
+ if is_iceberg:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.options.get("location", None),
+ schema=input_df.schema,
+ snowpark_session=session,
+ mode="replace",
  )
- case commands_proto.WriteOperationV2.MODE_REPLACE:
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
+ writer = _get_writer_for_table_creation(input_df)
+ save_mode = "append"
+ else:
+ writer = _validate_schema_and_get_writer(
+ input_df, "overwrite", snowpark_table_name, table_schema_or_error
  )
- if isinstance(table_schema_or_error, DataType): # Table exists
- create_iceberg_table(
- snowpark_table_name=snowpark_table_name,
- location=write_op.table_properties.get("location"),
- schema=input_df.schema,
- snowpark_session=session,
- mode="replace",
- )
- else:
- exception = AnalysisException(
- f"Table {snowpark_table_name} does not exist"
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
+ save_mode = "overwrite"
+
+ writer.saveAsTable(
+ table_name=snowpark_table_name,
+ mode=save_mode,
+ column_order=_column_order_for_write,
+ )
+
+ case commands_proto.WriteOperationV2.MODE_REPLACE:
+ table_schema_or_error = _get_table_schema_or_error(
+ snowpark_table_name, session
+ )
+ _validate_table_exist_and_of_type(
+ snowpark_table_name, session, table_type, table_schema_or_error
+ )
+
+ if is_iceberg:
+ create_iceberg_table(
+ snowpark_table_name=snowpark_table_name,
+ location=write_op.table_properties.get("location"),
+ schema=input_df.schema,
+ snowpark_session=session,
+ mode="replace",
  )
- case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+ save_mode = "append"
+ else:
+ save_mode = "overwrite"
+
+ _validate_schema_and_get_writer(
+ input_df, "replace", snowpark_table_name, table_schema_or_error
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode=save_mode,
+ column_order=_column_order_for_write,
+ )
+
+ case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+ if is_iceberg:
  create_iceberg_table(
  snowpark_table_name=snowpark_table_name,
  location=write_op.table_properties.get("location"),
@@ -712,116 +721,24 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
  snowpark_session=session,
  mode="create_or_replace",
  )
- _validate_schema_and_get_writer(
- input_df, "create_or_replace", snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
- )
- case _:
- exception = SnowparkConnectNotImplementedError(
- f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
- )
- attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
- raise exception
- else:
- match write_op.mode:
- case commands_proto.WriteOperationV2.MODE_CREATE:
- _validate_schema_and_get_writer(
- input_df, "errorifexists", snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="errorifexists",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_APPEND:
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if not isinstance(table_schema_or_error, DataType): # Table not exists
- exception = AnalysisException(
- f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
- if get_table_type(snowpark_table_name, session) not in (
- "NORMAL",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not a FDN table"
- )
- attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "append", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="append",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
- # TODO: handle the filter condition for MODE_OVERWRITE
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if isinstance(table_schema_or_error, DataType): # Table exists
- if get_table_type(snowpark_table_name, session) not in (
- "NORMAL",
- "TABLE",
- ):
- exception = AnalysisException(
- f"Table {snowpark_table_name} is not a FDN table"
- )
- attach_custom_error_code(
- exception, ErrorCodes.INVALID_OPERATION
- )
- raise exception
- else:
- exception = AnalysisException(
- f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
- )
- attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "truncate", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="truncate",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_REPLACE:
- table_schema_or_error = _get_table_schema_or_error(
- snowpark_table_name, session
- )
- if not isinstance(table_schema_or_error, DataType): # Table not exists
- exception = AnalysisException(
- f"Table {snowpark_table_name} does not exist"
- )
- attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
- raise exception
- _validate_schema_and_get_writer(
- input_df, "replace", snowpark_table_name, table_schema_or_error
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="overwrite",
- column_order=_column_order_for_write,
- )
- case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
- _validate_schema_and_get_writer(
- input_df, "create_or_replace", snowpark_table_name
- ).saveAsTable(
- table_name=snowpark_table_name,
- mode="overwrite",
- column_order=_column_order_for_write,
- )
- case _:
- exception = SnowparkConnectNotImplementedError(
- f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
- )
- attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
- raise exception
+ save_mode = "append"
+ else:
+ save_mode = "overwrite"
+
+ _validate_schema_and_get_writer(
+ input_df, "create_or_replace", snowpark_table_name
+ ).saveAsTable(
+ table_name=snowpark_table_name,
+ mode=save_mode,
+ column_order=_column_order_for_write,
+ )
+
+ case _:
+ exception = SnowparkConnectNotImplementedError(
+ f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+ )
+ attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+ raise exception


  def _get_table_schema_or_error(
@@ -856,6 +773,7 @@ def _validate_schema_and_get_writer(
  if write_mode is not None and write_mode.lower() in (
  "replace",
  "create_or_replace",
+ "overwrite",
  ):
  return _get_writer_for_table_creation(input_df)

@@ -1053,14 +971,13 @@ def create_iceberg_table(
  if config_external_volume is None or config_external_volume == ""
  else f"EXTERNAL_VOLUME = '{config_external_volume}'"
  )
-
+ copy_grants = ""
  match mode:
  case "create":
  create_sql = "CREATE"
- case "replace":
+ case "replace" | "create_or_replace":
  # There's no replace for iceberg table, so we use create or replace
- create_sql = "CREATE OR REPLACE"
- case "create_or_replace":
+ copy_grants = "COPY GRANTS"
  create_sql = "CREATE OR REPLACE"
  case _:
  exception = SnowparkConnectNotImplementedError(
@@ -1072,7 +989,8 @@
  {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
  CATALOG = 'SNOWFLAKE'
  {external_volume}
- {base_location};
+ {base_location}
+ {copy_grants};
  """
  snowpark_session.sql(sql).collect()

@@ -1133,6 +1051,47 @@ def handle_column_names(
  return df


+ def _write_success_file_locally(directory_path: str) -> None:
+ """
+ Write a _SUCCESS marker file to a local directory.
+ """
+ try:
+ success_file = Path(directory_path) / "_SUCCESS"
+ success_file.touch()
+ logger.debug(f"Created _SUCCESS file at {directory_path}")
+ except Exception as e:
+ logger.warning(f"Failed to create _SUCCESS file at {directory_path}: {e}")
+
+
+ def _write_success_file_to_stage(
+ stage_path: str,
+ session: snowpark.Session,
+ parameters: dict,
+ ) -> None:
+ """
+ Write a _SUCCESS marker file to a stage location.
+ """
+ try:
+ # Create a dummy dataframe with one row containing "SUCCESS"
+ success_df = session.create_dataframe([["SUCCESS"]]).to_df(["STATUS"])
+ success_params = copy.deepcopy(parameters)
+ success_params["location"] = f"{stage_path}/_SUCCESS"
+ success_params["single"] = True
+ success_params["header"] = True
+
+ # Set CSV format with explicit no compression for _SUCCESS file
+ success_params["file_format_type"] = "csv"
+ success_params["format_type_options"] = {
+ "COMPRESSION": "NONE",
+ }
+
+ success_df.write.copy_into_location(**success_params)
+
+ logger.debug(f"Created _SUCCESS file at {stage_path}")
+ except Exception as e:
+ logger.warning(f"Failed to create _SUCCESS file at {stage_path}: {e}")
+
+
  def store_files_locally(
  stage_path: str, target_path: str, overwrite: bool, session: snowpark.Session
  ) -> None:
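Note: together with the get_success_file_generation_enabled() check in map_write, these helpers make local-path writes look more like vanilla Spark output, which conventionally ends a successful job with a _SUCCESS marker. A client-side sketch of what that should mean, assuming the success-file setting is enabled on the server; the endpoint and output path are hypothetical:

from pathlib import Path
from pyspark.sql import SparkSession

# Hypothetical Spark Connect endpoint; substitute your own Snowpark Connect server address.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])
df.write.mode("overwrite").csv("/tmp/snowpark_connect_out")

# With success-file generation on, the directory should now contain the Spark-style
# marker alongside the part files.
print((Path("/tmp/snowpark_connect_out") / "_SUCCESS").exists())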