snowpark-connect 0.32.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/column_name_handler.py +91 -40
- snowflake/snowpark_connect/column_qualifier.py +0 -4
- snowflake/snowpark_connect/config.py +9 -0
- snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
- snowflake/snowpark_connect/expression/literal.py +12 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +18 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +150 -29
- snowflake/snowpark_connect/expression/map_unresolved_function.py +93 -55
- snowflake/snowpark_connect/relation/map_aggregate.py +156 -257
- snowflake/snowpark_connect/relation/map_column_ops.py +19 -0
- snowflake/snowpark_connect/relation/map_join.py +454 -252
- snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
- snowflake/snowpark_connect/relation/map_sql.py +335 -90
- snowflake/snowpark_connect/relation/read/map_read.py +9 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
- snowflake/snowpark_connect/relation/read/map_read_json.py +90 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
- snowflake/snowpark_connect/relation/read/utils.py +41 -0
- snowflake/snowpark_connect/relation/utils.py +50 -2
- snowflake/snowpark_connect/relation/write/map_write.py +251 -292
- snowflake/snowpark_connect/resources_initializer.py +25 -13
- snowflake/snowpark_connect/server.py +9 -24
- snowflake/snowpark_connect/type_mapping.py +2 -0
- snowflake/snowpark_connect/typed_column.py +2 -2
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +4 -1
- snowflake/snowpark_connect/utils/udf_helper.py +1 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +4 -2
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +43 -104
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,10 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 
+import copy
 import os
 import shutil
+import uuid
 from pathlib import Path
 
 import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -29,6 +31,7 @@ from snowflake.snowpark.types import (
 )
 from snowflake.snowpark_connect.config import (
     auto_uppercase_column_identifiers,
+    get_success_file_generation_enabled,
     global_config,
     sessions_config,
     str_to_bool,
@@ -117,6 +120,57 @@ def _spark_to_snowflake(multipart_id: str) -> str:
     )
 
 
+def _validate_table_exist_and_of_type(
+    snowpark_table_name: str,
+    session: snowpark.Session,
+    table_type: str,
+    table_schema_or_error: DataType | SnowparkSQLException,
+) -> None:
+    if not isinstance(table_schema_or_error, DataType):
+        exception = AnalysisException(
+            f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{snowpark_table_name}` cannot be found."
+        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
+    _validate_table_type(snowpark_table_name, session, table_type)
+
+
+def _validate_table_type(
+    snowpark_table_name: str,
+    session: snowpark.Session,
+    table_type: str,
+) -> None:
+    actual_type = get_table_type(snowpark_table_name, session)
+    if table_type == "iceberg":
+        if actual_type not in ("ICEBERG", "TABLE"):
+            exception = AnalysisException(
+                f"Table {snowpark_table_name} is not an iceberg table"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
+    elif table_type == "fdn":
+        if actual_type not in ("NORMAL", "TABLE"):
+            exception = AnalysisException(
+                f"Table {snowpark_table_name} is not a FDN table"
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
+    else:
+        raise ValueError(
+            f"Invalid table_type: {table_type}. Must be 'iceberg' or 'fdn'"
+        )
+
+
+def _validate_table_does_not_exist(
+    snowpark_table_name: str,
+    table_schema_or_error: DataType | SnowparkSQLException,
+) -> None:
+    if isinstance(table_schema_or_error, DataType):
+        exception = AnalysisException(f"Table {snowpark_table_name} already exists")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
+
+
 def map_write(request: proto_base.ExecutePlanRequest):
     write_op = request.plan.command.write_operation
     telemetry.report_io_write(write_op.source)
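
Reviewer note: the three helpers above consolidate the existence and table-type checks that were previously repeated inline across the v1 and v2 write paths. A minimal usage sketch, mirroring the call sites later in this diff (the table name is hypothetical, not taken from the package):

    schema_or_error = _get_table_schema_or_error("MY_DB.PUBLIC.EVENTS", session)

    # Append/overwrite paths: the table must already exist and be of the expected kind.
    _validate_table_exist_and_of_type("MY_DB.PUBLIC.EVENTS", session, "iceberg", schema_or_error)

    # Create path: the table must not exist yet.
    _validate_table_does_not_exist("MY_DB.PUBLIC.EVENTS", schema_or_error)

    # Type check alone, once existence has already been established.
    _validate_table_type("MY_DB.PUBLIC.EVENTS", session, "fdn")
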
@@ -198,20 +252,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
             )
 
             if overwrite:
-
-
-
-                    )
-                    if not path_after_stage or path_after_stage == "/":
-                        logger.warning(
-                            f"Skipping REMOVE for root path {write_path} - too broad scope"
-                        )
-                    else:
-                        remove_command = f"REMOVE '{write_path}/'"
-                        session.sql(remove_command).collect()
-                        logger.info(f"Successfully cleared directory: {write_path}")
-                except Exception as e:
-                    logger.warning(f"Could not clear directory {write_path}: {e}")
+                remove_command = f"REMOVE '{write_path}'"
+                session.sql(remove_command).collect()
+                logger.info(f"Successfully cleared directory: {write_path}")
 
             if should_write_to_single_file and partition_hint is None:
                 # Single file: generate complete filename with extension
@@ -240,7 +283,6 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "format_type_options": {
                     "COMPRESSION": compression,
                 },
-                "overwrite": overwrite,
             }
             # Download from the base write path to ensure we fetch whatever Snowflake produced.
             # Using the base avoids coupling to exact filenames/prefixes.
@@ -300,15 +342,20 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 # Execute multiple COPY INTO operations, one per target file.
                 # Since we write per-partition with distinct prefixes, download from the base write path.
                 download_stage_path = write_path
+
+                # We need to create a new set of parameters with single=True
+                shared_uuid = str(uuid.uuid4())
+                part_params = copy.deepcopy(dict(parameters))
+                part_params["single"] = True
                 for part_idx in range(partition_hint):
-                    part_params = dict(parameters)
                     # Preserve Spark-like filename prefix per partition so downloaded basenames
                     # match the expected Spark pattern (with possible Snowflake counters appended).
                     per_part_prefix = generate_spark_compatible_filename(
                         task_id=part_idx,
                         attempt_number=0,
-                        compression=
-                        format_ext=
+                        compression=compression,
+                        format_ext=extension,
+                        shared_uuid=shared_uuid,
                     )
                     part_params["location"] = f"{write_path}/{per_part_prefix}"
                     (
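
Reviewer note: the per-partition loop now hoists the parameter copy out of the loop, forces single-file output per partition, and threads one shared UUID through generate_spark_compatible_filename so that every part file carries the same job identifier, much as a single Spark job names its outputs. A rough sketch of the naming intent only; the real pattern is produced by generate_spark_compatible_filename, which this diff does not expand, and may differ in detail:

    import uuid

    shared_uuid = str(uuid.uuid4())
    # Hypothetical Spark-style prefixes for three partitions of one write job.
    prefixes = [f"part-{part_idx:05d}-{shared_uuid}" for part_idx in range(3)]
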
@@ -318,6 +365,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     )
             else:
                 rewritten_df.write.copy_into_location(**parameters)
+
+            generate_success = get_success_file_generation_enabled()
+
             if not is_cloud_path(write_op.path):
                 store_files_locally(
                     download_stage_path,
@@ -325,6 +375,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     overwrite,
                     session,
                 )
+                if generate_success:
+                    _write_success_file_locally(write_op.path)
+            else:
+                if generate_success:
+                    _write_success_file_to_stage(
+                        download_stage_path, session, parameters
+                    )
         case "jdbc":
             from snowflake.snowpark_connect.relation.write.map_write_jdbc import (
                 map_write_jdbc,
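
Reviewer note: success-marker generation is gated by the new get_success_file_generation_enabled() flag and branches on the destination. A condensed sketch of the behaviour added above, using only names visible in this diff (the two helpers are defined near the end of the file):

    generate_success = get_success_file_generation_enabled()
    if not is_cloud_path(write_op.path):
        # Local destination: files are downloaded, then an empty _SUCCESS marker is touched.
        if generate_success:
            _write_success_file_locally(write_op.path)
    else:
        # Stage/cloud destination: a tiny uncompressed CSV named _SUCCESS is copied onto the stage.
        if generate_success:
            _write_success_file_to_stage(download_stage_path, session, parameters)
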
@@ -347,14 +404,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
                 )
-
-
-
-                    )
-                    attach_custom_error_code(
-                        exception, ErrorCodes.INVALID_OPERATION
-                    )
-                    raise exception
+                _validate_table_does_not_exist(
+                    snowpark_table_name, table_schema_or_error
+                )
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.options.get("location", None),
@@ -373,17 +425,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if isinstance(table_schema_or_error, DataType):  # Table exists
-
-                        "ICEBERG",
-                        "TABLE",
-                    ):
-                        exception = AnalysisException(
-                            f"Table {snowpark_table_name} is not an iceberg table"
-                        )
-                        attach_custom_error_code(
-                            exception, ErrorCodes.INVALID_OPERATION
-                        )
-                        raise exception
+                    _validate_table_type(snowpark_table_name, session, "iceberg")
                 else:
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
@@ -423,29 +465,25 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if isinstance(table_schema_or_error, DataType):  # Table exists
-
-
-
-
-
-
-
-
-                        exception, ErrorCodes.INVALID_OPERATION
-                    )
-                    raise exception
+                    _validate_table_type(snowpark_table_name, session, "iceberg")
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.options.get("location", None),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                        mode="replace",
+                    )
                 else:
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.options.get("location", None),
                         schema=input_df.schema,
                         snowpark_session=session,
+                        mode="create",
                     )
-
-                    input_df, "truncate", snowpark_table_name, table_schema_or_error
-                ).saveAsTable(
+                _get_writer_for_table_creation(input_df).saveAsTable(
                     table_name=snowpark_table_name,
-                    mode="
+                    mode="append",
                     column_order=_column_order_for_write,
                 )
             case _:
@@ -491,18 +529,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if isinstance(table_schema_or_error, DataType):  # Table exists
-
-
-
-                    ):
-                        exception = AnalysisException(
-                            f"Table {snowpark_table_name} is not a FDN table"
-                        )
-                        attach_custom_error_code(
-                            exception, ErrorCodes.INVALID_OPERATION
-                        )
-                        raise exception
-                write_mode = "truncate"
+                    _validate_table_type(snowpark_table_name, session, "fdn")
+
+                write_mode = "overwrite"
                 _validate_schema_and_get_writer(
                     input_df,
                     write_mode,
@@ -511,27 +540,15 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
+                    copy_grants=True,
                     column_order=_column_order_for_write,
                 )
             case "append":
                 table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
                 )
-                if isinstance(
-
-                ) and get_table_type(  # Table exists
-                    snowpark_table_name, session
-                ) not in (
-                    "NORMAL",
-                    "TABLE",
-                ):
-                    exception = AnalysisException(
-                        f"Table {snowpark_table_name} is not a FDN table"
-                    )
-                    attach_custom_error_code(
-                        exception, ErrorCodes.INVALID_OPERATION
-                    )
-                    raise exception
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    _validate_table_type(snowpark_table_name, session, "fdn")
 
                 _validate_schema_and_get_writer(
                     input_df,
@@ -598,113 +615,105 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
         attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
         raise exception
 
-
-
-
-
-
-
-
-
-
-
-
-            raise exception
+    is_iceberg = write_op.provider.lower() == "iceberg"
+    table_type = "iceberg" if is_iceberg else "fdn"
+
+    match write_op.mode:
+        case commands_proto.WriteOperationV2.MODE_CREATE:
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            _validate_table_does_not_exist(snowpark_table_name, table_schema_or_error)
+
+            if is_iceberg:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.table_properties.get("location"),
                     schema=input_df.schema,
                     snowpark_session=session,
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                )
-            if isinstance(table_schema_or_error, DataType):  # Table exists
-                if get_table_type(snowpark_table_name, session) not in (
-                    "ICEBERG",
-                    "TABLE",
-                ):
-                    exception = AnalysisException(
-                        f"Table {snowpark_table_name} is not an iceberg table"
-                    )
-                    attach_custom_error_code(
-                        exception, ErrorCodes.INVALID_OPERATION
-                    )
-                    raise exception
-            else:
-                exception = AnalysisException(
-                    f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
-                )
-                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
-                raise exception
-            _validate_schema_and_get_writer(
-                input_df, "truncate", snowpark_table_name, table_schema_or_error
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="truncate",
-                column_order=_column_order_for_write,
+            _get_writer_for_table_creation(input_df).saveAsTable(
+                table_name=snowpark_table_name,
+                mode="append" if is_iceberg else "errorifexists",
+                column_order=_column_order_for_write,
+            )
+
+        case commands_proto.WriteOperationV2.MODE_APPEND:
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            _validate_table_exist_and_of_type(
+                snowpark_table_name, session, table_type, table_schema_or_error
+            )
+            _validate_schema_and_get_writer(
+                input_df, "append", snowpark_table_name, table_schema_or_error
+            ).saveAsTable(
+                table_name=snowpark_table_name,
+                mode="append",
+                column_order=_column_order_for_write,
+            )
+
+        case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
+            # TODO: handle the filter condition for MODE_OVERWRITE
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            _validate_table_exist_and_of_type(
+                snowpark_table_name, session, table_type, table_schema_or_error
+            )
+
+            if is_iceberg:
+                create_iceberg_table(
+                    snowpark_table_name=snowpark_table_name,
+                    location=write_op.options.get("location", None),
+                    schema=input_df.schema,
+                    snowpark_session=session,
+                    mode="replace",
                 )
-
-
-
+                writer = _get_writer_for_table_creation(input_df)
+                save_mode = "append"
+            else:
+                writer = _validate_schema_and_get_writer(
+                    input_df, "overwrite", snowpark_table_name, table_schema_or_error
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                save_mode = "overwrite"
+
+            writer.saveAsTable(
+                table_name=snowpark_table_name,
+                mode=save_mode,
+                column_order=_column_order_for_write,
+            )
+
+        case commands_proto.WriteOperationV2.MODE_REPLACE:
+            table_schema_or_error = _get_table_schema_or_error(
+                snowpark_table_name, session
+            )
+            _validate_table_exist_and_of_type(
+                snowpark_table_name, session, table_type, table_schema_or_error
+            )
+
+            if is_iceberg:
+                create_iceberg_table(
+                    snowpark_table_name=snowpark_table_name,
+                    location=write_op.table_properties.get("location"),
+                    schema=input_df.schema,
+                    snowpark_session=session,
+                    mode="replace",
                 )
-
+                save_mode = "append"
+            else:
+                save_mode = "overwrite"
+
+            _validate_schema_and_get_writer(
+                input_df, "replace", snowpark_table_name, table_schema_or_error
+            ).saveAsTable(
+                table_name=snowpark_table_name,
+                mode=save_mode,
+                column_order=_column_order_for_write,
+            )
+
+        case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
+            if is_iceberg:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.table_properties.get("location"),
@@ -712,116 +721,24 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                     mode="create_or_replace",
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="errorifexists",
-                column_order=_column_order_for_write,
-            )
-        case commands_proto.WriteOperationV2.MODE_APPEND:
-            table_schema_or_error = _get_table_schema_or_error(
-                snowpark_table_name, session
-            )
-            if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                exception = AnalysisException(
-                    f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
-                )
-                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
-                raise exception
-            if get_table_type(snowpark_table_name, session) not in (
-                "NORMAL",
-                "TABLE",
-            ):
-                exception = AnalysisException(
-                    f"Table {snowpark_table_name} is not a FDN table"
-                )
-                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
-                raise exception
-            _validate_schema_and_get_writer(
-                input_df, "append", snowpark_table_name, table_schema_or_error
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="append",
-                column_order=_column_order_for_write,
-            )
-        case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
-            # TODO: handle the filter condition for MODE_OVERWRITE
-            table_schema_or_error = _get_table_schema_or_error(
-                snowpark_table_name, session
-            )
-            if isinstance(table_schema_or_error, DataType):  # Table exists
-                if get_table_type(snowpark_table_name, session) not in (
-                    "NORMAL",
-                    "TABLE",
-                ):
-                    exception = AnalysisException(
-                        f"Table {snowpark_table_name} is not a FDN table"
-                    )
-                    attach_custom_error_code(
-                        exception, ErrorCodes.INVALID_OPERATION
-                    )
-                    raise exception
-            else:
-                exception = AnalysisException(
-                    f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
-                )
-                attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
-                raise exception
-            _validate_schema_and_get_writer(
-                input_df, "truncate", snowpark_table_name, table_schema_or_error
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="truncate",
-                column_order=_column_order_for_write,
-            )
-        case commands_proto.WriteOperationV2.MODE_REPLACE:
-            table_schema_or_error = _get_table_schema_or_error(
-                snowpark_table_name, session
-            )
-            if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                exception = AnalysisException(
-                    f"Table {snowpark_table_name} does not exist"
-                )
-                attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
-                raise exception
-            _validate_schema_and_get_writer(
-                input_df, "replace", snowpark_table_name, table_schema_or_error
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="overwrite",
-                column_order=_column_order_for_write,
-            )
-        case commands_proto.WriteOperationV2.MODE_CREATE_OR_REPLACE:
-            _validate_schema_and_get_writer(
-                input_df, "create_or_replace", snowpark_table_name
-            ).saveAsTable(
-                table_name=snowpark_table_name,
-                mode="overwrite",
-                column_order=_column_order_for_write,
-            )
-        case _:
-            exception = SnowparkConnectNotImplementedError(
-                f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
-            )
-            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
-            raise exception
+                save_mode = "append"
+            else:
+                save_mode = "overwrite"
+
+            _validate_schema_and_get_writer(
+                input_df, "create_or_replace", snowpark_table_name
+            ).saveAsTable(
+                table_name=snowpark_table_name,
+                mode=save_mode,
+                column_order=_column_order_for_write,
+            )
+
+        case _:
+            exception = SnowparkConnectNotImplementedError(
+                f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _get_table_schema_or_error(
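
Reviewer note: taken together, the rewritten match in map_write_v2 maps the Spark Connect V2 write modes onto a small set of Snowpark save modes. The mapping below is an illustrative summary derived only from the arms shown above (the dict itself does not exist in the module); for Iceberg targets the table is first created or replaced via create_iceberg_table and the data is then appended:

    V2_MODE_TO_SAVE_MODE = {
        "MODE_CREATE": {"iceberg": "append", "fdn": "errorifexists"},
        "MODE_APPEND": {"iceberg": "append", "fdn": "append"},
        "MODE_OVERWRITE": {"iceberg": "append", "fdn": "overwrite"},
        "MODE_OVERWRITE_PARTITIONS": {"iceberg": "append", "fdn": "overwrite"},
        "MODE_REPLACE": {"iceberg": "append", "fdn": "overwrite"},
        "MODE_CREATE_OR_REPLACE": {"iceberg": "append", "fdn": "overwrite"},
    }
    # Any other mode raises SnowparkConnectNotImplementedError.
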
@@ -856,6 +773,7 @@ def _validate_schema_and_get_writer(
     if write_mode is not None and write_mode.lower() in (
         "replace",
         "create_or_replace",
+        "overwrite",
     ):
         return _get_writer_for_table_creation(input_df)
 
@@ -1053,14 +971,13 @@ def create_iceberg_table(
         if config_external_volume is None or config_external_volume == ""
         else f"EXTERNAL_VOLUME = '{config_external_volume}'"
     )
-
+    copy_grants = ""
     match mode:
         case "create":
             create_sql = "CREATE"
-        case "replace":
+        case "replace" | "create_or_replace":
             # There's no replace for iceberg table, so we use create or replace
-
-        case "create_or_replace":
+            copy_grants = "COPY GRANTS"
             create_sql = "CREATE OR REPLACE"
         case _:
             exception = SnowparkConnectNotImplementedError(
@@ -1072,7 +989,8 @@ def create_iceberg_table(
     {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
     CATALOG = 'SNOWFLAKE'
     {external_volume}
-    {base_location}
+    {base_location}
+    {copy_grants};
     """
     snowpark_session.sql(sql).collect()
 
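
Reviewer note: with the new {copy_grants} placeholder, replace-style Iceberg DDL now preserves existing grants. Illustrative rendering only; the identifiers are hypothetical and the {base_location} and column-list fragments are built elsewhere in this file and are not expanded in this diff:

    # Roughly what `sql` might contain for a replace of a hypothetical table:
    #
    #   CREATE OR REPLACE ICEBERG TABLE MY_DB.PUBLIC.EVENTS (...)
    #   CATALOG = 'SNOWFLAKE'
    #   EXTERNAL_VOLUME = 'MY_EXT_VOLUME'
    #   <base_location clause>
    #   COPY GRANTS;
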
@@ -1133,6 +1051,47 @@ def handle_column_names(
     return df
 
 
+def _write_success_file_locally(directory_path: str) -> None:
+    """
+    Write a _SUCCESS marker file to a local directory.
+    """
+    try:
+        success_file = Path(directory_path) / "_SUCCESS"
+        success_file.touch()
+        logger.debug(f"Created _SUCCESS file at {directory_path}")
+    except Exception as e:
+        logger.warning(f"Failed to create _SUCCESS file at {directory_path}: {e}")
+
+
+def _write_success_file_to_stage(
+    stage_path: str,
+    session: snowpark.Session,
+    parameters: dict,
+) -> None:
+    """
+    Write a _SUCCESS marker file to a stage location.
+    """
+    try:
+        # Create a dummy dataframe with one row containing "SUCCESS"
+        success_df = session.create_dataframe([["SUCCESS"]]).to_df(["STATUS"])
+        success_params = copy.deepcopy(parameters)
+        success_params["location"] = f"{stage_path}/_SUCCESS"
+        success_params["single"] = True
+        success_params["header"] = True
+
+        # Set CSV format with explicit no compression for _SUCCESS file
+        success_params["file_format_type"] = "csv"
+        success_params["format_type_options"] = {
+            "COMPRESSION": "NONE",
+        }
+
+        success_df.write.copy_into_location(**success_params)
+
+        logger.debug(f"Created _SUCCESS file at {stage_path}")
+    except Exception as e:
+        logger.warning(f"Failed to create _SUCCESS file at {stage_path}: {e}")
+
+
 def store_files_locally(
     stage_path: str, target_path: str, overwrite: bool, session: snowpark.Session
 ) -> None:
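
Reviewer note: unlike the local marker, the stage-side _SUCCESS object is not an empty file; it is a single-row, uncompressed CSV with a STATUS header, written through COPY INTO. Downstream jobs should therefore test for the marker's presence rather than for an empty file. A hedged sketch of such a check, with a hypothetical stage path:

    # Hypothetical downstream check for the marker written by _write_success_file_to_stage.
    rows = session.sql("LIST '@my_stage/output/events_csv/'").collect()
    write_completed = any(row["name"].endswith("_SUCCESS") for row in rows)
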