snowpark-connect 0.27.0__py3-none-any.whl → 0.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +3 -93
- snowflake/snowpark_connect/config.py +99 -1
- snowflake/snowpark_connect/dataframe_container.py +0 -6
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +31 -68
- snowflake/snowpark_connect/expression/map_expression.py +22 -7
- snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
- snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
- snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +207 -20
- snowflake/snowpark_connect/relation/map_extension.py +14 -10
- snowflake/snowpark_connect/relation/map_join.py +62 -258
- snowflake/snowpark_connect/relation/map_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_sql.py +464 -68
- snowflake/snowpark_connect/relation/read/map_read_table.py +58 -0
- snowflake/snowpark_connect/relation/write/map_write.py +228 -120
- snowflake/snowpark_connect/resources_initializer.py +20 -5
- snowflake/snowpark_connect/server.py +16 -17
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +21 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
- snowflake/snowpark_connect/utils/identifiers.py +128 -2
- snowflake/snowpark_connect/utils/io_utils.py +21 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
- snowflake/snowpark_connect/utils/session.py +16 -26
- snowflake/snowpark_connect/utils/telemetry.py +53 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +61 -0
- snowflake/snowpark_connect/utils/udf_utils.py +9 -8
- snowflake/snowpark_connect/utils/udtf_utils.py +3 -2
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/METADATA +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/RECORD +41 -41
- snowflake/snowpark_connect/hidden_column.py +0 -39
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/read/map_read_table.py

@@ -11,11 +11,17 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import (
+    ColumnNameMap,
+    make_column_names_snowpark_compatible,
+)
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
+from snowflake.snowpark_connect.utils.context import get_processed_views
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )

@@ -23,6 +29,7 @@ from snowflake.snowpark_connect.utils.session import _get_current_snowpark_sessi
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
+from snowflake.snowpark_connect.utils.temporary_view_cache import get_temp_view


 def post_process_df(

@@ -64,15 +71,66 @@ def post_process_df(
         raise


+def _get_temporary_view(
+    temp_view: DataFrameContainer, table_name: str, plan_id: int
+) -> DataFrameContainer:
+    fields_names = [field.name for field in temp_view.dataframe.schema.fields]
+    fields_types = [field.datatype for field in temp_view.dataframe.schema.fields]
+
+    snowpark_column_names = make_column_names_snowpark_compatible(fields_names, plan_id)
+    # Rename columns in dataframe to prevent conflicting names during joins
+    renamed_df = temp_view.dataframe.select(
+        *(
+            temp_view.dataframe.col(orig).alias(alias)
+            for orig, alias in zip(fields_names, snowpark_column_names)
+        )
+    )
+
+    new_column_map = ColumnNameMap(
+        spark_column_names=temp_view.column_map.get_spark_columns(),
+        snowpark_column_names=snowpark_column_names,
+        column_metadata=temp_view.column_map.column_metadata,
+        column_qualifiers=[split_fully_qualified_spark_name(table_name)]
+        * len(temp_view.column_map.get_spark_columns()),
+        parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
+    )
+
+    schema = StructType(
+        [
+            StructField(name, type, _is_column=False)
+            for name, type in zip(snowpark_column_names, fields_types)
+        ]
+    )
+    return DataFrameContainer(
+        dataframe=renamed_df,
+        column_map=new_column_map,
+        table_name=temp_view.table_name,
+        alias=temp_view.alias,
+        partition_hint=temp_view.partition_hint,
+        cached_schema_getter=lambda: schema,
+    )
+
+
 def get_table_from_name(
     table_name: str, session: snowpark.Session, plan_id: int
 ) -> DataFrameContainer:
     """Get table from name returning a container."""
+
+    # Verify if recursive view read is not attempted
+    if table_name in get_processed_views():
+        raise AnalysisException(
+            f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
+        )
+
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
         for part in split_fully_qualified_spark_name(table_name)
     )

+    temp_view = get_temp_view(snowpark_name)
+    if temp_view:
+        return _get_temporary_view(temp_view, table_name, plan_id)
+
     if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()

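As an aside, the `_get_temporary_view` helper added above re-aliases every column of a cached temporary view before handing it back, so that two reads of the same view can be joined without ambiguous column names. A minimal standalone sketch of that rename-on-select pattern (the suffix scheme and the helper name below are assumptions for illustration; the real mapping comes from make_column_names_snowpark_compatible):

    from snowflake.snowpark import DataFrame

    def rename_for_plan(df: DataFrame, plan_id: int) -> DataFrame:
        # Alias each column with a plan-scoped suffix (assumed scheme) so that
        # joining the same view against itself yields no duplicate column names.
        originals = [field.name for field in df.schema.fields]
        aliases = [f"{name}__plan{plan_id}" for name in originals]
        return df.select(*(df.col(orig).alias(alias) for orig, alias in zip(originals, aliases)))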
snowflake/snowpark_connect/relation/write/map_write.py

@@ -50,6 +50,7 @@ from snowflake.snowpark_connect.utils.identifiers import (
     spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
+from snowflake.snowpark_connect.utils.io_utils import get_table_type
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
@@ -217,8 +218,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
         },
         "overwrite": overwrite,
     }
-    #
-
+    # Download from the base write path to ensure we fetch whatever Snowflake produced.
+    # Using the base avoids coupling to exact filenames/prefixes.
+    download_stage_path = write_path

     # Check for partition hint early to determine precedence over single option
     partition_hint = result.partition_hint
@@ -237,13 +239,19 @@ def map_write(request: proto_base.ExecutePlanRequest):
             raise SnowparkConnectNotImplementedError(
                 "Partitioning is only supported for parquet format"
             )
-
-
-
-
-
-
-
+        # Build Spark-style directory structure: col1=value1/col2=value2/...
+        # Example produced expression (Snowflake SQL):
+        # 'department=' || TO_VARCHAR("department") || '/' || 'region=' || TO_VARCHAR("region")
+        partitioning_column_names = list(write_op.partitioning_columns)
+        partition_expr_parts: list[str] = []
+        for col_name in partitioning_column_names:
+            quoted = f'"{col_name}"'
+            segment = f"'{col_name}=' || COALESCE(TO_VARCHAR({quoted}), '__HIVE_DEFAULT_PARTITION__')"
+            partition_expr_parts.append(segment)
+        parameters["partition_by"] = " || '/' || ".join(partition_expr_parts)
+        # When using PARTITION BY, Snowflake writes into subdirectories under the base path.
+        # Download from the base write path to preserve partition directories locally.
+        download_stage_path = write_path

     # If a partition hint is present (from DataFrame.repartition(n)), optionally split the
     # write into n COPY INTO calls by assigning a synthetic partition id. Controlled by config.
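For reference, the expression assembled by the loop above can be reproduced standalone; the column names below are made-up examples, and the result is the Snowflake PARTITION BY expression that yields Spark/Hive-style paths such as department=HR/region=US, with NULLs falling back to __HIVE_DEFAULT_PARTITION__ as in the Hive convention:

    cols = ["department", "region"]  # example partition columns, not taken from the diff
    parts = [
        f"""'{c}=' || COALESCE(TO_VARCHAR("{c}"), '__HIVE_DEFAULT_PARTITION__')"""
        for c in cols
    ]
    partition_by = " || '/' || ".join(parts)
    print(partition_by)
    # 'department=' || COALESCE(TO_VARCHAR("department"), '__HIVE_DEFAULT_PARTITION__') || '/' || 'region=' || COALESCE(TO_VARCHAR("region"), '__HIVE_DEFAULT_PARTITION__')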
@@ -311,7 +319,10 @@ def map_write(request: proto_base.ExecutePlanRequest):

         match write_mode:
             case None | "error" | "errorifexists":
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -322,29 +333,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "append":
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
+                else:
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.options.get("location", None),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "ignore":
-
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                )
+                )
+                if not isinstance(
+                    table_schema_or_error, DataType
+                ):  # Table not exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.options.get("location", None),
@@ -359,13 +386,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case "overwrite":
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
@@ -374,7 +405,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                         snowpark_session=session,
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
@@ -393,33 +424,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
     ):
         match write_mode:
             case "overwrite":
-
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                )
-
-
-
-
-
-
-
-
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 write_mode = "truncate"
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
                     column_order=_column_order_for_write,
                 )
             case "append":
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(
+                    table_schema_or_error, DataType
+                ) and get_table_type(  # Table exists
+                    snowpark_table_name, session
+                ) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
+
                 _validate_schema_and_get_writer(
-                    input_df,
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
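The append/overwrite branches above (and the V2 branches that follow) all apply the same gate on the kind of table that already exists. Condensed into one hypothetical helper for readability; get_table_type's return values ("ICEBERG", "NORMAL", "TABLE") are inferred from how the diff uses them, and the helper name is made up:

    def check_writable_table_kind(table_type: str, provider_is_iceberg: bool) -> None:
        # Iceberg writes may target ICEBERG or TABLE; non-Iceberg (FDN) writes may target NORMAL or TABLE.
        allowed = ("ICEBERG", "TABLE") if provider_is_iceberg else ("NORMAL", "TABLE")
        if table_type not in allowed:
            kind = "an iceberg" if provider_is_iceberg else "a FDN"
            raise ValueError(f"existing table is not {kind} table (got {table_type})")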
@@ -466,7 +513,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     if write_op.provider.lower() == "iceberg":
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -477,24 +527,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -502,26 +557,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.table_properties.get("location"),
@@ -534,7 +596,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -570,17 +632,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-
-
-
-
-
+                if get_table_type(snowpark_table_name, session) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -588,31 +655,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-
-
-
-
-
-
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="overwrite",
@@ -632,8 +706,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     )


+def _get_table_schema_or_error(
+    snowpark_table_name: str, snowpark_session: snowpark.Session
+) -> DataType | SnowparkSQLException:
+    try:
+        return snowpark_session.table(snowpark_table_name).schema
+    except SnowparkSQLException as e:
+        return e
+
+
 def _validate_schema_and_get_writer(
-    input_df: snowpark.DataFrame,
+    input_df: snowpark.DataFrame,
+    write_mode: str,
+    snowpark_table_name: str,
+    table_schema_or_error: DataType | SnowparkSQLException | None = None,
 ) -> snowpark.DataFrameWriter:
     if write_mode is not None and write_mode.lower() in (
         "replace",
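The new `_get_table_schema_or_error` helper folds the old existence check and the schema fetch into a single lookup; callers branch on the returned type instead of querying twice. A standalone sketch of the same pattern (a live session is assumed; names below are illustrative, not the package's API):

    from snowflake.snowpark import Session
    from snowflake.snowpark.exceptions import SnowparkSQLException
    from snowflake.snowpark.types import DataType

    def schema_or_error(session: Session, table_name: str):
        # One lookup: the schema (a StructType, hence a DataType) if the table
        # exists, or the SnowparkSQLException raised when it does not.
        try:
            return session.table(table_name).schema
        except SnowparkSQLException as exc:
            return exc

    def table_exists(session: Session, table_name: str) -> bool:
        # Existence is just "did we get a schema back"; no second query needed.
        return isinstance(schema_or_error(session, table_name), DataType)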
@@ -642,16 +728,26 @@ def _validate_schema_and_get_writer(
         return input_df.write

     table_schema = None
-
-
-
-
-
-
-
-
-
-
+    if table_schema_or_error is not None:
+        if isinstance(table_schema_or_error, SnowparkSQLException):
+            msg = table_schema_or_error.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise table_schema_or_error
+        elif isinstance(table_schema_or_error, DataType):
+            table_schema = table_schema_or_error
+    else:
+        try:
+            table_schema = (
+                get_or_create_snowpark_session().table(snowpark_table_name).schema
+            )
+        except SnowparkSQLException as e:
+            msg = e.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise e

     if table_schema is None:
         # If table does not exist, we can skip the schema validation
@@ -889,7 +985,47 @@ def store_files_locally(
         )
     if overwrite and os.path.isdir(target_path):
         _truncate_directory(real_path)
-
+    # Per Snowflake docs: "The command does not preserve stage directory structure when transferring files to your client machine"
+    # https://docs.snowflake.com/en/sql-reference/sql/get
+    # Preserve directory structure under stage_path by listing files and
+    # downloading each into its corresponding local subdirectory when partition subdirs exist.
+    # Otherwise, fall back to a direct GET which flattens.
+
+    # TODO(SNOW-2326973): This can be parallelized further. Its not done here because it only affects
+    # write to local storage.
+
+    ls_dataframe = session.sql(f"LS {stage_path}")
+    ls_iterator = ls_dataframe.toLocalIterator()
+
+    # Build a normalized base prefix from stage_path to compute relatives
+    # Example: stage_path='@MY_STAGE/prefix' -> base_prefix='my_stage/prefix/'
+    base_prefix = stage_path.lstrip("@").rstrip("/") + "/"
+    base_prefix_lower = base_prefix.lower()
+
+    # Group by parent directory under the base prefix, then issue a GET per directory.
+    # This gives a small parallelism advantage if we have many files per partition directory.
+    parent_dirs: set[str] = set()
+    for row in ls_iterator:
+        name: str = row[0]
+        name_lower = name.lower()
+        rel_start = name_lower.find(base_prefix_lower)
+        relative = name[rel_start + len(base_prefix) :] if rel_start != -1 else name
+        parent_dir = os.path.dirname(relative)
+        if parent_dir and parent_dir != ".":
+            parent_dirs.add(parent_dir)
+
+    # If no parent directories were discovered (non-partitioned unload prefix), use direct GET.
+    if not parent_dirs:
+        snowpark.file_operation.FileOperation(session).get(stage_path, str(real_path))
+        return
+
+    file_op = snowpark.file_operation.FileOperation(session)
+    for parent_dir in sorted(parent_dirs):
+        local_dir = real_path / parent_dir
+        os.makedirs(local_dir, exist_ok=True)
+
+        src_dir = f"@{base_prefix}{parent_dir}"
+        file_op.get(src_dir, str(local_dir))


 def _truncate_directory(directory_path: Path) -> None:
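To see what the directory grouping above computes, here is the same path math run standalone on made-up LS output (the stage name and file names are illustrative only):

    import os

    stage_path = "@MY_STAGE/output"                          # assumed example prefix
    base_prefix = stage_path.lstrip("@").rstrip("/") + "/"   # -> "MY_STAGE/output/"

    listed = [
        "my_stage/output/department=HR/region=US/data_0_0_0.snappy.parquet",
        "my_stage/output/department=ENG/region=EU/data_0_0_1.snappy.parquet",
    ]  # shape of LS output names, assumed for illustration

    parent_dirs = set()
    for name in listed:
        rel_start = name.lower().find(base_prefix.lower())
        relative = name[rel_start + len(base_prefix):] if rel_start != -1 else name
        parent = os.path.dirname(relative)
        if parent and parent != ".":
            parent_dirs.add(parent)

    print(sorted(parent_dirs))
    # ['department=ENG/region=EU', 'department=HR/region=US']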
@@ -904,31 +1040,3 @@ def _truncate_directory(directory_path: Path) -> None:
             file.unlink()
         elif file.is_dir():
             shutil.rmtree(file)
-
-
-def check_snowflake_table_existence(
-    snowpark_table_name: str,
-    snowpark_session: snowpark.Session,
-):
-    try:
-        snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
-        return True
-    except Exception:
-        return False
-
-
-# TODO: SNOW-2299414 Fix the implementation of table type check
-# def check_table_type(
-#     snowpark_table_name: str,
-#     snowpark_session: snowpark.Session,
-# ) -> str:
-#     # currently we only support iceberg table and FDN table
-#     metadata = snowpark_session.sql(
-#         f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
-#     ).collect()
-#     if metadata is None or len(metadata) == 0:
-#         raise AnalysisException(f"Table {snowpark_table_name} does not exist")
-#     metadata = metadata[0]
-#     if metadata.as_dict().get("is_iceberg") == "Y":
-#         return "ICEBERG"
-#     return "TABLE"