snowpark-connect 0.27.0__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (42)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -1
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/execute_plan/map_execution_command.py +31 -68
  5. snowflake/snowpark_connect/expression/map_expression.py +22 -7
  6. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  7. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
  8. snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
  9. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  10. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  11. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +207 -20
  12. snowflake/snowpark_connect/relation/map_extension.py +14 -10
  13. snowflake/snowpark_connect/relation/map_join.py +62 -258
  14. snowflake/snowpark_connect/relation/map_relation.py +5 -1
  15. snowflake/snowpark_connect/relation/map_sql.py +464 -68
  16. snowflake/snowpark_connect/relation/read/map_read_table.py +58 -0
  17. snowflake/snowpark_connect/relation/write/map_write.py +228 -120
  18. snowflake/snowpark_connect/resources_initializer.py +20 -5
  19. snowflake/snowpark_connect/server.py +16 -17
  20. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  21. snowflake/snowpark_connect/utils/context.py +21 -0
  22. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  23. snowflake/snowpark_connect/utils/identifiers.py +128 -2
  24. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  25. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  26. snowflake/snowpark_connect/utils/session.py +16 -26
  27. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  28. snowflake/snowpark_connect/utils/temporary_view_cache.py +61 -0
  29. snowflake/snowpark_connect/utils/udf_utils.py +9 -8
  30. snowflake/snowpark_connect/utils/udtf_utils.py +3 -2
  31. snowflake/snowpark_connect/version.py +1 -1
  32. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/METADATA +2 -2
  33. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/RECORD +41 -41
  34. snowflake/snowpark_connect/hidden_column.py +0 -39
  35. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-connect +0 -0
  36. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-session +0 -0
  37. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.1.data}/scripts/snowpark-submit +0 -0
  38. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/WHEEL +0 -0
  39. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE-binary +0 -0
  40. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/licenses/NOTICE-binary +0 -0
  42. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.1.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/read/map_read_table.py

@@ -11,11 +11,17 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import (
+    ColumnNameMap,
+    make_column_names_snowpark_compatible,
+)
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
+from snowflake.snowpark_connect.utils.context import get_processed_views
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
@@ -23,6 +29,7 @@ from snowflake.snowpark_connect.utils.session import _get_current_snowpark_sessi
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
+from snowflake.snowpark_connect.utils.temporary_view_cache import get_temp_view
 
 
 def post_process_df(
@@ -64,15 +71,66 @@ def post_process_df(
         raise
 
 
+def _get_temporary_view(
+    temp_view: DataFrameContainer, table_name: str, plan_id: int
+) -> DataFrameContainer:
+    fields_names = [field.name for field in temp_view.dataframe.schema.fields]
+    fields_types = [field.datatype for field in temp_view.dataframe.schema.fields]
+
+    snowpark_column_names = make_column_names_snowpark_compatible(fields_names, plan_id)
+    # Rename columns in dataframe to prevent conflicting names during joins
+    renamed_df = temp_view.dataframe.select(
+        *(
+            temp_view.dataframe.col(orig).alias(alias)
+            for orig, alias in zip(fields_names, snowpark_column_names)
+        )
+    )
+
+    new_column_map = ColumnNameMap(
+        spark_column_names=temp_view.column_map.get_spark_columns(),
+        snowpark_column_names=snowpark_column_names,
+        column_metadata=temp_view.column_map.column_metadata,
+        column_qualifiers=[split_fully_qualified_spark_name(table_name)]
+        * len(temp_view.column_map.get_spark_columns()),
+        parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
+    )
+
+    schema = StructType(
+        [
+            StructField(name, type, _is_column=False)
+            for name, type in zip(snowpark_column_names, fields_types)
+        ]
+    )
+    return DataFrameContainer(
+        dataframe=renamed_df,
+        column_map=new_column_map,
+        table_name=temp_view.table_name,
+        alias=temp_view.alias,
+        partition_hint=temp_view.partition_hint,
+        cached_schema_getter=lambda: schema,
+    )
+
+
 def get_table_from_name(
     table_name: str, session: snowpark.Session, plan_id: int
 ) -> DataFrameContainer:
     """Get table from name returning a container."""
+
+    # Verify if recursive view read is not attempted
+    if table_name in get_processed_views():
+        raise AnalysisException(
+            f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
+        )
+
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
         for part in split_fully_qualified_spark_name(table_name)
     )
 
+    temp_view = get_temp_view(snowpark_name)
+    if temp_view:
+        return _get_temporary_view(temp_view, table_name, plan_id)
+
     if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()
 
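With this change, a name registered as a temporary view is resolved from the server-side temporary-view cache and its columns are re-aliased to plan-scoped Snowpark names (the rename prevents ambiguous columns when the same view appears on both sides of a join), while a view that ends up reading itself is rejected early. A rough client-side sketch of the behavior (assumes a running Snowpark Connect endpoint; the URL and names are made up):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    # Reads of this name are now served from the temporary-view cache rather than a
    # Snowflake table, with columns re-aliased per plan.
    spark.range(3).createOrReplaceTempView("people")
    df = spark.table("people")
    df.join(df, "id").show()

    # A view whose definition ends up reading itself should fail fast with an
    # AnalysisException like:
    #   [RECURSIVE_VIEW] Recursive view `people` detected (cycle: `people` -> `people`)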
snowflake/snowpark_connect/relation/write/map_write.py

@@ -50,6 +50,7 @@ from snowflake.snowpark_connect.utils.identifiers import (
     spark_to_sf_single_id,
     split_fully_qualified_spark_name,
 )
+from snowflake.snowpark_connect.utils.io_utils import get_table_type
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
@@ -217,8 +218,9 @@ def map_write(request: proto_base.ExecutePlanRequest):
         },
         "overwrite": overwrite,
     }
-    # By default, download from the same prefix we wrote to.
-    download_stage_path = temp_file_prefix_on_stage
+    # Download from the base write path to ensure we fetch whatever Snowflake produced.
+    # Using the base avoids coupling to exact filenames/prefixes.
+    download_stage_path = write_path
 
     # Check for partition hint early to determine precedence over single option
     partition_hint = result.partition_hint
@@ -237,13 +239,19 @@ def map_write(request: proto_base.ExecutePlanRequest):
             raise SnowparkConnectNotImplementedError(
                 "Partitioning is only supported for parquet format"
             )
-        partitioning_columns = [f'"{c}"' for c in write_op.partitioning_columns]
-        if len(partitioning_columns) > 1:
-            raise SnowparkConnectNotImplementedError(
-                "Multiple partitioning columns are not yet supported"
-            )
-        else:
-            parameters["partition_by"] = partitioning_columns[0]
+        # Build Spark-style directory structure: col1=value1/col2=value2/...
+        # Example produced expression (Snowflake SQL):
+        # 'department=' || TO_VARCHAR("department") || '/' || 'region=' || TO_VARCHAR("region")
+        partitioning_column_names = list(write_op.partitioning_columns)
+        partition_expr_parts: list[str] = []
+        for col_name in partitioning_column_names:
+            quoted = f'"{col_name}"'
+            segment = f"'{col_name}=' || COALESCE(TO_VARCHAR({quoted}), '__HIVE_DEFAULT_PARTITION__')"
+            partition_expr_parts.append(segment)
+        parameters["partition_by"] = " || '/' || ".join(partition_expr_parts)
+        # When using PARTITION BY, Snowflake writes into subdirectories under the base path.
+        # Download from the base write path to preserve partition directories locally.
+        download_stage_path = write_path
 
     # If a partition hint is present (from DataFrame.repartition(n)), optionally split the
     # write into n COPY INTO calls by assigning a synthetic partition id. Controlled by config.
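Pulled out of the hunk above, the loop concatenates one segment per partitioning column into a single Snowflake PARTITION BY expression, so multi-column partitioning is no longer rejected. A standalone sketch (the helper name is mine, not part of the package) that prints the expression the loop would produce:

    def build_partition_by_expr(partitioning_columns: list[str]) -> str:
        parts = []
        for col_name in partitioning_columns:
            quoted = f'"{col_name}"'
            parts.append(
                f"'{col_name}=' || COALESCE(TO_VARCHAR({quoted}), '__HIVE_DEFAULT_PARTITION__')"
            )
        return " || '/' || ".join(parts)

    print(build_partition_by_expr(["department", "region"]))
    # 'department=' || COALESCE(TO_VARCHAR("department"), '__HIVE_DEFAULT_PARTITION__')
    #   || '/' || 'region=' || COALESCE(TO_VARCHAR("region"), '__HIVE_DEFAULT_PARTITION__')

The unload therefore lands under Hive-style prefixes such as department=Sales/region=EMEA/, with NULL values mapped to __HIVE_DEFAULT_PARTITION__.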
@@ -311,7 +319,10 @@ def map_write(request: proto_base.ExecutePlanRequest):
 
         match write_mode:
             case None | "error" | "errorifexists":
-                if check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -322,29 +333,45 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "append":
-                # TODO: SNOW-2299414 Fix the implementation of table type check
-                # if check_table_type(snowpark_table_name, session) != "ICEBERG":
-                #     raise AnalysisException(
-                #         f"Table {snowpark_table_name} is not an iceberg table"
-                #     )
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
+                else:
+                    create_iceberg_table(
+                        snowpark_table_name=snowpark_table_name,
+                        location=write_op.options.get("location", None),
+                        schema=input_df.schema,
+                        snowpark_session=session,
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case "ignore":
-                if not check_snowflake_table_existence(
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                ):
+                )
+                if not isinstance(
+                    table_schema_or_error, DataType
+                ):  # Table not exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.options.get("location", None),
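One user-visible effect of the append branch above: a missing target no longer has to be created by hand, and the previously commented-out type check is live again. A hedged PySpark-level sketch (it assumes an active Snowpark Connect session named spark, that this branch handles Iceberg writes, and a made-up table name):

    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

    # If analytics.events does not exist it is created as an Iceberg table and then
    # appended to; if it exists but its type is not ICEBERG/TABLE, the write fails with
    # "Table ... is not an iceberg table".
    df.write.format("iceberg").mode("append").saveAsTable("analytics.events")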
@@ -359,13 +386,17 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case "overwrite":
-                if check_snowflake_table_existence(snowpark_table_name, session):
-                    # TODO: SNOW-2299414 Fix the implementation of table type check
-                    # if check_table_type(snowpark_table_name, session) != "ICEBERG":
-                    #     raise AnalysisException(
-                    #         f"Table {snowpark_table_name} is not an iceberg table"
-                    #     )
-                    pass
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
@@ -374,7 +405,7 @@ def map_write(request: proto_base.ExecutePlanRequest):
                         snowpark_session=session,
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
@@ -393,33 +424,49 @@ def map_write(request: proto_base.ExecutePlanRequest):
     ):
         match write_mode:
             case "overwrite":
-                if check_snowflake_table_existence(
+                table_schema_or_error = _get_table_schema_or_error(
                     snowpark_table_name, session
-                ):
-                    # TODO: SNOW-2299414 Fix the implementation of table type check
-                    # if (
-                    #     check_table_type(snowpark_table_name, session)
-                    #     != "TABLE"
-                    # ):
-                    #     raise AnalysisException(
-                    #         f"Table {snowpark_table_name} is not a FDN table"
-                    #     )
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 write_mode = "truncate"
                 _validate_schema_and_get_writer(
-                    input_df, write_mode, snowpark_table_name
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
                     column_order=_column_order_for_write,
                 )
             case "append":
-                # TODO: SNOW-2299414 Fix the implementation of table type check
-                # if check_table_type(snowpark_table_name, session) != "TABLE":
-                #     raise AnalysisException(
-                #         f"Table {snowpark_table_name} is not a FDN table"
-                #     )
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(
+                    table_schema_or_error, DataType
+                ) and get_table_type(  # Table exists
+                    snowpark_table_name, session
+                ) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
+
                 _validate_schema_and_get_writer(
-                    input_df, write_mode, snowpark_table_name
+                    input_df,
+                    write_mode,
+                    snowpark_table_name,
+                    table_schema_or_error,
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode=write_mode,
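The append case above folds the old existence check and the previously commented-out type check into one condition. Restated in plain Python (names are illustrative, no Snowflake calls), it only rejects the write when the table exists and has an unexpected type; a missing table simply falls through to saveAsTable(mode="append"), which creates it:

    def should_reject(table_exists: bool, table_type: str | None) -> bool:
        # Mirrors: isinstance(schema_or_error, DataType) and get_table_type(...) not in ("NORMAL", "TABLE")
        return table_exists and table_type not in ("NORMAL", "TABLE")

    assert should_reject(True, "VIEW") is True      # existing non-FDN object -> AnalysisException
    assert should_reject(True, "NORMAL") is False   # existing FDN table -> append proceeds
    assert should_reject(False, None) is False      # missing table -> saveAsTable creates it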
@@ -466,7 +513,10 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     if write_op.provider.lower() == "iceberg":
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:
-                if check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
@@ -477,24 +527,29 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_session=session,
                 )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-                if not check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-                # TODO: SNOW-2299414 Fix the implementation of table type check
-                # if check_table_type(snowpark_table_name, session) != "ICEBERG":
-                #     raise AnalysisException(
-                #         f"Table {snowpark_table_name} is not an iceberg table"
-                #     )
+                if get_table_type(snowpark_table_name, session) not in (
+                    "ICEBERG",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not an iceberg table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -502,26 +557,33 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-                if check_snowflake_table_existence(snowpark_table_name, session):
-                    # TODO: SNOW-2299414 Fix the implementation of table type check
-                    # if check_table_type(snowpark_table_name, session) != "ICEBERG":
-                    #     raise AnalysisException(
-                    #         f"Table {snowpark_table_name} is not an iceberg table"
-                    #     )
-                    pass
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "ICEBERG",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not an iceberg table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-                if check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
                     create_iceberg_table(
                         snowpark_table_name=snowpark_table_name,
                         location=write_op.table_properties.get("location"),
@@ -534,7 +596,7 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -570,17 +632,22 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_APPEND:
-                if not check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
-                # TODO: SNOW-2299414 Fix the implementation of table type check
-                # if check_table_type(snowpark_table_name, session) != "TABLE":
-                #     raise AnalysisException(
-                #         f"Table {snowpark_table_name} is not a FDN table"
-                #     )
+                if get_table_type(snowpark_table_name, session) not in (
+                    "NORMAL",
+                    "TABLE",
+                ):
+                    raise AnalysisException(
+                        f"Table {snowpark_table_name} is not a FDN table"
+                    )
                 _validate_schema_and_get_writer(
-                    input_df, "append", snowpark_table_name
+                    input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="append",
@@ -588,31 +655,38 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
             case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
                 # TODO: handle the filter condition for MODE_OVERWRITE
-                if check_snowflake_table_existence(snowpark_table_name, session):
-                    # TODO: SNOW-2299414 Fix the implementation of table type check
-                    # if check_table_type(snowpark_table_name, session) != "TABLE":
-                    #     raise AnalysisException(
-                    #         f"Table {snowpark_table_name} is not a FDN table"
-                    #     )
-                    pass
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if isinstance(table_schema_or_error, DataType):  # Table exists
+                    if get_table_type(snowpark_table_name, session) not in (
+                        "NORMAL",
+                        "TABLE",
+                    ):
+                        raise AnalysisException(
+                            f"Table {snowpark_table_name} is not a FDN table"
+                        )
                 else:
                     raise AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "truncate", snowpark_table_name
+                    input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="truncate",
                     column_order=_column_order_for_write,
                 )
             case commands_proto.WriteOperationV2.MODE_REPLACE:
-                if not check_snowflake_table_existence(snowpark_table_name, session):
+                table_schema_or_error = _get_table_schema_or_error(
+                    snowpark_table_name, session
+                )
+                if not isinstance(table_schema_or_error, DataType):  # Table not exists
                     raise AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
                 _validate_schema_and_get_writer(
-                    input_df, "replace", snowpark_table_name
+                    input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(
                     table_name=snowpark_table_name,
                     mode="overwrite",
@@ -632,8 +706,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                 )
 
 
+def _get_table_schema_or_error(
+    snowpark_table_name: str, snowpark_session: snowpark.Session
+) -> DataType | SnowparkSQLException:
+    try:
+        return snowpark_session.table(snowpark_table_name).schema
+    except SnowparkSQLException as e:
+        return e
+
+
 def _validate_schema_and_get_writer(
-    input_df: snowpark.DataFrame, write_mode: str, snowpark_table_name: str
+    input_df: snowpark.DataFrame,
+    write_mode: str,
+    snowpark_table_name: str,
+    table_schema_or_error: DataType | SnowparkSQLException | None = None,
 ) -> snowpark.DataFrameWriter:
     if write_mode is not None and write_mode.lower() in (
         "replace",
@@ -642,16 +728,26 @@ def _validate_schema_and_get_writer(
         return input_df.write
 
     table_schema = None
-    try:
-        table_schema = (
-            get_or_create_snowpark_session().table(snowpark_table_name).schema
-        )
-    except SnowparkSQLException as e:
-        msg = e.message
-        if "SQL compilation error" in msg and "does not exist" in msg:
-            pass
-        else:
-            raise e
+    if table_schema_or_error is not None:
+        if isinstance(table_schema_or_error, SnowparkSQLException):
+            msg = table_schema_or_error.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise table_schema_or_error
+        elif isinstance(table_schema_or_error, DataType):
+            table_schema = table_schema_or_error
+    else:
+        try:
+            table_schema = (
+                get_or_create_snowpark_session().table(snowpark_table_name).schema
+            )
+        except SnowparkSQLException as e:
+            msg = e.message
+            if "SQL compilation error" in msg and "does not exist" in msg:
+                pass
+            else:
+                raise e
 
     if table_schema is None:
         # If table does not exist, we can skip the schema validation
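Taken together, the new _get_table_schema_or_error helper and the extra table_schema_or_error parameter let each write mode probe the target once: the same result answers "does the table exist?" and, when it is a schema, feeds the validation above without a second lookup (the removed check_snowflake_table_existence ran a separate SELECT 1 and swallowed every exception). A self-contained sketch of the probe-once shape, with a plain callable standing in for snowpark_session.table(name).schema:

    from snowflake.snowpark.exceptions import SnowparkSQLException
    from snowflake.snowpark.types import DataType, LongType, StructField, StructType

    def schema_or_error(probe):
        try:
            return probe()
        except SnowparkSQLException as e:
            # A missing table comes back as the exception object instead of being
            # re-raised, so the caller can branch on existence without a second query.
            return e

    result = schema_or_error(lambda: StructType([StructField("ID", LongType())]))
    assert isinstance(result, DataType)  # DataType => table exists; the schema is reused downstream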
@@ -889,7 +985,47 @@ def store_files_locally(
     )
     if overwrite and os.path.isdir(target_path):
         _truncate_directory(real_path)
-    snowpark.file_operation.FileOperation(session).get(stage_path, str(real_path))
+    # Per Snowflake docs: "The command does not preserve stage directory structure when transferring files to your client machine"
+    # https://docs.snowflake.com/en/sql-reference/sql/get
+    # Preserve directory structure under stage_path by listing files and
+    # downloading each into its corresponding local subdirectory when partition subdirs exist.
+    # Otherwise, fall back to a direct GET which flattens.
+
+    # TODO(SNOW-2326973): This can be parallelized further. Its not done here because it only affects
+    # write to local storage.
+
+    ls_dataframe = session.sql(f"LS {stage_path}")
+    ls_iterator = ls_dataframe.toLocalIterator()
+
+    # Build a normalized base prefix from stage_path to compute relatives
+    # Example: stage_path='@MY_STAGE/prefix' -> base_prefix='my_stage/prefix/'
+    base_prefix = stage_path.lstrip("@").rstrip("/") + "/"
+    base_prefix_lower = base_prefix.lower()
+
+    # Group by parent directory under the base prefix, then issue a GET per directory.
+    # This gives a small parallelism advantage if we have many files per partition directory.
+    parent_dirs: set[str] = set()
+    for row in ls_iterator:
+        name: str = row[0]
+        name_lower = name.lower()
+        rel_start = name_lower.find(base_prefix_lower)
+        relative = name[rel_start + len(base_prefix) :] if rel_start != -1 else name
+        parent_dir = os.path.dirname(relative)
+        if parent_dir and parent_dir != ".":
+            parent_dirs.add(parent_dir)
+
+    # If no parent directories were discovered (non-partitioned unload prefix), use direct GET.
+    if not parent_dirs:
+        snowpark.file_operation.FileOperation(session).get(stage_path, str(real_path))
+        return
+
+    file_op = snowpark.file_operation.FileOperation(session)
+    for parent_dir in sorted(parent_dirs):
+        local_dir = real_path / parent_dir
+        os.makedirs(local_dir, exist_ok=True)
+
+        src_dir = f"@{base_prefix}{parent_dir}"
+        file_op.get(src_dir, str(local_dir))
 
 
 def _truncate_directory(directory_path: Path) -> None:
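The loop above rebuilds the Hive-style layout locally by computing each listed file's path relative to the unload prefix, grouping files by parent directory, and running one GET per directory (a plain GET would flatten everything into real_path). A standalone sketch of just the grouping step, using a fabricated LS listing:

    import os

    stage_path = "@MY_STAGE/results/run_1"
    listing = [
        "my_stage/results/run_1/department=Sales/region=EMEA/data_0_0_0.snappy.parquet",
        "my_stage/results/run_1/department=Sales/region=APAC/data_0_0_1.snappy.parquet",
        "my_stage/results/run_1/data_0_0_2.snappy.parquet",  # file directly under the prefix
    ]

    base_prefix = stage_path.lstrip("@").rstrip("/") + "/"
    parent_dirs = set()
    for name in listing:
        rel_start = name.lower().find(base_prefix.lower())
        relative = name[rel_start + len(base_prefix):] if rel_start != -1 else name
        parent_dir = os.path.dirname(relative)
        if parent_dir and parent_dir != ".":
            parent_dirs.add(parent_dir)

    print(sorted(parent_dirs))
    # ['department=Sales/region=APAC', 'department=Sales/region=EMEA']
    # One GET is then issued per directory, e.g. into <target>/department=Sales/region=EMEA/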
@@ -904,31 +1040,3 @@ def _truncate_directory(directory_path: Path) -> None:
             file.unlink()
         elif file.is_dir():
             shutil.rmtree(file)
-
-
-def check_snowflake_table_existence(
-    snowpark_table_name: str,
-    snowpark_session: snowpark.Session,
-):
-    try:
-        snowpark_session.sql(f"SELECT 1 FROM {snowpark_table_name} LIMIT 1").collect()
-        return True
-    except Exception:
-        return False
-
-
-# TODO: SNOW-2299414 Fix the implementation of table type check
-# def check_table_type(
-#     snowpark_table_name: str,
-#     snowpark_session: snowpark.Session,
-# ) -> str:
-#     # currently we only support iceberg table and FDN table
-#     metadata = snowpark_session.sql(
-#         f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
-#     ).collect()
-#     if metadata is None or len(metadata) == 0:
-#         raise AnalysisException(f"Table {snowpark_table_name} does not exist")
-#     metadata = metadata[0]
-#     if metadata.as_dict().get("is_iceberg") == "Y":
-#         return "ICEBERG"
-#     return "TABLE"