snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/write/map_write.py

@@ -16,7 +16,7 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark.functions import col, lit, object_construct, sql_expr
+from snowflake.snowpark.functions import col, lit, object_construct, sql_expr, when
 from snowflake.snowpark.types import (
     ArrayType,
     DataType,

@@ -28,11 +28,14 @@ from snowflake.snowpark.types import (
     _NumericType,
 )
 from snowflake.snowpark_connect.config import (
+    auto_uppercase_column_identifiers,
     global_config,
     sessions_config,
     str_to_bool,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
     get_compression_for_source_and_options,

@@ -254,9 +257,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
     get_param_from_options(parameters, write_op.options, write_op.source)
     if write_op.partitioning_columns:
         if write_op.source != "parquet":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Partitioning is only supported for parquet format"
             )
+            attach_custom_error_code(
+                exception, ErrorCodes.UNSUPPORTED_OPERATION
+            )
+            raise exception
         # Build Spark-style directory structure: col1=value1/col2=value2/...
         # Example produced expression (Snowflake SQL):
         # 'department=' || TO_VARCHAR("department") || '/' || 'region=' || TO_VARCHAR("region")
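The same pattern repeats throughout the hunks below: instead of raising directly, the code builds the exception, tags it with a code from the new error_codes module via attach_custom_error_code, and then raises it. A minimal sketch of that pattern, assuming the helper simply records the code on the exception object (the real helper in snowflake.snowpark_connect.error.error_utils may do more, and plain NotImplementedError stands in for SnowparkConnectNotImplementedError):

    from enum import Enum


    class ErrorCodes(Enum):
        # Hypothetical subset of the codes referenced in this diff.
        UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
        INVALID_OPERATION = "INVALID_OPERATION"


    def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> Exception:
        # Assumption: annotate the exception so a stable error code can be
        # reported alongside the original message.
        exc.custom_error_code = code
        return exc


    def write_partitioned(source: str) -> None:
        if source != "parquet":
            exception = NotImplementedError(
                "Partitioning is only supported for parquet format"
            )
            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
            raise exception


    try:
        write_partitioned("csv")
    except NotImplementedError as e:
        print(e, getattr(e, "custom_error_code", None))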
@@ -341,9 +348,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 snowpark_table_name, session
             )
             if isinstance(table_schema_or_error, DataType):  # Table exists
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} already exists"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             create_iceberg_table(
                 snowpark_table_name=snowpark_table_name,
                 location=write_op.options.get("location", None),

@@ -366,9 +377,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "ICEBERG",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not an iceberg table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,

@@ -412,9 +427,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "ICEBERG",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not an iceberg table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             else:
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,

@@ -430,9 +449,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {write_mode} is not supported"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case _:
             snowpark_table_name = _spark_to_snowflake(write_op.table.table_name)
             save_method = write_op.table.save_method

@@ -448,9 +471,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
             if len(write_op.table.table_name) == 0:
                 dbtable_name = write_op.options.get("dbtable", "")
                 if len(dbtable_name) == 0:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         "Save command is not supported without a table name"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 else:
                     snowpark_table_name = _spark_to_snowflake(dbtable_name)

@@ -468,9 +495,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "NORMAL",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not a FDN table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
             write_mode = "truncate"
             _validate_schema_and_get_writer(
                 input_df,

@@ -494,9 +525,13 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 "NORMAL",
                 "TABLE",
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} is not a FDN table"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             _validate_schema_and_get_writer(
                 input_df,

@@ -528,9 +563,11 @@ def map_write(request: proto_base.ExecutePlanRequest):
                 column_order=_column_order_for_write,
             )
         else:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Save command not supported: {save_method}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def map_write_v2(request: proto_base.ExecutePlanRequest):
@@ -555,9 +592,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
     session: snowpark.Session = get_or_create_snowpark_session()

     if write_op.table_name is None or write_op.table_name == "":
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Write operation V2 only support table writing now"
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     if write_op.provider.lower() == "iceberg":
         match write_op.mode:

@@ -566,9 +605,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if isinstance(table_schema_or_error, DataType):  # Table exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} already exists"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 create_iceberg_table(
                     snowpark_table_name=snowpark_table_name,
                     location=write_op.table_properties.get("location"),

@@ -587,16 +628,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
                 if get_table_type(snowpark_table_name, session) not in (
                     "ICEBERG",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not an iceberg table"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -614,13 +659,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     "ICEBERG",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not an iceberg table"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -641,9 +692,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     mode="replace",
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Table {snowpark_table_name} does not exist"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             _validate_schema_and_get_writer(
                 input_df, "replace", snowpark_table_name, table_schema_or_error
             ).saveAsTable(

@@ -667,9 +720,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
     else:
         match write_op.mode:
             case commands_proto.WriteOperationV2.MODE_CREATE:

@@ -685,16 +740,20 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 if get_table_type(snowpark_table_name, session) not in (
                     "NORMAL",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not a FDN table"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "append", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -712,13 +771,19 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     "NORMAL",
                     "TABLE",
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} is not a FDN table"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "truncate", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -731,9 +796,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     snowpark_table_name, session
                 )
                 if not isinstance(table_schema_or_error, DataType):  # Table not exists
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"Table {snowpark_table_name} does not exist"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TABLE_NOT_FOUND)
+                    raise exception
                 _validate_schema_and_get_writer(
                     input_df, "replace", snowpark_table_name, table_schema_or_error
                 ).saveAsTable(

@@ -750,9 +817,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
                     column_order=_column_order_for_write,
                 )
             case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Write mode {commands_proto.WriteOperationV2.Mode.Name(write_op.mode)} is not supported"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception


 def _get_table_schema_or_error(
@@ -764,6 +833,20 @@ def _get_table_schema_or_error(
         return e


+def _get_writer_for_table_creation(df: snowpark.DataFrame) -> snowpark.DataFrameWriter:
+    # When creating a new table, if case sensitivity is not enabled, we need to rename the columns
+    # to upper case so they are case-insensitive in Snowflake.
+    if auto_uppercase_column_identifiers():
+        for field in df.schema.fields:
+            col_name = field.name
+            # Uppercasing is fine, regardless of whether the original name was quoted or not.
+            # In Snowflake these are equivalent "COL" == COL == col == coL
+            uppercased_name = col_name.upper()
+            if col_name != uppercased_name:
+                df = df.withColumnRenamed(col_name, uppercased_name)
+    return df.write
+
+
 def _validate_schema_and_get_writer(
     input_df: snowpark.DataFrame,
     write_mode: str,
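The new _get_writer_for_table_creation helper uppercases column names before a table is first created, so the resulting Snowflake identifiers behave case-insensitively. A rough illustration of that renaming rule on plain column names (the auto_uppercase flag is a stand-in for the auto_uppercase_column_identifiers config read):

    def uppercase_identifiers(column_names: list[str], auto_uppercase: bool = True) -> list[str]:
        # Mirrors the renaming loop: only names that change would be renamed.
        if not auto_uppercase:
            return column_names
        return [name.upper() for name in column_names]


    print(uppercase_identifiers(["id", "userName", "CREATED_AT"]))
    # ['ID', 'USERNAME', 'CREATED_AT']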
@@ -774,7 +857,7 @@ def _validate_schema_and_get_writer(
         "replace",
         "create_or_replace",
     ):
-        return input_df
+        return _get_writer_for_table_creation(input_df)

     table_schema = None
     if table_schema_or_error is not None:

@@ -783,6 +866,9 @@ def _validate_schema_and_get_writer(
             if "SQL compilation error" in msg and "does not exist" in msg:
                 pass
             else:
+                attach_custom_error_code(
+                    table_schema_or_error, ErrorCodes.INTERNAL_ERROR
+                )
                 raise table_schema_or_error
         elif isinstance(table_schema_or_error, DataType):
             table_schema = table_schema_or_error

@@ -796,16 +882,17 @@ def _validate_schema_and_get_writer(
             if "SQL compilation error" in msg and "does not exist" in msg:
                 pass
             else:
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e

     if table_schema is None:
         # If table does not exist, we can skip the schema validation
-        return input_df
+        return _get_writer_for_table_creation(input_df)

     _validate_schema_for_append(table_schema, input_df.schema, snowpark_table_name)

     # if table exists and case sensitivity is not enabled, we need to rename the columns to match existing table schema
-    if
+    if auto_uppercase_column_identifiers():

         for field in input_df.schema.fields:
             # Find the matching field in the table schema (case-insensitive)

@@ -815,8 +902,8 @@ def _validate_schema_and_get_writer(
                 (
                     f
                     for f in table_schema.fields
-                    if unquote_if_quoted(f.name).
-                    == unquote_if_quoted(col_name).
+                    if unquote_if_quoted(f.name).upper()
+                    == unquote_if_quoted(col_name).upper()
                 ),
                 None,
             )
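When appending to an existing table with auto-uppercasing enabled, incoming columns are matched against the table columns case-insensitively after stripping surrounding quotes. A small sketch of that lookup, with a simplified unquote helper standing in for Snowpark's unquote_if_quoted:

    from typing import Optional


    def unquote(name: str) -> str:
        # Simplified stand-in for unquote_if_quoted: strip one pair of surrounding double quotes.
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            return name[1:-1]
        return name


    def find_matching_column(table_columns: list[str], col_name: str) -> Optional[str]:
        # Case-insensitive lookup, mirroring the generator expression in the hunk above.
        return next(
            (c for c in table_columns if unquote(c).upper() == unquote(col_name).upper()),
            None,
        )


    print(find_matching_column(['"UserName"', "ID"], "username"))  # '"UserName"'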
@@ -851,21 +938,25 @@ def _validate_schema_for_append(
         case (StructType() as table_struct, StructType() as data_struct):

             def _comparable_col_name(col: str) -> str:
-                name = col if
+                name = col.upper() if auto_uppercase_column_identifiers() else col
                 if compare_structs:
                     return name
                 else:
                     return unquote_if_quoted(name)

             def invalid_struct_schema():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Cannot resolve columns for the existing table {snowpark_table_name} ({table_schema.simple_string()}) with the data schema ({data_schema.simple_string()})."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception

             if len(table_struct.fields) != len(data_struct.fields):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"The column number of the existing table {snowpark_table_name} ({table_schema.simple_string()}) doesn't match the data schema ({data_schema.simple_string()}).)"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception

             table_field_names = {
                 _comparable_col_name(field.name) for field in table_struct.fields
@@ -928,9 +1019,11 @@ def _validate_schema_for_append(
         case (DateType(), _) if isinstance(data_schema, (DateType, TimestampType)):
             return
         case (_, _):
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_SAFELY_CAST] Cannot write incompatible data for the table {snowpark_table_name}: Cannot safely cast {data_schema.simple_string()} to {table_schema.simple_string()}"
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception


 def create_iceberg_table(
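The append validation walks the table and data types with structural pattern matching and only allows pairs that can be safely cast (for example, dates and timestamps written into a DATE column); anything else is rejected with the INCOMPATIBLE_DATA_FOR_TABLE error above. A simplified sketch of that style of check, using plain Python types instead of Snowpark DataTypes:

    import datetime as dt


    def can_safely_cast(table_type: type, data_type: type) -> bool:
        # Simplified stand-in for the match on (table_schema, data_schema) pairs.
        match (table_type, data_type):
            case (t, d) if t is d:
                return True
            case (dt.date, d) if d in (dt.date, dt.datetime):
                # Mirrors the DateType/TimestampType case in the hunk above.
                return True
            case _:
                return False


    print(can_safely_cast(dt.date, dt.datetime))  # True
    print(can_safely_cast(int, str))              # False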
@@ -970,9 +1063,11 @@ def create_iceberg_table(
         case "create_or_replace":
             create_sql = "CREATE OR REPLACE"
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Write mode {mode} is not supported for iceberg table"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
     sql = f"""
     {create_sql} ICEBERG TABLE {snowpark_table_name} ({",".join(table_schema)})
     CATALOG = 'SNOWFLAKE'
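create_iceberg_table assembles a CREATE or CREATE OR REPLACE ICEBERG TABLE statement with CATALOG = 'SNOWFLAKE' from the column definitions. A hedged sketch of roughly what that string building looks like; the column DDL and options here are illustrative, not the module's actual output:

    def build_iceberg_ddl(table_name: str, columns: dict[str, str], replace: bool = False) -> str:
        # Illustrative only: joins "name type" pairs the way the diff joins table_schema.
        create_sql = "CREATE OR REPLACE" if replace else "CREATE"
        cols = ", ".join(f"{name} {sql_type}" for name, sql_type in columns.items())
        return f"{create_sql} ICEBERG TABLE {table_name} ({cols}) CATALOG = 'SNOWFLAKE'"


    print(build_iceberg_ddl("db.schema.events", {"ID": "NUMBER", "TS": "TIMESTAMP_NTZ"}))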
@@ -988,20 +1083,35 @@ def rewrite_df(input_df: snowpark.DataFrame, source: str) -> snowpark.DataFrame:
     json: construct the dataframe to 1 column in json format
         1. Append columns which represents the column name
         2. Use object_construct to aggregate the dataframe into 1 column
-
+    csv:
+        Use "" to replace empty string
     """
-    [12 removed lines are not captured in this diff view]
+    match source:
+        case "json":
+            rand_salt = random_string(10, "_")
+            rewritten_df = input_df.with_columns(
+                [co + rand_salt for co in input_df.columns],
+                [lit(unquote_if_quoted(co)) for co in input_df.columns],
+            )
+            construct_key_values = []
+            for co in input_df.columns:
+                construct_key_values.append(col(co + rand_salt))
+                construct_key_values.append(col(co))
+            return rewritten_df.select(object_construct(*construct_key_values))
+        case "csv":
+            new_cols = []
+            for co in input_df.columns:
+                if isinstance(input_df.schema[co].datatype, StringType):
+                    new_col = col(co)
+                    new_col = when(
+                        new_col.isNotNull() & (new_col == ""), lit('""')
+                    ).otherwise(new_col)
+                    new_cols.append(new_col.alias(co))
+                else:
+                    new_cols.append(col(co))
+            return input_df.select(new_cols)
+        case _:
+            return input_df


 def handle_column_names(
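For JSON output, rewrite_df pairs each value column with a literal column holding its unquoted name and folds them into a single OBJECT_CONSTRUCT column; for CSV it swaps empty strings for a literal "" so they round-trip. A plain-Python sketch of the JSON row shaping, with OBJECT_CONSTRUCT emulated by a dict (the salted helper columns in the real code are an implementation detail and are omitted here):

    def rows_to_json_objects(columns: list[str], rows: list[tuple]) -> list[dict]:
        # Emulates OBJECT_CONSTRUCT(name1, value1, name2, value2, ...) per row.
        return [dict(zip(columns, row)) for row in rows]


    print(rows_to_json_objects(["id", "name"], [(1, "a"), (2, "b")]))
    # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]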
@@ -1079,9 +1189,11 @@ def store_files_locally(

 def _truncate_directory(directory_path: Path) -> None:
     if not directory_path.exists():
-        raise FileNotFoundError(
+        exception = FileNotFoundError(
             f"The specified directory {directory_path} does not exist."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     # Iterate over all the files and directories in the specified directory
     for file in directory_path.iterdir():
         # Check if it is a file or directory and remove it
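_truncate_directory now raises a FileNotFoundError tagged with INVALID_INPUT when the target directory is missing, then removes every entry in it. A minimal pathlib sketch of that behaviour; using shutil.rmtree for subdirectories is an assumption about how entries are removed:

    import shutil
    from pathlib import Path


    def truncate_directory(directory_path: Path) -> None:
        if not directory_path.exists():
            raise FileNotFoundError(
                f"The specified directory {directory_path} does not exist."
            )
        for entry in directory_path.iterdir():
            # Remove files and symlinks directly, recurse into directories.
            if entry.is_dir() and not entry.is_symlink():
                shutil.rmtree(entry)
            else:
                entry.unlink()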
snowflake/snowpark_connect/relation/write/map_write_jdbc.py

@@ -4,6 +4,8 @@

 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.map_read_jdbc import (
     close_connection,
     create_connection,

@@ -35,7 +37,9 @@ def map_write_jdbc(
         dbtable = None

     if dbtable is None:
-        raise ValueError("Include dbtable is required option")
+        exception = ValueError("Include dbtable is required option")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     try:
         JdbcDataFrameWriter(session, jdbc_options).jdbc_write_dbapi(

@@ -46,4 +50,6 @@
             write_mode=write_mode,
         )
     except Exception as e:
-        raise Exception(f"Error accessing JDBC datasource for write: {e}")
+        exception = Exception(f"Error accessing JDBC datasource for write: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
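map_write_jdbc now validates that the dbtable option is present and wraps any failure from the DB-API writer in a generic exception carrying INTERNAL_ERROR. A reduced sketch of that option check and wrapping; the do_write callback is a placeholder, not the module's JdbcDataFrameWriter API:

    def write_via_jdbc(options: dict, do_write) -> None:
        # The dbtable option is required before any connection is attempted.
        if not options.get("dbtable"):
            raise ValueError("Include dbtable is required option")
        try:
            do_write(options)
        except Exception as e:
            # Wrap the original error so callers see a single failure point.
            raise Exception(f"Error accessing JDBC datasource for write: {e}") from e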
snowflake/snowpark_connect/resources_initializer.py

@@ -5,6 +5,8 @@ import pathlib
 import threading
 import time

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger

@@ -119,9 +121,11 @@ def wait_for_resource_initialization() -> None:
         logger.error(
             "Resource initialization failed - initializer thread has been running for over 300 seconds."
         )
-        raise RuntimeError(
+        exception = RuntimeError(
            "Resource initialization failed - initializer thread has been running for over 300 seconds."
         )
+        attach_custom_error_code(exception, ErrorCodes.RESOURCE_INITIALIZATION_FAILED)
+        raise exception


 def set_upload_jars(upload: bool) -> None:
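wait_for_resource_initialization gives the background initializer thread up to 300 seconds and now raises a RuntimeError tagged RESOURCE_INITIALIZATION_FAILED when that budget is exhausted. A generic polling sketch of that kind of wait; the 300-second budget comes from the diff, while the poll interval and readiness callback are illustrative:

    import time


    def wait_for(ready, timeout_s: float = 300.0, poll_s: float = 1.0) -> None:
        # Poll a readiness callback until it returns True or the budget runs out.
        deadline = time.monotonic() + timeout_s
        while not ready():
            if time.monotonic() > deadline:
                raise RuntimeError(
                    "Resource initialization failed - initializer thread has been "
                    "running for over 300 seconds."
                )
            time.sleep(poll_s)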
|