snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +54 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +66 -4
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +25 -6
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +24 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/utils.py +19 -2
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +146 -63
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -2
- snowflake/snowpark_connect/utils/telemetry.py +81 -18
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -46,6 +46,8 @@ from snowflake.snowpark_connect.config import (
     unset_config_param,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     ColumnNameMap,
     map_single_column_expression,
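Nearly every hunk in this release follows the same refactor: instead of raising directly, call sites build the exception, tag it with a code from the new ErrorCodes module via attach_custom_error_code, and then raise it. The call-site pattern below is copied from the hunks that follow; the body of attach_custom_error_code is an assumption (the packaged helper in error_utils.py may also record telemetry or map codes for the Spark Connect client):

from enum import Enum


class ErrorCodes(Enum):
    # Stand-in for snowflake.snowpark_connect.error.error_codes; only a few of
    # the codes referenced in this diff are reproduced here.
    INVALID_OPERATION = "INVALID_OPERATION"
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND"


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> Exception:
    # Assumed behavior: stash the code on the exception object so the server can
    # surface it to clients alongside the original message.
    exception.custom_error_code = code
    return exception


# The call-site pattern used throughout 0.31.0: build, attach, raise.
try:
    exception = ValueError("source and target must have the same number of columns")
    attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
    raise exception
except ValueError as err:
    print(getattr(err, "custom_error_code", None))  # ErrorCodes.INVALID_OPERATION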
@@ -257,7 +259,8 @@ def _create_table_as_select(logical_plan, mode: str) -> None:
 
 
 def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
-    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+    # Column names will be uppercased according to "snowpark.connect.sql.identifiers.auto-uppercase"
+    # if present, or to "spark.sql.caseSensitive".
     # and struct fields will be left as is. This should allow users to use the same names
     # in spark and Snowflake in most cases.
     if is_column:
@@ -377,14 +380,18 @@ def _get_assignments_from_action(
         or action.getClass().getSimpleName() == "UpdateStarAction"
     ):
         if len(column_mapping_source.columns) != len(column_mapping_target.columns):
-            raise ValueError(
+            exception = ValueError(
                 "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         for i, col in enumerate(column_mapping_target.columns):
             if assignments.get(col.snowpark_name) is not None:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             assignments[col.snowpark_name] = snowpark_fn.col(
                 column_mapping_source.columns[i].snowpark_name
             )
@@ -489,9 +496,11 @@ def map_sql_to_pandas_df(
                 snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
                 session.sql(snowflake_sql).collect()
             else:
-                raise ValueError(
+                exception = ValueError(
                     f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+                raise exception
         case "CreateNamespace":
             name = get_relation_identifier_name(logical_plan.name(), True)
             previous_name = session.connection.schema
@@ -603,9 +612,11 @@ def map_sql_to_pandas_df(
             )
             temp_view = get_temp_view(snowflake_view_name)
             if temp_view is not None and not logical_plan.replace():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{spark_view_name}` because it already exists."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             else:
                 unregister_temp_view(
                     spark_to_sf_single_id_with_unquoting(spark_view_name)
@@ -625,11 +636,13 @@ def map_sql_to_pandas_df(
             df_container = execute_logical_plan(logical_plan.query())
             df = df_container.dataframe
             if _accessing_temp_object.get():
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
                     "of the type VIEW because it references to a temporary object of the type VIEW. Please "
                     f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
 
             name = get_relation_identifier_name(logical_plan.child())
             comment = logical_plan.comment()
@@ -661,6 +674,7 @@ def map_sql_to_pandas_df(
                 snowpark_column_names=df_container.column_map.get_snowpark_columns(),
                 parent_column_name_map=df_container.column_map,
             )
+
             is_global = isinstance(
                 logical_plan.viewType(),
                 jpype.JClass(
@@ -757,9 +771,11 @@ def map_sql_to_pandas_df(
                 del session._udtfs[func_name]
             else:
                 if not logical_plan.ifExists():
-                    raise ValueError(
+                    exception = ValueError(
                         f"Function {func_name} not found among registered UDFs or UDTFs."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
             if snowpark_name != "":
                 argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
                 session.sql(
@@ -832,17 +848,25 @@ def map_sql_to_pandas_df(
                 rows = session.sql(final_sql).collect()
             else:
                 # TODO: Support other logical plans
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"{logical_plan_name} is not supported yet with EXPLAIN."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case "InsertIntoStatement":
             df_container = execute_logical_plan(logical_plan.query())
             df = df_container.dataframe
             queries = df.queries["queries"]
             if len(queries) != 1:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(queries)}"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             name = get_relation_identifier_name(logical_plan.table(), True)
 
@@ -996,9 +1020,13 @@ def map_sql_to_pandas_df(
                 clauses.append(when_not_matched(condition).insert(assignments))
 
             if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "Snowflake does not support 'not matched by source' actions in MERGE statements."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             target_table.merge(source_df, merge_condition_typed_col.col, clauses)
         case "DeleteFromTable":
@@ -1036,10 +1064,12 @@ def map_sql_to_pandas_df(
         case "UpdateTable":
             # Databricks/Delta-specific extension not supported by SAS.
             # Provide an actionable, clear error.
-            raise UnsupportedOperationException(
+            exception = UnsupportedOperationException(
                 "[UNSUPPORTED_SQL_EXTENSION] The UPDATE TABLE command failed.\n"
                 + "Reason: This command is a platform-specific SQL extension and is not part of the standard Apache Spark specification that this interface uses."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "RenameColumn":
             full_table_identifier = get_relation_identifier_name(
                 logical_plan.table(), True
@@ -1049,11 +1079,15 @@ def map_sql_to_pandas_df(
             if not check_table_supports_operation(
                 full_table_identifier, "rename_column"
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"ALTER TABLE RENAME COLUMN is not supported for table '{full_table_identifier}'. "
                     f"This table was created as a v1 table with a data source that doesn't support column renaming. "
                     f"To enable this operation, set 'snowpark.connect.enable_snowflake_extension_behavior' to 'true'."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
 
             column_obj = logical_plan.column()
             old_column_name = ".".join(
@@ -1094,6 +1128,7 @@ def map_sql_to_pandas_df(
                         f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
                     ).collect()
                 else:
+                    attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                     raise e
         case "ReplaceTableAsSelect":
             _create_table_as_select(logical_plan, mode="overwrite")
@@ -1113,9 +1148,11 @@ def map_sql_to_pandas_df(
             name = _spark_to_snowflake(logical_plan.namespace())
             session.sql(f"USE SCHEMA {name}").collect()
         case "SetNamespaceLocation" | "SetNamespaceProperties":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Altering databases is not currently supported."
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "ShowCreateTable":
             # Handle SHOW CREATE TABLE command
             # Spark: SHOW CREATE TABLE table_name
@@ -1137,16 +1174,24 @@ def map_sql_to_pandas_df(
         case "ShowNamespaces":
             name = get_relation_identifier_name(logical_plan.namespace(), True)
             if name:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "'IN' clause is not supported while listing databases"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
             if logical_plan.pattern().isDefined():
                 # Snowflake SQL requires a "%" pattern.
                 # Snowpark catalog requires a regex and does client-side filtering.
                 # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "'LIKE' clause is not supported while listing databases"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
             rows = session.sql("SHOW SCHEMAS").collect()
             if not rows:
                 rows = None
@@ -1247,9 +1292,13 @@ def map_sql_to_pandas_df(
                     spark_to_sf_single_id(str(db_and_table_name[0])).casefold()
                     != db_name.casefold()
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"database name is not matching:{db_name} and {db_and_table_name[0]}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
 
                 # Just table name
                 snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
@@ -1471,7 +1520,7 @@ def map_logical_plan_relation(
 
             # Extract aliases from the aggregate expressions (SELECT clause)
             alias_map = {}
-            for agg_expr in as_java_list(rel.aggregateExpressions()):
+            for agg_expr in list(as_java_list(rel.aggregateExpressions())):
                 if str(agg_expr.getClass().getSimpleName()) == "Alias":
                     alias_map[str(agg_expr.name())] = agg_expr.child()
 
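Here (and again in the generator-children hunk further down) the Java collection returned by as_java_list(...) is now copied into a Python list(...) before iteration. The motivation is not stated in the diff, but the usual reason for such a change is to snapshot a JVM-backed sequence so it can be traversed, and re-traversed, independently of the live proxy object. A toy illustration with a plain iterator standing in for that proxy:

def as_java_list_stub(items):
    # Stand-in for the JPype-backed proxy: yields elements once, like a live cursor.
    return iter(items)


exprs = as_java_list_stub(["Alias(a)", "Literal(1)", "Alias(b)"])
snapshot = list(exprs)                     # materialize once, as the new code does
aliases = [e for e in snapshot if e.startswith("Alias")]
print(len(snapshot), aliases)              # the snapshot can be reused safely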
@@ -1534,9 +1583,13 @@ def map_logical_plan_relation(
                     group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
                 case "GroupingSets":
                     if not exp.userGivenGroupByExprs().isEmpty():
-                        raise SnowparkConnectNotImplementedError(
+                        exception = SnowparkConnectNotImplementedError(
                             "User-defined group by expressions are not supported"
                         )
+                        attach_custom_error_code(
+                            exception, ErrorCodes.UNSUPPORTED_OPERATION
+                        )
+                        raise exception
                     group_type = (
                         snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
                     )
@@ -1552,9 +1605,13 @@ def map_logical_plan_relation(
 
             if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
                 if len(group_expression_list) != 1:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         "Multiple grouping expressions are not supported"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
                 if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
                     group_expression_list = []  # TODO: exp.userGivenGroupByExprs()?
                 else:
@@ -1786,12 +1843,14 @@ def map_logical_plan_relation(
 
             # Check for multi-column UNPIVOT which Snowflake doesn't support
            if len(value_column_names) > 1:
-                raise UnsupportedOperationException(
+                exception = UnsupportedOperationException(
                     f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
                     f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
                     f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
                     f"or restructure your query to unpivot columns individually."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             values = []
             values_groups = as_java_list(rel.values().get())
@@ -1799,11 +1858,13 @@ def map_logical_plan_relation(
             # Check if we have multi-column groups in the IN clause
             if values_groups and len(as_java_list(values_groups[0])) > 1:
                 group_sizes = [len(as_java_list(group)) for group in values_groups]
-                raise UnsupportedOperationException(
+                exception = UnsupportedOperationException(
                     f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
                     f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
                     f"Workaround: Unpivot each column separately and then join/union the results as needed."
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             for e1 in values_groups:
                 for e in as_java_list(e1):
@@ -1849,9 +1910,11 @@ def map_logical_plan_relation(
             # Store the having condition in context and process the child aggregate
             child_relation = rel.child()
             if str(child_relation.getClass().getSimpleName()) != "Aggregate":
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     "UnresolvedHaving can only be applied to Aggregate relations"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
 
             # Store having condition in a context variable for the Aggregate case to pick up
             having_condition = map_logical_plan_expression(rel.havingCondition())
@@ -2176,7 +2239,7 @@ def map_logical_plan_relation(
             function_name = rel.generator().name().toString()
             func_arguments = [
                 map_logical_plan_expression(e)
-                for e in as_java_list(rel.generator().children())
+                for e in list(as_java_list(rel.generator().children()))
             ]
             unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
                 function_name=function_name, arguments=func_arguments
@@ -2242,7 +2305,11 @@ def map_logical_plan_relation(
             )
             proto = generator_dataframe_proto
         case other:
-            raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unimplemented relation: {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     proto.common.plan_id = plan_id
 
@@ -15,6 +15,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 
@@ -99,9 +101,11 @@ def map_approx_quantile(
                 else ""
             )
 
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `{col_name}` cannot be resolved.{suggestion_text}"
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
 
     cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
         list(rel.approx_quantile.cols)
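The [UNRESOLVED_COLUMN.WITH_SUGGESTION] message mirrors Spark's wording, where suggestion_text lists candidate column names. How the package builds that text is not visible in this hunk; one common way to produce such a hint (purely illustrative, not the packaged implementation) is a closest-match lookup:

import difflib


def build_suggestion_text(col_name: str, available: list[str]) -> str:
    # Illustrative only: propose the closest existing column names.
    matches = difflib.get_close_matches(col_name, available, n=3, cutoff=0.4)
    return f" Did you mean one of the following? [{', '.join(matches)}]" if matches else ""


print(build_suggestion_text("prce", ["price", "qty", "order_id"]))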
@@ -309,9 +313,28 @@ def map_freq_items(rel: relation_proto.Relation) -> DataFrameContainer:
     cols = input_container.column_map.get_snowpark_column_names_from_spark_column_names(
         list(rel.freq_items.cols)
     )
+
+    # handle empty DataFrame case
+    row_count = input_df.count()
+
+    for sp_col_name in cols:
+        spark_col_names.append(
+            f"{input_container.column_map.get_spark_column_name_from_snowpark_column_name(sp_col_name)}_freqItems"
+        )
+
+    if row_count == 0:
+        # If DataFrame is empty, return empty arrays for each column
+        empty_values = [[] for _ in cols]
+        approx_top_k_df = session.createDataFrame([empty_values], spark_col_names)
+        return DataFrameContainer.create_with_column_mapping(
+            dataframe=approx_top_k_df,
+            spark_column_names=spark_col_names,
+            snowpark_column_names=spark_col_names,
+        )
+
     approx_top_k_df = input_df.select(
         *[
-            fn.function("approx_top_k")(fn.col(col), round(
+            fn.function("approx_top_k")(fn.col(col), round(row_count / support))
             for col in cols
         ]
     )
@@ -330,10 +353,6 @@ def map_freq_items(rel: relation_proto.Relation) -> DataFrameContainer:
         for value in approx_top_k_values
     ]
 
-    for sp_col_name in cols:
-        spark_col_names.append(
-            f"{input_container.column_map.get_spark_column_name_from_snowpark_column_name(sp_col_name)}_freqItems"
-        )
     approx_top_k_df = session.createDataFrame([filtered_values], spark_col_names)
 
     return DataFrameContainer.create_with_column_mapping(
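Taken together, the two map_freq_items hunks count rows once, build the <col>_freqItems output names before any early return, short-circuit to empty result arrays for an empty DataFrame, and size Snowflake's APPROX_TOP_K call as round(row_count / support). A standalone sketch of the Spark freqItems contract that this implements (exact counting here, purely for illustration):

from collections import Counter


def freq_items_sketch(values: list, support: float = 0.01) -> list:
    # Spark's freqItems returns items whose frequency is at least `support`;
    # an empty input yields an empty result, matching the new early-return path.
    row_count = len(values)
    if row_count == 0:
        return []
    counts = Counter(values)
    return [item for item, n in counts.items() if n / row_count >= support]


print(freq_items_sketch(["a", "a", "b", "a", "c"], support=0.4))  # ['a']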
@@ -22,6 +22,8 @@ from snowflake.snowpark_connect.config import (
     global_config,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -163,17 +165,21 @@ def process_return_type(
         else:
             parsed_return = return_type
     except ValueError as e:
-        raise PythonException(
+        exception = PythonException(
             f"[UDTF_ARROW_TYPE_CAST_ERROR] Error parsing UDTF return type DDL: {e}"
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
     original_output_schema = proto_to_snowpark_type(parsed_return)
     output_schema = proto_to_snowpark_type(parsed_return)
     # Snowflake UDTF does not support MapType, so we convert it to VariantType.
     output_schema = convert_maptype_to_variant(output_schema)
     if not isinstance(output_schema, StructType):
-        raise PySparkTypeError(
+        exception = PySparkTypeError(
             f"Invalid Python user-defined table function return type. Expect a struct type, but got {parsed_return}"
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
 
     expected_types = None
     if is_arrow_enabled_in_udtf() or is_spark_compatible_udtf_mode_enabled():
@@ -276,12 +282,16 @@ def map_common_inline_user_defined_table_function(
     if require_creating_udtf_in_sproc(udtf_proto):
         snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
         if isinstance(snowpark_udtf_or_error, str):
-            raise PythonException(snowpark_udtf_or_error)
+            exception = PythonException(snowpark_udtf_or_error)
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         snowpark_udtf = snowpark_udtf_or_error
     else:
         udtf_or_error = create_udtf(**kwargs)
         if isinstance(udtf_or_error, str):
-            raise PythonException(udtf_or_error)
+            exception = PythonException(udtf_or_error)
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
         udtf = udtf_or_error
         snowpark_udtf = SnowparkUDTF(
             name=udtf.name,
@@ -38,6 +38,8 @@ from snowflake.snowpark.types import (
     TimeType,
     _NumericType,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     DATA_SOURCE_SQL_COMMENT,
     Connection,
@@ -147,9 +149,11 @@ class JdbcDataFrameReader(DataFrameReader):
                 or upper_bound is not None
                 or num_partitions is not None
             ):
-                raise ValueError(
+                exception = ValueError(
                     "when column is not specified, lower_bound, upper_bound, num_partitions are expected to be None"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception
             if table is not None:
                 partitioned_queries = []
                 table_query = f"SELECT * FROM {table}"
@@ -160,24 +164,32 @@ class JdbcDataFrameReader(DataFrameReader):
                 elif query is not None:
                     partitioned_queries = [query]
                 else:
-                    raise ValueError("table or query is not specified")
+                    exception = ValueError("table or query is not specified")
+                    attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+                    raise exception
             else:
                 if lower_bound is None or upper_bound is None or num_partitions is None:
-                    raise ValueError(
+                    exception = ValueError(
                         "when column is specified, lower_bound, upper_bound, num_partitions must be specified"
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
 
                 column_type = None
                 for field in struct_schema.fields:
                     if field.name.lower() == column.lower():
                         column_type = field.datatype
                 if column_type is None:
-                    raise ValueError("Column does not exist")
+                    exception = ValueError("Column does not exist")
+                    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                    raise exception
 
                 if not isinstance(column_type, _NumericType) and not isinstance(
                     column_type, DateType
                 ):
-                    raise ValueError(f"unsupported type {column_type}")
+                    exception = ValueError(f"unsupported type {column_type}")
+                    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                    raise exception
                 spark_column_name = f'"{column}"'
                 partitioned_queries = self._generate_partition(
                     table,
@@ -240,7 +252,11 @@ class JdbcDataFrameReader(DataFrameReader):
                     )
                     query_thread_executor.shutdown(wait=False)
                     upload_thread_executor.shutdown(wait=False)
-                    raise future.result()
+                    exception = future.result()
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INTERNAL_ERROR
+                    )
+                    raise exception
                 else:
                     path = future.result()
                     if not path:
@@ -266,7 +282,11 @@ class JdbcDataFrameReader(DataFrameReader):
                     )
                     query_thread_executor.shutdown(wait=False)
                     upload_thread_executor.shutdown(wait=False)
-                    raise f.result()
+                    exception = f.result()
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INTERNAL_ERROR
+                    )
+                    raise exception
         finally:
             close_connection(conn)
 
@@ -283,7 +303,9 @@ class JdbcDataFrameReader(DataFrameReader):
         elif query is not None:
             sql = f"SELECT * FROM ({query}) WHERE 1=0"
         else:
-            raise ValueError("table or query is not specified")
+            exception = ValueError("table or query is not specified")
+            attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+            raise exception
 
         cursor = conn.cursor()
         cursor.execute(sql)
@@ -301,7 +323,11 @@ class JdbcDataFrameReader(DataFrameReader):
             dt = parser.parse(value)
             return int(dt.replace(tzinfo=pytz.UTC).timestamp())
         else:
-            raise TypeError(f"unsupported column type for partition: {column_type}")
+            exception = TypeError(
+                f"unsupported column type for partition: {column_type}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     # this function is only used in data source API for SQL server
     def _to_external_value(self, value: Union[int, str, float], column_type: DataType):
@@ -311,7 +337,11 @@ class JdbcDataFrameReader(DataFrameReader):
             # TODO: SNOW-1909315: support timezone
            return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)
         else:
-            raise TypeError(f"unsupported column type for partition: {column_type}")
+            exception = TypeError(
+                f"unsupported column type for partition: {column_type}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
         fields = []
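The two hunks above convert partition bounds between user-facing values and an internal epoch representation: date/timestamp strings are parsed and pinned to UTC before any arithmetic, then converted back when a bound is spliced into SQL. A self-contained sketch of that round trip (same dateutil/pytz calls as shown; the DataType dispatch is simplified to an isinstance check):

import datetime

import pytz
from dateutil import parser


def to_internal(value):
    # Numeric bounds pass through; strings are treated as date/timestamp bounds,
    # parsed and pinned to UTC epoch seconds.
    if isinstance(value, (int, float)):
        return int(value)
    dt = parser.parse(value)
    return int(dt.replace(tzinfo=pytz.UTC).timestamp())


def to_external(value: int):
    # Convert an internal epoch bound back to a timezone-aware timestamp for SQL.
    return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)


lo = to_internal("2024-01-01")
hi = to_internal("2024-02-01")
step = (hi - lo) // 4  # e.g. four partitions between the bounds
print(to_external(lo), to_external(lo + step))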
@@ -339,7 +369,9 @@ class JdbcDataFrameReader(DataFrameReader):
                 case jaydebeapi.BINARY:
                     field = StructField(name, BinaryType(), is_nullable)
                 case _:
-                    raise ValueError(f"unsupported type: {dbapi_type}")
+                    exception = ValueError(f"unsupported type: {dbapi_type}")
+                    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                    raise exception
 
             fields.append(field)
         return StructType(fields)
@@ -359,7 +391,9 @@ class JdbcDataFrameReader(DataFrameReader):
         processed_lower_bound = self._to_internal_value(lower_bound, column_type)
         processed_upper_bound = self._to_internal_value(upper_bound, column_type)
         if processed_lower_bound > processed_upper_bound:
-            raise ValueError("lower_bound cannot be greater than upper_bound")
+            exception = ValueError("lower_bound cannot be greater than upper_bound")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
 
         if processed_lower_bound == processed_upper_bound or num_partitions <= 1:
             return [select_query]
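Only the guards of _generate_partition are visible here: bounds must be ordered, and a single query is returned when the bounds coincide or num_partitions <= 1. The underlying technique is standard Spark-style JDBC range partitioning; a generic sketch of it, not the packaged implementation:

def generate_partition_predicates(column: str, lower: int, upper: int, num_partitions: int) -> list[str]:
    # Split [lower, upper) into evenly sized strides; the first and last predicates
    # are left open-ended so rows outside the bounds are still covered.
    if lower > upper:
        raise ValueError("lower_bound cannot be greater than upper_bound")
    if lower == upper or num_partitions <= 1:
        return [""]  # a single, unfiltered query
    stride = (upper - lower) // num_partitions or 1
    predicates, current = [], lower + stride
    predicates.append(f"{column} < {current} OR {column} IS NULL")
    for _ in range(num_partitions - 2):
        predicates.append(f"{column} >= {current} AND {column} < {current + stride}")
        current += stride
    predicates.append(f"{column} >= {current}")
    return predicates


print(generate_partition_predicates("id", 0, 100, 4))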
@@ -665,4 +699,6 @@ def get_jdbc_dialect(url: str) -> JdbcDialect:
     for jdbc_dialect in jdbc_dialects:
         if jdbc_dialect.can_handle(url):
             return jdbc_dialect
-    raise ValueError(f"Unsupported JDBC datasource: {url}")
+    exception = ValueError(f"Unsupported JDBC datasource: {url}")
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception
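get_jdbc_dialect resolves a dialect by asking each registered dialect whether it can_handle(url), and now raises with an attached error code when none matches. A minimal, self-contained sketch of that dispatch shape, with made-up dialect names purely for illustration:

from dataclasses import dataclass


@dataclass
class SketchDialect:
    # Illustrative stand-in for the package's JdbcDialect objects.
    name: str
    prefix: str

    def can_handle(self, url: str) -> bool:
        return url.startswith(self.prefix)


dialects = [
    SketchDialect("postgres", "jdbc:postgresql:"),
    SketchDialect("sqlserver", "jdbc:sqlserver:"),
]


def get_dialect(url: str) -> SketchDialect:
    for dialect in dialects:
        if dialect.can_handle(url):
            return dialect
    raise ValueError(f"Unsupported JDBC datasource: {url}")


print(get_dialect("jdbc:postgresql://host/db").name)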
@@ -15,8 +15,11 @@ from snowflake import snowpark
 from snowflake.snowpark.types import StructType
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.io_utils import (
     convert_file_prefix_path,
+    get_compression_for_source_and_options,
     is_cloud_path,
 )
 from snowflake.snowpark_connect.relation.read.map_read_table import map_read_table
@@ -158,12 +161,20 @@ def map_read(
                     options[DBTABLE_OPTION], session, rel.common.plan_id
                 )
             case other:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                     f"UNSUPPORTED FORMAT {other} WITH NO PATH"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception
         case other:
             # TODO: Empty data source
-            raise SnowparkConnectNotImplementedError(f"Unsupported read type: {other}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unsupported read type: {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
     return df_cache_map_put_if_absent(
         (get_session_id(), rel.common.plan_id),
@@ -237,6 +248,14 @@ def _read_file(
     )
     upload_files_if_needed(paths, clean_source_paths, session, read_format)
     paths = [_quote_stage_path(path) for path in paths]
+
+    if read_format in ("csv", "text", "json", "parquet"):
+        compression = get_compression_for_source_and_options(
+            read_format, options, from_read=True
+        )
+        if compression is not None:
+            options["compression"] = compression
+
     match read_format:
         case "csv":
             from snowflake.snowpark_connect.relation.read.map_read_csv import (
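For csv, text, json, and parquet reads, 0.31.0 now asks the new get_compression_for_source_and_options helper in io_utils.py for a compression codec and, when one is returned, injects it into the Snowpark reader options. The helper's body is not part of this diff; a plausible sketch of what such a resolver might do, offered as an assumption rather than the shipped logic:

def resolve_compression(read_format: str, options: dict, from_read: bool = True):
    # Assumed behavior: honor an explicit Spark-style "compression" option,
    # normalizing a few spellings; return None when nothing should be set.
    value = options.get("compression") or options.get("codec")
    if value is None:
        return None
    value = str(value).lower()
    if value in ("none", "uncompressed"):
        return None
    aliases = {"gz": "gzip"}
    return aliases.get(value, value)


opts = {"codec": "gz"}
compression = resolve_compression("csv", opts)
if compression is not None:
    opts["compression"] = compression
print(opts)  # {'codec': 'gz', 'compression': 'gzip'}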
@@ -265,9 +284,11 @@ def _read_file(
 
             return map_read_text(rel, schema, session, paths)
         case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported format: {read_format}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _skip_upload(path: str, read_format: str):