snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ import pandas
 import pyspark.sql.connect.proto.catalog_pb2 as catalog_proto

 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.catalogs import CATALOGS
 from snowflake.snowpark_connect.relation.catalogs.utils import (
     CURRENT_CATALOG_NAME,
@@ -148,4 +150,6 @@ def map_catalog(
             return get_current_catalog().uncacheTable(rel.uncache_table.table_name)
         case other:
             # TODO: list_function implementation is blocked on SNOW-1787268
-
+            exception = SnowparkConnectNotImplementedError(f"Other Relation {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

@@ -29,11 +29,16 @@ from snowflake.snowpark.column import Column
 from snowflake.snowpark.table_function import _ExplodeFunctionCall
 from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
 from snowflake.snowpark_connect.column_name_handler import (
+    ColumnQualifier,
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_alias,
     map_expression,
@@ -369,56 +374,64 @@ def map_sort(
         for col in input_container.column_map.get_spark_columns()
     ]

-
-
-
-
-
-
-
-
-
-
+    # Process ORDER BY expressions with a context flag to enable column reuse optimization
+    from snowflake.snowpark_connect.utils.context import push_processing_order_by_scope
+
+    with push_processing_order_by_scope():
+        for so in sort_order:
+            if so.child.HasField("literal"):
+                column_index = unwrap_literal(so.child)
+                try:
+                    if column_index <= 0:
+                        exception = IndexError()
+                        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                        raise exception
+                    col = input_df[column_index - 1]
+                except IndexError:
+                    exception = AnalysisException(
+                        f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
+            else:
+                _, typed_column = map_single_column_expression(
+                    so.child, input_container.column_map, typer
                 )
-
-        _, typed_column = map_single_column_expression(
-            so.child, input_container.column_map, typer
-        )
-        col = typed_column.col
+            col = typed_column.col

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            match (so.direction, so.null_ordering):
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.asc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.asc_nulls_last()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
+                ):
+                    col = col.desc_nulls_first()
+                case (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
+                    expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
+                ):
+                    col = col.desc_nulls_last()

-
+            cols.append(col)

-
-
-
-
-
-
-
-
-
+            ascending.append(
+                so.direction
+                == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+            )
+            if (
+                so.direction
+                != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
+            ):
+                order_specified = True

     # TODO: sort.isglobal.
     if not order_specified:
@@ -446,9 +459,11 @@ def map_to_df(
     new_column_names = list(rel.to_df.column_names)
     if len(new_column_names) != len(input_container.column_map.columns):
         # TODO: Check error type here
-
+        exception = ValueError(
             "Number of column names must match number of columns in DataFrame"
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     snowpark_new_column_names = make_column_names_snowpark_compatible(
         new_column_names, rel.common.plan_id
     )
@@ -507,9 +522,11 @@ def map_to_schema(
     for field in rel.to_schema.schema.struct.fields:
         if field.name in already_existing_columns:
             if count_case_insensitive_column_names[field.name.lower()] > 1:
-
+                exception = AnalysisException(
                     f"[AMBIGUOUS_COLUMN_OR_FIELD] Column or field `{field.name}` is ambiguous and has {len(input_container.column_map.spark_to_col[field.name])} matches."
                 )
+                attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                raise exception
             snowpark_name = None
             for name in input_container.column_map.spark_to_col:
                 if name.lower() == field.name.lower():
@@ -526,17 +543,23 @@ def map_to_schema(
                 and snowpark_field.nullable
                 and not isinstance(snowpark_field.datatype, StructType)
             ):
-
+                exception = AnalysisException(
                     f"[NULLABLE_COLUMN_OR_FIELD] Column or field `{field.name}` is nullable while it's required to be non-nullable."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception

             # Check type casting validation
             if not _can_cast_column_in_schema(
                 snowpark_field.datatype, proto_to_snowpark_type(field.data_type)
             ):
-
+                exception = AnalysisException(
                     f"""[INVALID_COLUMN_OR_FIELD_DATA_TYPE] Column or field `{field.name}` is of type "{map_snowpark_to_pyspark_types(proto_to_snowpark_type(field.data_type))}" while it's required to be "{map_snowpark_to_pyspark_types(snowpark_field.datatype)}"."""
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
     if len(already_existing_columns) == len(new_column_names):
         # All columns already exist, we're doing a simple update.
         snowpark_new_column_names = []
@@ -761,9 +784,11 @@ def map_with_columns(
         name = names_list[0]
         name_normalized = input_container.column_map._normalized_spark_name(name)
         if name_normalized in seen_columns:
-
+            exception = ValueError(
                 f"[COLUMN_ALREADY_EXISTS] The column `{name}` already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(name_normalized)
         # If the column name is already in the DataFrame, we replace it, so we use the
         # mapping to get the correct column name.
@@ -772,7 +797,9 @@ def map_with_columns(
                 [name]
             )
             if len(all_instances_of_spark_column_name) == 0:
-
+                exception = KeyError(f"Spark column name {name} does not exist")
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception
             with_columns_names.extend(all_instances_of_spark_column_name)
             with_columns_exprs.extend(
                 [expr.col] * len(all_instances_of_spark_column_name)
@@ -852,7 +879,9 @@ def map_unpivot(
     # Spark API: df.unpivot([id_columns], [unpivot_columns], var_column, val_column)
     # Snowpark API: df.unpivot(val_column, var_column, [unpivot_columns])
     if rel.unpivot.HasField("values") and len(rel.unpivot.values.values) == 0:
-
+        exception = SparkException.unpivot_requires_value_columns()
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

     input_container = map_relation(rel.unpivot.input)
     input_df = input_container.dataframe
@@ -893,7 +922,7 @@ def map_unpivot(
         )
         if not get_lease_common_ancestor_classes(type_list):
             # TODO: match exactly how spark shows mismatched columns
-
+            exception = SparkException.unpivot_value_data_type_mismatch(
                 ", ".join(
                     [
                         f"{dtype} {column_name}"
@@ -901,6 +930,8 @@ def map_unpivot(
                     ]
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
         return not is_same_type and contains_numeric_type

     def get_column_names(
@@ -984,7 +1015,7 @@ def map_unpivot(
     column_project = []
     column_reverse_project = []
     snowpark_columns = []
-    qualifiers = []
+    qualifiers: list[set[ColumnQualifier]] = []
     for c in input_container.column_map.get_snowpark_columns():
         c_name = snowpark_functions_col(c, input_container.column_map).get_name()
         if c_name in unpivot_col_names:
@@ -1012,7 +1043,7 @@
             )
             snowpark_columns.append(c)
             qualifiers.append(
-                input_container.column_map.
+                input_container.column_map.get_qualifiers_for_spark_column(c)
             )

     # Without the case when postprocessing, the result Spark dataframe is:
@@ -1057,7 +1088,7 @@
         snowpark_functions_col(snowpark_value_column_name, input_container.column_map)
     )
     snowpark_columns.append(snowpark_value_column_name)
-    qualifiers.extend([
+    qualifiers.extend([set() for _ in range(2)])

     result = (
         input_df.select(*column_project)
@@ -1097,7 +1128,9 @@ def map_group_map(
         snowpark_grouping_expressions.append(snowpark_column.col)
         group_name_list.append(new_name)
     if rel.group_map.func.python_udf is None:
-
+        exception = ValueError("group_map relation without python udf is not supported")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

     python_major, python_minor = rel.group_map.func.python_udf.python_ver.split(".")
     is_compatible_python = sys.version_info.major == int(

@@ -15,8 +15,11 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     make_column_names_snowpark_compatible,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import get_boolean_session_config_param
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import map_expression
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.relation.map_relation import map_relation
@@ -84,11 +87,13 @@ def map_extension(
             input_df = result.dataframe
             snowpark_col_names = result.column_map.get_snowpark_columns()
             if len(subquery_aliases.aliases) != len(snowpark_col_names):
-
+                exception = AnalysisException(
                     "Number of column aliases does not match number of columns. "
                     f"Number of column aliases: {len(subquery_aliases.aliases)}; "
                     f"number of columns: {len(snowpark_col_names)}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
             return DataFrameContainer.create_with_column_mapping(
                 dataframe=input_df,
                 spark_column_names=subquery_aliases.aliases,
@@ -108,18 +113,22 @@ def map_extension(

             left_queries = left_df.queries["queries"]
             if len(left_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(left_queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             left_query = left_queries[0]
             with push_outer_dataframe(left_result):
                 right_result = map_relation(lateral_join.right)
             right_df = right_result.dataframe
             right_queries = right_df.queries["queries"]
             if len(right_queries) != 1:
-
+                exception = SnowparkConnectNotImplementedError(
                     f"Unexpected number of queries: {len(right_queries)}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
             right_query = right_queries[0]
             input_df_sql = f"WITH __left AS ({left_query}) SELECT * FROM __left INNER JOIN LATERAL ({right_query})"
             session = snowpark.Session.get_active_session()
@@ -139,7 +148,11 @@ def map_extension(
         case "aggregate":
             return map_aggregate(extension.aggregate, rel.common.plan_id)
         case other:
-
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected extension {other}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


 def get_udtf_project(relation: relation_proto.Relation) -> bool:
@@ -166,7 +179,7 @@ def get_udtf_project(relation: relation_proto.Relation) -> bool:

 def handle_udtf_with_table_arguments(
     udtf_info: snowflake_proto.UDTFWithTableArguments,
-) ->
+) -> DataFrameContainer:
     """
     Handle UDTF with one or more table arguments using Snowpark's join_table_function.
     For multiple table arguments, this creates a Cartesian product of all input tables.
@@ -174,7 +187,9 @@ def handle_udtf_with_table_arguments(
     session = snowpark.Session.get_active_session()
     udtf_name_lower = udtf_info.function_name.lower()
     if udtf_name_lower not in session._udtfs:
-
+        exception = ValueError(f"UDTF '{udtf_info.function_name}' not found.")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     _udtf_obj, udtf_spark_output_names = session._udtfs[udtf_name_lower]

     table_containers = []
@@ -188,10 +203,12 @@
         if not get_boolean_session_config_param(
             "spark.sql.tvf.allowMultipleTableArguments.enabled"
         ):
-
+            exception = AnalysisException(
                 "[TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS] Multiple table arguments are not enabled. "
                 "Please set `spark.sql.tvf.allowMultipleTableArguments.enabled` to `true`"
             )
+            attach_custom_error_code(exception, ErrorCodes.CONFIG_NOT_ENABLED)
+            raise exception

     base_df = table_containers[0][0].dataframe
     first_table_col_count = len(base_df.columns)
@@ -270,7 +287,7 @@ def handle_lateral_join_with_udtf(
     left_result: DataFrameContainer,
     udtf_relation: relation_proto.Relation,
     udtf_info: tuple[snowpark.udtf.UserDefinedTableFunction, list],
-) ->
+) -> DataFrameContainer:
     """
     Handle lateral join with UDTF on the right side using join_table_function.
     """
@@ -303,7 +320,7 @@

 def map_aggregate(
     aggregate: snowflake_proto.Aggregate, plan_id: int
-) ->
+) -> DataFrameContainer:
     input_container = map_relation(aggregate.input)
     input_df: snowpark.DataFrame = input_container.dataframe

@@ -339,13 +356,15 @@ def map_aggregate(
             exp, input_container.column_map, typer
         )
         if len(new_names) != 1:
-
+            exception = SnowparkConnectNotImplementedError(
                 "Multi-column aggregate expressions are not supported"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         return new_names[0], snowpark_column

     raw_groupings: list[tuple[str, TypedColumn]] = []
-    raw_aggregations: list[tuple[str, TypedColumn,
+    raw_aggregations: list[tuple[str, TypedColumn, set[ColumnQualifier]]] = []

     if not is_group_by_all:
         raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
@@ -383,11 +402,11 @@ def map_aggregate(
         col = _map_column(exp)
         if exp.WhichOneof("expr_type") == "unresolved_attribute":
             spark_name = col[0]
-            qualifiers
-
-            )
+            qualifiers: set[
+                ColumnQualifier
+            ] = input_container.column_map.get_qualifiers_for_spark_column(spark_name)
         else:
-            qualifiers =
+            qualifiers = set()

         raw_aggregations.append((col[0], col[1], qualifiers))

@@ -420,7 +439,7 @@ def map_aggregate(
     spark_columns: list[str] = []
     snowpark_columns: list[str] = []
     snowpark_column_types: list[snowpark_types.DataType] = []
-    all_qualifiers: list[
+    all_qualifiers: list[set[ColumnQualifier]] = []

     # Use grouping columns directly without aliases
     groupings = [col.col for _, col in raw_groupings]
@@ -474,9 +493,11 @@
                 snowpark.GroupingSets(*sets_mapped)
             )
         case other:
-
+            exception = SnowparkConnectNotImplementedError(
                 f"Unsupported GROUP BY type: {other}"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     result = result.agg(*aggregations, exclude_grouping_columns=True)


@@ -5,14 +5,20 @@
 from functools import reduce

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
+from pyspark.errors import AnalysisException

 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    SparkException,
+    attach_custom_error_code,
+)
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -62,7 +68,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
     match rel.join.join_type:
         case relation_proto.Join.JOIN_TYPE_UNSPECIFIED:
             # TODO: Understand what UNSPECIFIED Join type is
-
+            exception = SnowparkConnectNotImplementedError("Unspecified Join Type")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case relation_proto.Join.JOIN_TYPE_INNER:
             join_type = "inner"
         case relation_proto.Join.JOIN_TYPE_FULL_OUTER:
@@ -78,7 +86,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         case relation_proto.Join.JOIN_TYPE_CROSS:
             join_type = "cross"
         case other:
-
+            exception = SnowparkConnectNotImplementedError(f"Other Join Type: {other}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

     # This handles case sensitivity for using_columns
     case_corrected_right_columns: list[str] = []
@@ -124,9 +134,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -140,6 +148,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     left_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception
         if any(
             right_container.column_map.get_snowpark_column_name_from_spark_column_name(
                 c, allow_non_exists=True, return_first=True
@@ -147,9 +157,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
             is None
             for c in using_columns
         ):
-
-
-            raise pyspark.errors.AnalysisException(
+            exception = AnalysisException(
                 USING_COLUMN_NOT_FOUND_ERROR.format(
                     next(
                         c
@@ -163,6 +171,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
                     right_container.column_map.get_spark_columns(),
                 )
             )
+            attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+            raise exception

     # Round trip the using columns through the column map to get the correct names
     # in order to support case sensitivity.
@@ -227,7 +237,9 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         result = joined_df.drop(*(right for _, right in snowpark_using_columns))
     else:
         if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
-
+            exception = SparkException.implicit_cartesian_product("inner")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         result: snowpark.DataFrame = left_input.join(
             right=right_input,
             how=join_type,
@@ -256,8 +268,10 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
         ]  # this is to make sure we only remove the column once
     ]

-    qualifiers = list(
-
+    qualifiers: list[set[ColumnQualifier]] = list(
+        left_container.column_map.get_qualifiers()
+    ) + [
+        {right_container.column_map.get_qualifier_for_spark_column(spark_col)}
         for i, spark_col in enumerate(
             right_container.column_map.get_spark_columns()
         )

@@ -19,6 +19,8 @@ from snowflake.snowpark_connect.column_name_handler import (
     make_column_names_snowpark_compatible,
 )
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import (
     get_python_sql_utils_class,
     map_json_schema_to_snowpark,
@@ -327,9 +329,11 @@ def map_local_relation(
             column_metadata=column_metadata,
         )
     else:
-
+        exception = SnowparkConnectNotImplementedError(
            "LocalRelation without data & schema is not supported"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception


 def map_range(
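The recurring change across these hunks is mechanical: instead of raising an exception directly, the new code builds the exception, tags it with a custom error code via `attach_custom_error_code` from `error/error_utils.py` and the new `ErrorCodes` enum from `error/error_codes.py`, and only then raises it. A minimal sketch of that pattern follows; the `ErrorCodes` members shown appear in the diff, but the body of `attach_custom_error_code` and the surrounding handler function are illustrative assumptions, not the package's actual implementation.

```python
from enum import Enum


class ErrorCodes(Enum):
    # Two of the codes referenced in the diff; the real enum defines more.
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INVALID_INPUT = "INVALID_INPUT"


def attach_custom_error_code(exception: Exception, code: ErrorCodes) -> None:
    # Stand-in for snowflake.snowpark_connect.error.error_utils.attach_custom_error_code:
    # tag the exception so downstream error handling can classify it.
    exception.custom_error_code = code  # hypothetical attribute name


def map_unsupported_feature(feature: str):
    # Pre-0.32.0 style: raise ValueError(f"Unsupported feature: {feature}")
    # 0.32.0 style: build, tag, then raise.
    exception = ValueError(f"Unsupported feature: {feature}")
    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
    raise exception
```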
|