snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
@@ -20,7 +20,7 @@ from contextlib import suppress
 from decimal import ROUND_HALF_EVEN, ROUND_HALF_UP, Context, Decimal
 from functools import partial, reduce
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional
 from urllib.parse import quote, unquote

 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
@@ -66,6 +66,7 @@ from snowflake.snowpark.types import (
     TimestampType,
     VariantType,
     YearMonthIntervalType,
+    _AnsiIntervalType,
     _FractionalType,
     _IntegralType,
     _NumericType,
@@ -74,6 +75,7 @@ from snowflake.snowpark_connect.column_name_handler import (
     ColumnNameMap,
     set_schema_getter,
 )
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import (
     get_boolean_session_config_param,
     get_timestamp_type,
@@ -83,6 +85,8 @@ from snowflake.snowpark_connect.constants import (
     DUPLICATE_KEY_FOUND_ERROR_TEMPLATE,
     STRUCTURED_TYPES_ENABLED,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.function_defaults import (
     inject_function_defaults,
 )
@@ -146,7 +150,11 @@ from snowflake.snowpark_connect.utils.xxhash64 import (
 MAX_UINT64 = 2**64 - 1
 MAX_INT64 = 2**63 - 1
 MIN_INT64 = -(2**63)
-
+MAX_32BIT_SIGNED_INT = 2_147_483_647
+
+# Interval arithmetic precision limits
+MAX_DAY_TIME_DAYS = 106751991  # Maximum days for day-time intervals
+MAX_10_DIGIT_LIMIT = 1000000000  # 10-digit limit (1 billion) for interval operands

 NAN, INFINITY = float("nan"), float("inf")

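Note on the new constants: the 106751991-day cap is not arbitrary. Assuming Spark's representation of a day-time interval as microseconds in a signed 64-bit long (an assumption, not stated in the diff), the largest whole number of days is floor((2**63 - 1) / 86_400_000_000). A minimal sanity check in Python:

    MICROS_PER_DAY = 24 * 60 * 60 * 1_000_000  # 86_400_000_000 microseconds per day
    # Largest whole number of days representable in a signed 64-bit microsecond count
    assert (2**63 - 1) // MICROS_PER_DAY == 106_751_991  # matches MAX_DAY_TIME_DAYS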
@@ -188,7 +196,9 @@ def _validate_numeric_args(
         TypeError: If arguments cannot be converted to numeric types
     """
     if len(typed_args) < 2:
-        raise ValueError(f"{function_name} requires at least 2 arguments")
+        exception = ValueError(f"{function_name} requires at least 2 arguments")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        raise exception

     modified_args = list(snowpark_args)

@@ -204,9 +214,11 @@ def _validate_numeric_args(
                 # https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala#L204
                 modified_args[i] = snowpark_fn.try_cast(snowpark_args[i], DoubleType())
             case _:
-                raise TypeError(
+                exception = TypeError(
                     f"Data type mismatch: {function_name} requires numeric types, but got {typed_args[0].typ} and {typed_args[1].typ}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

     return modified_args

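For context, the try_cast coercion above follows Spark's string-to-double promotion: strings that cannot be parsed become NULL instead of raising. A minimal sketch using the same Snowpark calls the hunk uses (the column name is illustrative):

    from snowflake.snowpark import functions as snowpark_fn
    from snowflake.snowpark.types import DoubleType

    # NULL (rather than an error) when the string cannot be parsed as a double
    coerced = snowpark_fn.try_cast(snowpark_fn.col("price_as_string"), DoubleType())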
@@ -266,6 +278,40 @@ def _coerce_for_comparison(
     return left_col, right_col


+def _preprocess_not_equals_expression(exp: expressions_proto.Expression) -> str:
+    """
+    Transform NOT(col1 = col2) expressions to col1 != col2 for Snowflake compatibility.
+
+    Snowflake has issues with NOT (col1 = col2) in subqueries, so we rewrite
+    not(==(a, b)) to a != b by modifying the protobuf expression early.
+
+    Returns:
+        The (potentially modified) function name as a lowercase string.
+    """
+    function_name = exp.unresolved_function.function_name.lower()
+
+    # Snowflake has issues with NOT (col1 = col2) in subqueries.
+    # Transform not(==(a, b)) to a!=b by modifying the protobuf early.
+    if (
+        function_name in ("not", "!")
+        and len(exp.unresolved_function.arguments) == 1
+        and exp.unresolved_function.arguments[0].WhichOneof("expr_type")
+        == "unresolved_function"
+        and exp.unresolved_function.arguments[0].unresolved_function.function_name
+        == "=="
+    ):
+        inner_eq_func = exp.unresolved_function.arguments[0].unresolved_function
+        inner_args = list(inner_eq_func.arguments)
+
+        exp.unresolved_function.function_name = "!="
+        exp.unresolved_function.ClearField("arguments")
+        exp.unresolved_function.arguments.extend(inner_args)
+
+        function_name = "!="
+
+    return function_name
+
+
 def map_unresolved_function(
     exp: expressions_proto.Expression,
     column_mapping: ColumnNameMap,
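A minimal sketch of the protobuf rewrite the new helper performs (assumes pyspark is installed; the two column arguments of "==" are elided):

    import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

    inner = expressions_proto.Expression()
    inner.unresolved_function.function_name = "=="
    # ... the two operand expressions would be appended to inner here ...

    outer = expressions_proto.Expression()
    outer.unresolved_function.function_name = "not"
    outer.unresolved_function.arguments.append(inner)

    # _preprocess_not_equals_expression(outer) returns "!=" and mutates `outer`
    # in place: its function_name becomes "!=" and its arguments become the
    # former arguments of the inner "==" call.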
@@ -294,6 +340,9 @@ def map_unresolved_function(
     # Inject default parameters for functions that need them (especially for Scala clients)
     inject_function_defaults(exp.unresolved_function)

+    # Transform NOT(col = col) to col != col for Snowflake compatibility
+    function_name = _preprocess_not_equals_expression(exp)
+
     def _resolve_args_expressions(exp: expressions_proto.Expression):
         def _resolve_fn_arg(exp):
             with resolving_fun_args():
@@ -349,7 +398,7 @@ def map_unresolved_function(
     function_name = exp.unresolved_function.function_name.lower()
     telemetry.report_function_usage(function_name)
     result_type: Optional[DataType | List[DateType]] = None
-
+    qualifier_parts: List[str] = []

     pyspark_func = getattr(pyspark_functions, function_name, None)
     if pyspark_func and pyspark_func.__doc__.lstrip().startswith("Aggregate function:"):
@@ -407,9 +456,11 @@ def map_unresolved_function(
         expected_arity = str(valid_arity)

     if invalid:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires {expected_arity} parameters but the actual number is {arity}."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        raise exception

     def _like_util(column, patterns, mode, negate=False):
         """
@@ -422,9 +473,13 @@ def map_unresolved_function(
         :return: A Snowpark condition.
         """
         if len(patterns) == 0:
-            raise ParseException("Expected something between '(' and ')'")
+            exception = ParseException("Expected something between '(' and ')'")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_SQL_SYNTAX)
+            raise exception
         if mode not in ["any", "all"]:
-            raise ValueError("Mode must be 'any' or 'all'.")
+            exception = ValueError("Mode must be 'any' or 'all'.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+            raise exception

         if mode == "any":
             condition = snowpark_fn.lit(False)
@@ -446,9 +501,13 @@ def map_unresolved_function(
     def _check_percentile_percentage(exp: expressions_proto.Expression) -> Column:
         perc = unwrap_literal(exp)
         if perc is None:
-            raise AnalysisException("The percentage must not be null.")
+            exception = AnalysisException("The percentage must not be null.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         if not 0.0 <= perc <= 1.0:
-            raise AnalysisException("The percentage must be between [0.0, 1.0].")
+            exception = AnalysisException("The percentage must be between [0.0, 1.0].")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         return snowpark_fn.lit(perc)

     def _handle_structured_aggregate_result(
@@ -497,9 +556,17 @@ def map_unresolved_function(
             )
             result_type = [f.datatype for f in udtf.output_schema]
         case "!=":
-
-
+            _check_interval_string_comparison(
+                "!=", snowpark_typed_args, snowpark_arg_names
+            )
+            # Make the function name same as spark connect. a != b translate's to not(a=b)
+            spark_function_name = (
+                f"(NOT ({snowpark_arg_names[0]} = {snowpark_arg_names[1]}))"
+            )
+            left, right = _coerce_for_comparison(
+                snowpark_typed_args[0], snowpark_typed_args[1]
             )
+            result_exp = TypedColumn(left != right, lambda: [BooleanType()])
         case "%" | "mod":
             if spark_sql_ansi_enabled:
                 result_exp = snowpark_args[0] % snowpark_args[1]
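The `(NOT (a = b))` label above mirrors how Spark prints this expression: PySpark parses `a != b` as `not(a = b)`, so the resolved column carries that name. Illustrative, assuming a DataFrame `df` with columns `a` and `b` (session setup elided):

    from pyspark.sql import functions as F

    # df.select(F.col("a") != F.col("b")).columns == ["(NOT (a = b))"]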
@@ -548,9 +615,11 @@ def map_unresolved_function(
                     result_exp = snowpark_fn.lit(None)
                 case (StringType(), StringType()):
                     if spark_sql_ansi_enabled:
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
                     else:
                         result_type = DoubleType()
                         result_exp = snowpark_args[0].try_cast(
@@ -598,12 +667,87 @@ def map_unresolved_function(
                     result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
                         result_type
                     )
-                case (
-
-
+                case (StringType(), t) | (t, StringType()) if isinstance(
+                    t, _AnsiIntervalType
+                ):
+                    if isinstance(snowpark_typed_args[0].typ, StringType):
+                        result_type = type(
+                            t
+                        )()  # YearMonthIntervalType() or DayTimeIntervalType()
+                        result_exp = snowpark_args[1] * snowpark_args[0].try_cast(
+                            LongType()
+                        )
+                        spark_function_name = (
+                            f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+                        )
+                    else:
+                        result_type = type(
+                            t
+                        )()  # YearMonthIntervalType() or DayTimeIntervalType()
+                        result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
+                            LongType()
+                        )
+                        spark_function_name = (
+                            f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+                        )
+                case (
+                    (_NumericType() as t, NullType())
+                    | (NullType(), _NumericType() as t)
                 ):
                     result_type = t
                     result_exp = snowpark_fn.lit(None)
+                case (NullType(), t) | (t, NullType()) if isinstance(
+                    t, _AnsiIntervalType
+                ):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_fn.lit(None)
+                    if isinstance(snowpark_typed_args[0].typ, NullType):
+                        spark_function_name = (
+                            f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+                        )
+                    else:
+                        spark_function_name = (
+                            f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+                        )
+                case (DecimalType(), t) | (t, DecimalType()) if isinstance(
+                    t, _AnsiIntervalType
+                ):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    if isinstance(snowpark_typed_args[0].typ, DecimalType):
+                        result_exp = snowpark_args[1] * snowpark_args[0]
+                        spark_function_name = (
+                            f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+                        )
+                    else:
+                        result_exp = snowpark_args[0] * snowpark_args[1]
+                        spark_function_name = (
+                            f"({snowpark_arg_names[0]} * {snowpark_arg_names[1]})"
+                        )
+                case (t, _NumericType()) if isinstance(t, _AnsiIntervalType):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_args[0] * snowpark_args[1]
+                case (_NumericType(), t) if isinstance(t, _AnsiIntervalType):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_args[1] * snowpark_args[0]
+                    spark_function_name = (
+                        f"({snowpark_arg_names[1]} * {snowpark_arg_names[0]})"
+                    )
                 case (_NumericType(), _NumericType()):
                     result_type = _find_common_type(
                         [arg.typ for arg in snowpark_typed_args]
@@ -612,9 +756,11 @@ def map_unresolved_function(
                         1
                     ].cast(result_type)
                 case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
         case "+":
             spark_function_name = _get_spark_function_name(
                 snowpark_typed_args[0],
@@ -642,7 +788,14 @@ def map_unresolved_function(
                         result_type = DateType()
                         result_exp = snowpark_args[0] + snowpark_args[1]
                     elif isinstance(t, (DayTimeIntervalType, YearMonthIntervalType)):
-                        result_type =
+                        result_type = (
+                            TimestampType()
+                            if isinstance(
+                                snowpark_typed_args[t_param_index].typ,
+                                DayTimeIntervalType,
+                            )
+                            else DateType()
+                        )
                         result_exp = (
                             snowpark_args[date_param_index]
                             + snowpark_args[t_param_index]
@@ -660,14 +813,47 @@ def map_unresolved_function(
                             + snowpark_args[t_param_index]
                         )
                     else:
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
+                case (TimestampType(), t) | (t, TimestampType()):
+                    timestamp_param_index = (
+                        0
+                        if isinstance(snowpark_typed_args[0].typ, TimestampType)
+                        else 1
+                    )
+                    t_param_index = 1 - timestamp_param_index
+                    if isinstance(t, (DayTimeIntervalType, YearMonthIntervalType)):
+                        result_type = TimestampType()
+                        result_exp = (
+                            snowpark_args[timestamp_param_index]
+                            + snowpark_args[t_param_index]
+                        )
+                    elif (
+                        hasattr(
+                            snowpark_typed_args[t_param_index].col._expr1, "pretty_name"
+                        )
+                        and "INTERVAL"
+                        == snowpark_typed_args[t_param_index].col._expr1.pretty_name
+                    ):
+                        result_type = TimestampType()
+                        result_exp = (
+                            snowpark_args[timestamp_param_index]
+                            + snowpark_args[t_param_index]
+                        )
+                    else:
+                        raise AnalysisException(
+                            f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INTERVAL") type for timestamp operations, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
+                        )
                 case (StringType(), StringType()):
                     if spark_sql_ansi_enabled:
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
                     else:
                         result_type = DoubleType()
                         result_exp = snowpark_fn.try_cast(
@@ -707,9 +893,91 @@ def map_unresolved_function(
                 case (DecimalType(), t) | (t, DecimalType()) if isinstance(
                     t, (BinaryType, TimestampType)
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
+                case (t1, t2) | (t2, t1) if isinstance(
+                    t1, _AnsiIntervalType
+                ) and isinstance(t2, _AnsiIntervalType) and type(t1) == type(t2):
+                    # Both operands are the same interval type
+                    result_type = type(t1)(
+                        min(t1.start_field, t2.start_field),
+                        max(t1.end_field, t2.end_field),
+                    )
+                    result_exp = snowpark_args[0] + snowpark_args[1]
+                case (StringType(), t) | (t, StringType()) if isinstance(
+                    t, YearMonthIntervalType
+                ):
+                    # String + YearMonthInterval: Spark tries to cast string to double first, throws error if it fails
+                    result_type = StringType()
+                    if isinstance(snowpark_typed_args[0].typ, StringType):
+                        result_exp = (
+                            snowpark_fn.cast(snowpark_args[0], "double")
+                            + snowpark_args[1]
+                        )
+                    else:
+                        result_exp = snowpark_args[0] + snowpark_fn.cast(
+                            snowpark_args[1], "double"
+                        )
+                case (StringType(), t) | (t, StringType()) if isinstance(
+                    t, DayTimeIntervalType
+                ):
+                    # String + DayTimeInterval: try to parse string as timestamp, return NULL if it fails
+                    # For time-only strings (like '10:00:00'), prepend current date to make it a full timestamp
+                    result_type = StringType()
+                    if isinstance(snowpark_typed_args[0].typ, StringType):
+                        # Check if string looks like time-only (HH:MM:SS or HH:MM pattern)
+                        # If so, prepend current date; otherwise use as-is
+                        time_only_pattern = snowpark_fn.function("regexp_like")(
+                            snowpark_args[0], r"^\d{1,2}:\d{2}(:\d{2})?$"
+                        )
+                        timestamp_expr = snowpark_fn.when(
+                            time_only_pattern,
+                            snowpark_fn.function("try_to_timestamp_ntz")(
+                                snowpark_fn.function("concat")(
+                                    snowpark_fn.function("to_char")(
+                                        snowpark_fn.function("current_date")(),
+                                        "YYYY-MM-DD",
+                                    ),
+                                    snowpark_fn.lit(" "),
+                                    snowpark_args[0],
+                                )
+                            ),
+                        ).otherwise(
+                            snowpark_fn.function("try_to_timestamp_ntz")(
+                                snowpark_args[0]
+                            )
+                        )
+                        result_exp = timestamp_expr + snowpark_args[1]
+                    else:
+                        # interval + string case
+                        time_only_pattern = snowpark_fn.function("regexp_like")(
+                            snowpark_args[1], r"^\d{1,2}:\d{2}(:\d{2})?$"
+                        )
+                        timestamp_expr = snowpark_fn.when(
+                            time_only_pattern,
+                            snowpark_fn.function("try_to_timestamp_ntz")(
+                                snowpark_fn.function("concat")(
+                                    snowpark_fn.function("to_char")(
+                                        snowpark_fn.function("current_date")(),
+                                        "'YYYY-MM-DD'",
+                                    ),
+                                    snowpark_fn.lit(" "),
+                                    snowpark_args[1],
+                                )
+                            ),
+                        ).otherwise(
+                            snowpark_fn.function("try_to_timestamp_ntz")(
+                                snowpark_args[1]
+                            )
+                        )
+                        result_exp = snowpark_args[0] + timestamp_expr
+                        spark_function_name = (
+                            f"{snowpark_arg_names[0]} + {snowpark_arg_names[1]}"
+                        )
+
                 case _:
                     result_type, overflow_possible = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
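An illustrative check of the time-only pattern used in this hunk: strings such as '10:00:00' or '9:30' match and get the current date prepended before parsing, while anything else falls through to the plain try_to_timestamp_ntz branch:

    import re

    TIME_ONLY = re.compile(r"^\d{1,2}:\d{2}(:\d{2})?$")
    assert TIME_ONLY.match("10:00:00") and TIME_ONLY.match("9:30")
    assert not TIME_ONLY.match("2024-01-01 10:00:00")  # full timestamps do not match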
@@ -755,7 +1023,11 @@ def map_unresolved_function(
                     DateType(),
                     YearMonthIntervalType(),
                 ):
-                    result_type =
+                    result_type = (
+                        TimestampType()
+                        if isinstance(snowpark_typed_args[1].typ, DayTimeIntervalType)
+                        else DateType()
+                    )
                     result_exp = snowpark_args[0] - snowpark_args[1]
                 case (DateType(), StringType()):
                     if (
@@ -773,6 +1045,23 @@ def map_unresolved_function(
                     result_exp = snowpark_args[0] - snowpark_args[1].cast(
                         input_type
                     )
+                case (TimestampType(), DayTimeIntervalType()) | (
+                    TimestampType(),
+                    YearMonthIntervalType(),
+                ):
+                    result_type = TimestampType()
+                    result_exp = snowpark_args[0] - snowpark_args[1]
+                case (TimestampType(), StringType()):
+                    if (
+                        hasattr(snowpark_typed_args[1].col._expr1, "pretty_name")
+                        and "INTERVAL" == snowpark_typed_args[1].col._expr1.pretty_name
+                    ):
+                        result_type = TimestampType()
+                        result_exp = snowpark_args[0] - snowpark_args[1]
+                    else:
+                        raise AnalysisException(
+                            f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INTERVAL") type for timestamp operations, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
+                        )
                 case (StringType(), DateType()):
                     # TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
                     result_type = LongType()
@@ -782,18 +1071,24 @@ def map_unresolved_function(
                     result_type = DateType()
                     result_exp = snowpark_args[0] - snowpark_args[1]
                 case (DateType(), _):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                 case (_, DateType()):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                 case (StringType(), StringType()):
                     if spark_sql_ansi_enabled:
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
                     else:
                         result_type = DoubleType()
                         result_exp = snowpark_fn.try_cast(
@@ -833,9 +1128,21 @@ def map_unresolved_function(
                 case (DecimalType(), t) | (t, DecimalType()) if isinstance(
                     t, (BinaryType, TimestampType)
                 ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
+                case (StringType(), t) if isinstance(t, _AnsiIntervalType):
+                    # String - Interval: try to parse string as timestamp, return NULL if it fails
+                    result_type = StringType()
+                    result_exp = (
+                        snowpark_fn.function("try_to_timestamp")(snowpark_args[0])
+                        - snowpark_args[1]
+                    )
+                    spark_function_name = (
+                        f"{snowpark_arg_names[0]} - {snowpark_arg_names[1]}"
+                    )
                 case _:
                     result_type, overflow_possible = _get_add_sub_result_type(
                         snowpark_typed_args[0].typ,
@@ -879,9 +1186,11 @@ def map_unresolved_function(
                     result_exp = snowpark_fn.lit(None)
                 case (StringType(), StringType()):
                     if spark_sql_ansi_enabled:
-                        raise AnalysisException(
+                        exception = AnalysisException(
                             f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
                         )
+                        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                        raise exception
                     else:
                         result_type = DoubleType()
                         result_exp = _divnull(
@@ -932,9 +1241,57 @@ def map_unresolved_function(
                     result_exp = _divnull(
                         snowpark_args[0], snowpark_args[1].try_cast(result_type)
                     )
+                case (t, StringType()) if isinstance(t, _AnsiIntervalType):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_args[0] / snowpark_args[1].try_cast(
+                        LongType()
+                    )
+                    spark_function_name = (
+                        f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+                    )
                 case (_NumericType(), NullType()) | (NullType(), _NumericType()):
                     result_type = DoubleType()
                     result_exp = snowpark_fn.lit(None)
+                case (t, NullType()) if isinstance(t, _AnsiIntervalType):
+                    # Only allow interval / null, not null / interval
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_fn.lit(None)
+                    spark_function_name = (
+                        f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+                    )
+                case (DecimalType(), t) | (t, DecimalType()) if isinstance(
+                    t, _AnsiIntervalType
+                ):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    if isinstance(snowpark_typed_args[0].typ, DecimalType):
+                        result_exp = snowpark_args[1] / snowpark_args[0]
+                        spark_function_name = (
+                            f"({snowpark_arg_names[1]} / {snowpark_arg_names[0]})"
+                        )
+                    else:
+                        result_exp = snowpark_args[0] / snowpark_args[1]
+                        spark_function_name = (
+                            f"({snowpark_arg_names[0]} / {snowpark_arg_names[1]})"
+                        )
+                case (t, _NumericType()) if isinstance(t, _AnsiIntervalType):
+                    result_type = (
+                        YearMonthIntervalType()
+                        if isinstance(t, YearMonthIntervalType)
+                        else DayTimeIntervalType()
+                    )
+                    result_exp = snowpark_args[0] / snowpark_args[1]
                 case (_NumericType(), _NumericType()):
                     result_type = DoubleType()
                     result_exp = _divnull(
@@ -942,9 +1299,11 @@ def map_unresolved_function(
                         snowpark_args[1].cast(result_type),
                     )
                 case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
         case "~":
             result_exp = TypedColumn(
                 snowpark_fn.bitnot(snowpark_args[0]),
@@ -958,9 +1317,11 @@ def map_unresolved_function(
                 or isinstance(snowpark_typed_args[0].typ, BooleanType)
                 and isinstance(snowpark_typed_args[1].typ, DecimalType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} < {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             # Check for interval-string comparisons
             _check_interval_string_comparison(
                 "<", snowpark_typed_args, snowpark_arg_names
@@ -976,9 +1337,11 @@ def map_unresolved_function(
                 or isinstance(snowpark_typed_args[0].typ, BooleanType)
                 and isinstance(snowpark_typed_args[1].typ, DecimalType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} <= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             # Check for interval-string comparisons
             _check_interval_string_comparison(
                 "<=", snowpark_typed_args, snowpark_arg_names
@@ -1017,9 +1380,11 @@ def map_unresolved_function(
                 or isinstance(snowpark_typed_args[0].typ, BooleanType)
                 and isinstance(snowpark_typed_args[1].typ, DecimalType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} > {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             # Check for interval-string comparisons
             _check_interval_string_comparison(
                 ">", snowpark_typed_args, snowpark_arg_names
@@ -1035,9 +1400,11 @@ def map_unresolved_function(
                 or isinstance(snowpark_typed_args[0].typ, BooleanType)
                 and isinstance(snowpark_typed_args[1].typ, DecimalType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{snowpark_arg_names[0]} >= {snowpark_arg_names[1]}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").;'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             # Check for interval-string comparisons
             _check_interval_string_comparison(
                 ">=", snowpark_typed_args, snowpark_arg_names
@@ -1134,9 +1501,11 @@ def map_unresolved_function(
             )
         case "any":
             if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "BOOLEAN" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = TypedColumn(
                 snowpark_fn.max(snowpark_args[0]),
                 lambda: [BooleanType()],
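Mapping the boolean `any` aggregate to MAX works because false sorts before true, so the maximum of a boolean column is true exactly when at least one row is true. A pure-Python illustration:

    assert max([False, True, False]) is True   # some row is true -> true
    assert max([False, False]) is False        # no row is true -> false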
@@ -1151,9 +1520,13 @@ def map_unresolved_function(
                 case [col]:
                     result_exp = snowpark_fn.any_value(col)
                 case _:
-                    raise ValueError(
+                    exception = ValueError(
                         f"Unexpected number of args for function any_value. Expected 1 or 2, received {len(snowpark_args)}"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception

             spark_function_name = f"any_value({snowpark_arg_names[0]})"
             result_exp = _type_with_typer(result_exp)
@@ -1165,9 +1538,13 @@ def map_unresolved_function(
                         lambda: [LongType()],
                     )
                 case [_, _]:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                         "'rsd' parameter is not supported"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
         case "approx_percentile" | "percentile_approx":
             # SNOW-1955784: Support accuracy parameter
             # Use percentile_disc to return actual values from dataset (matches PySpark behavior)
@@ -1184,7 +1561,11 @@ def map_unresolved_function(
             # Even though the Spark function accepts a Column for percentage, it will fail unless it's a literal.
             # Therefore, we can do error checking right here.
             if not 0.0 <= percentage <= 1.0:
-                raise AnalysisException("percentage must be between [0.0, 1.0]")
+                exception = AnalysisException(
+                    "percentage must be between [0.0, 1.0]"
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             result = snowpark_fn.function("percentile_disc")(
                 snowpark_fn.lit(percentage)
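The percentile_disc choice matters because PERCENTILE_DISC returns an element of the dataset while PERCENTILE_CONT interpolates between elements; PySpark's percentile_approx likewise returns actual dataset values. A small worked example (the data values are illustrative):

    import math

    data = sorted([1, 2, 3, 4])
    p = 0.5
    # PERCENTILE_DISC: smallest value whose cumulative distribution >= p
    disc = data[math.ceil(p * len(data)) - 1]
    # PERCENTILE_CONT: linear interpolation at rank p * (n - 1)
    rank = p * (len(data) - 1)
    lo, hi = data[int(rank)], data[min(int(rank) + 1, len(data) - 1)]
    cont = lo + (hi - lo) * (rank - int(rank))
    assert disc == 2 and cont == 2.5  # disc is an actual element; cont is not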
@@ -1251,9 +1632,11 @@ def map_unresolved_function(
         case "array_contains":
             array_type = snowpark_typed_args[0].typ
             if not isinstance(array_type, ArrayType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             def _compatible_types(type1: DataType, type2: DataType) -> bool:
                 if type1 == type2:
@@ -1273,9 +1656,11 @@ def map_unresolved_function(
             if not _compatible_types(
                 array_type.element_type, snowpark_typed_args[1].typ
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     '[DATATYPE_MISMATCH.ARRAY_FUNCTION_DIFF_TYPES] Cannot resolve "array_contains(arr, val)" due to data type mismatch: Input to `array_contains` should have been "ARRAY" followed by a value with same element type'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             value = (
                 snowpark_fn.cast(snowpark_args[1], array_type.element_type)
                 if array_type.structured
@@ -1345,7 +1730,13 @@ def map_unresolved_function(
                     )
                     result_exp = snowpark_fn.array_to_string(data, delimiter)
                 case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
             result_exp = TypedColumn(result_exp, lambda: [StringType()])
         case "array_max":
             result_exp = TypedColumn(
@@ -1437,9 +1828,11 @@ def map_unresolved_function(
         case "array_size":
             array_type = snowpark_typed_args[0].typ
             if not isinstance(array_type, ArrayType):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = TypedColumn(
                 snowpark_fn.array_size(*snowpark_args), lambda: [LongType()]
             )
@@ -1450,9 +1843,11 @@ def map_unresolved_function(
                     snowpark_fn.size(*snowpark_args), lambda: [LongType()]
                 )
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Expected argument '{snowpark_arg_names[0]}' to have an ArrayType or MapType, but got {arg_type.simpleString()}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         case "array_sort":
             result_exp = TypedColumn(
                 snowpark_fn.array_sort(*snowpark_args),
@@ -1538,9 +1933,13 @@ def map_unresolved_function(
                         expr, snowpark_fn.lit(None)
                     ).otherwise(raise_error(snowpark_fn.cast(message, StringType())))
                 case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `assert_true` requires 1 or 2 parameters but the actual number is {len(snowpark_args)}."
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
         case "atan":
             spark_function_name = f"ATAN({snowpark_arg_names[0]})"
             result_exp = TypedColumn(
@@ -1578,9 +1977,11 @@ def map_unresolved_function(
             # Validate that input is StringType or BinaryType
             input_type = snowpark_typed_args[0].typ
             if not isinstance(input_type, (StringType, BinaryType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "base64({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "BINARY" type, however "{snowpark_arg_names[0]}" has the type "{input_type.simpleString().upper()}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             base64_encoding_function = snowpark_fn.function("base64_encode")
             result_exp = TypedColumn(
@@ -1613,9 +2014,11 @@ def map_unresolved_function(
             if not isinstance(
                 snowpark_typed_args[0].typ, (_IntegralType, BooleanType, NullType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INTEGRAL" or "BOOLEAN") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             @cached_udf(
                 input_types=[VariantType()],
@@ -1780,9 +2183,11 @@ def map_unresolved_function(
             result_type = BinaryType()
         case "bool_and" | "every":
             if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the \'BOOLEAN\' type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             bool_and_agg_function = snowpark_fn.function("booland_agg")
             result_exp = TypedColumn(
                 bool_and_agg_function(*snowpark_args), lambda: [BooleanType()]
@@ -1790,9 +2195,11 @@ def map_unresolved_function(

         case "bool_or" | "some":
             if not isinstance(snowpark_typed_args[0].typ, (BooleanType, NullType)):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "BOOLEAN" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}".'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             bool_or_agg_function = snowpark_fn.function("boolor_agg")
             result_exp = TypedColumn(
                 bool_or_agg_function(*snowpark_args), lambda: [BooleanType()]
@@ -1825,9 +2232,11 @@ def map_unresolved_function(
                 ),
                 snowpark_typed_args[0].typ,
             ):
-                raise ArithmeticException(
+                exception = ArithmeticException(
                     '[ARITHMETIC_OVERFLOW] Overflow. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+                raise exception

             match snowpark_typed_args[0].typ:
                 case DecimalType():
@@ -1892,9 +2301,11 @@ def map_unresolved_function(
                 if not isinstance(
                     snowpark_typed_args[1].typ, IntegerType
                 ) and not isinstance(snowpark_typed_args[1].typ, LongType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f"The 'scale' parameter of function '{function_name}' needs to be a int literal."
                     )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                 spark_function_name = (
                     f"{fn_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]})"
                 )
@@ -1911,9 +2322,13 @@ def map_unresolved_function(
                 else:
                     result_exp = TypedColumn(result_exp, lambda: [result_type])
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires 2 parameters but the actual number is {len(snowpark_args)}."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
         case "chr" | "char":
             result_exp = snowpark_fn.when(
                 (snowpark_args[0] > 256), snowpark_fn.char(snowpark_args[0] % 256)
@@ -1933,11 +2348,6 @@ def map_unresolved_function(
             result_exp = snowpark_fn.coalesce(
                 *[col.cast(result_type) for col in snowpark_args]
            )
-        case "col":
-            # TODO: assign type
-            result_exp = snowpark_fn.col(*snowpark_args)
-            result_exp = _type_with_typer(result_exp)
-            qualifiers = snowpark_args[0].get_qualifiers()
         case "collect_list" | "array_agg":
             # TODO: SNOW-1967177 - Support structured types in array_agg
             result_exp = snowpark_fn.array_agg(
@@ -1949,20 +2359,12 @@ def map_unresolved_function(
             spark_function_name = f"collect_list({snowpark_arg_names[0]})"
         case "collect_set":
             # Convert to a semi-structured type. TODO SNOW-1953065 - Support structured types in array_unique_agg.
-            result_exp = snowpark_fn.
-
-                    snowpark_typed_args[0].column(to_semi_structure=True)
-                ),
-                ArrayType(snowpark_typed_args[0].typ),
+            result_exp = snowpark_fn.array_unique_agg(
+                snowpark_typed_args[0].column(to_semi_structure=True)
             )
-            result_exp =
-                result_exp,
+            result_exp = _resolve_aggregate_exp(
+                result_exp, ArrayType(snowpark_typed_args[0].typ)
             )
-        case "column":
-            # TODO: assign type
-            result_exp = snowpark_fn.column(*snowpark_args)
-            result_exp = _type_with_typer(result_exp)
-            qualifiers = snowpark_args[0].get_qualifiers()
         case "concat":
             if len(snowpark_args) == 0:
                 result_exp = TypedColumn(snowpark_fn.lit(""), lambda: [StringType()])
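For reference, collect_set aggregates the distinct values of a group into an array, with no guaranteed order. A pure-Python illustration of the semantics (sorted() is only to make the check deterministic):

    rows = [("k1", "a"), ("k1", "a"), ("k1", "b")]
    agg = {}
    for key, value in rows:
        agg.setdefault(key, set()).add(value)
    assert sorted(agg["k1"]) == ["a", "b"]  # duplicates collapsed per key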
@@ -2040,9 +2442,11 @@ def map_unresolved_function(
                 ),
                 ULongLong(),
             ):
-                raise ArithmeticException(
+                exception = ArithmeticException(
                     '[ARITHMETIC_OVERFLOW] Overflow in function conv(). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                 )
+                attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+                raise exception

             @cached_udf(
                 input_types=[
@@ -2139,7 +2543,7 @@ def map_unresolved_function(
                     snowpark_fn.col("*", _is_qualified_name=True)
                 )
             else:
-                result_exp = snowpark_fn.
+                result_exp = snowpark_fn.call_function("COUNT", *snowpark_args)
             result_exp = TypedColumn(result_exp, lambda: [LongType()])
         case "count_if":
             result_exp = snowpark_fn.call_function("COUNT_IF", snowpark_args[0])
@@ -2190,9 +2594,11 @@ def map_unresolved_function(
                     seed = literal_value

            if column is None or eps is None or confidence is None or seed is None:
-                raise ValueError(
+                exception = ValueError(
                     "The required parameters for count_min_sketch have not been set."
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

             # Calculate depth and width based on eps and confidence
             depth = math.ceil(math.log(1.0 / (1.0 - confidence)))
@@ -2290,10 +2696,12 @@ def map_unresolved_function(
             if not isinstance(col1_type, _NumericType) or not isinstance(
                 col2_type, _NumericType
             ):
-                raise TypeError(
+                exception = TypeError(
                     f"Data type mismatch: covar_pop requires numeric types, "
                     f"but got {col1_type} and {col2_type}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = snowpark_fn.covar_pop(
                 snowpark_args[0],
                 snowpark_args[1],
@@ -2305,10 +2713,12 @@ def map_unresolved_function(
             if not isinstance(col1_type, _NumericType) or not isinstance(
                 col2_type, _NumericType
             ):
-                raise TypeError(
+                exception = TypeError(
                     f"Data type mismatch: covar_samp requires numeric types, "
                     f"but got {col1_type} and {col2_type}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
             result_exp = snowpark_fn.covar_samp(snowpark_args[0], snowpark_args[1])
             result_type = DoubleType()
         case "crc32":
@@ -2317,9 +2727,11 @@ def map_unresolved_function(
                 and not isinstance(snowpark_typed_args[0].typ, StringType)
                 and not isinstance(snowpark_typed_args[0].typ, VariantType)
             ):
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve crc32({snowpark_args[0]}) due to data type mismatch: Input requires the BINARY type, however {snowpark_args[0]} has the type {snowpark_typed_args[0].typ}."
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

             # UDF to calculate the unsigned CRC32 value of data in bytes. Returns the CRC32 value
             # as a 32-bit INT, or None if the input is None.
@@ -2369,9 +2781,13 @@ def map_unresolved_function(
             spark_function_name = "current_database()"
         case "current_date" | "curdate":
             if len(snowpark_args) > 0:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `{function_name}` requires 0 parameters but the actual number is {len(snowpark_args)}."
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
             result_exp = TypedColumn(snowpark_fn.current_date(), lambda: [DateType()])
             spark_function_name = "current_date()"
         case "current_timestamp" | "now":
@@ -2387,7 +2803,11 @@ def map_unresolved_function(
|
|
|
2387
2803
|
if len(snowpark_args) != 2:
|
|
2388
2804
|
# SQL supports a 3-argument call that gets mapped to timestamp_add -
|
|
2389
2805
|
# however, if the first argument is invalid, we end up here.
|
|
2390
|
-
|
|
2806
|
+
exception = AnalysisException("date_add takes 2 arguments")
|
|
2807
|
+
attach_custom_error_code(
|
|
2808
|
+
exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
|
|
2809
|
+
)
|
|
2810
|
+
raise exception
|
|
2391
2811
|
arg_2 = snowpark_typed_args[1].typ
|
|
2392
2812
|
if isinstance(arg_2, StringType):
|
|
2393
2813
|
with suppress(Exception):
|
|
@@ -2395,9 +2815,11 @@ def map_unresolved_function(
|
|
|
2395
2815
|
arg_2 = IntegerType()
|
|
2396
2816
|
|
|
2397
2817
|
if not isinstance(arg_2, (_IntegralType, NullType)):
|
|
2398
|
-
|
|
2818
|
+
exception = AnalysisException(
|
|
2399
2819
|
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_add({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT" or "NULL") type, however "{snowpark_arg_names[1]}" has the type "{str(arg_2)}".'
|
|
2400
2820
|
)
|
|
2821
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
2822
|
+
raise exception
|
|
2401
2823
|
|
|
2402
2824
|
result_exp = _try_to_cast(
|
|
2403
2825
|
"try_to_date",
|
|
@@ -2412,7 +2834,11 @@ def map_unresolved_function(
|
|
|
2412
2834
|
if len(snowpark_args) != 2:
|
|
2413
2835
|
# SQL supports a 3-argument call that gets mapped to timestamp_diff -
|
|
2414
2836
|
# however, if the first argument is invalid, we end up here.
|
|
2415
|
-
|
|
2837
|
+
exception = AnalysisException("date_diff takes 2 arguments")
|
|
2838
|
+
attach_custom_error_code(
|
|
2839
|
+
exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
|
|
2840
|
+
)
|
|
2841
|
+
raise exception
|
|
2416
2842
|
result_exp = _try_to_cast(
|
|
2417
2843
|
"try_to_date",
|
|
2418
2844
|
snowpark_fn.datediff("day", snowpark_args[1], snowpark_args[0]),
|
|
@@ -2469,9 +2895,11 @@ def map_unresolved_function(
|
|
|
2469
2895
|
arg_2 = IntegerType()
|
|
2470
2896
|
|
|
2471
2897
|
if not isinstance(arg_2, (_IntegralType, NullType)):
|
|
2472
|
-
|
|
2898
|
+
exception = AnalysisException(
|
|
2473
2899
|
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_sub({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT" or "NULL") type, however "{snowpark_arg_names[1]}" has the type "{str(arg_2)}".'
|
|
2474
2900
|
)
|
|
2901
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
2902
|
+
raise exception
|
|
2475
2903
|
result_exp = _try_to_cast(
|
|
2476
2904
|
"try_to_date",
|
|
2477
2905
|
snowpark_fn.to_date(
|
|
@@ -2553,14 +2981,6 @@ def map_unresolved_function(
|
|
|
2553
2981
|
)
|
|
2554
2982
|
result_type = LongType()
|
|
2555
2983
|
case "date_part" | "datepart" | "extract":
|
|
2556
|
-
# Check for interval types and throw NotImplementedError
|
|
2557
|
-
if isinstance(
|
|
2558
|
-
snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
|
|
2559
|
-
):
|
|
2560
|
-
raise NotImplementedError(
|
|
2561
|
-
f"{function_name} with interval types is not supported"
|
|
2562
|
-
)
|
|
2563
|
-
|
|
2564
2984
|
field_lit: str | None = unwrap_literal(exp.unresolved_function.arguments[0])
|
|
2565
2985
|
|
|
2566
2986
|
if field_lit is None:
|
|
@@ -2605,16 +3025,51 @@ def map_unresolved_function(
|
|
|
2605
3025
|
case "div":
|
|
2606
3026
|
# Only called from SQL, either as `a div b` or `div(a, b)`
|
|
2607
3027
|
# Convert it into `(a - a % b) / b`.
|
|
2608
|
-
|
|
2609
|
-
(
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
3028
|
+
if isinstance(snowpark_typed_args[0].typ, YearMonthIntervalType):
|
|
3029
|
+
if isinstance(snowpark_typed_args[1].typ, YearMonthIntervalType):
|
|
3030
|
+
dividend_total = _calculate_total_months(snowpark_args[0])
|
|
3031
|
+
divisor_total = _calculate_total_months(snowpark_args[1])
|
|
3032
|
+
|
|
3033
|
+
# Handle division by zero interval
|
|
3034
|
+
if not spark_sql_ansi_enabled:
|
|
3035
|
+
result_exp = snowpark_fn.when(
|
|
3036
|
+
divisor_total == 0, snowpark_fn.lit(None)
|
|
3037
|
+
).otherwise(snowpark_fn.trunc(dividend_total / divisor_total))
|
|
3038
|
+
else:
|
|
3039
|
+
result_exp = snowpark_fn.trunc(dividend_total / divisor_total)
|
|
3040
|
+
result_type = LongType()
|
|
3041
|
+
else:
|
|
3042
|
+
raise AnalysisException(
|
|
3043
|
+
f"""[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} div {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ({snowpark_typed_args[0].typ} and {snowpark_typed_args[1].typ}).;"""
|
|
3044
|
+
)
|
|
3045
|
+
elif isinstance(snowpark_typed_args[0].typ, DayTimeIntervalType):
|
|
3046
|
+
if isinstance(snowpark_typed_args[1].typ, DayTimeIntervalType):
|
|
3047
|
+
dividend_total = _calculate_total_seconds(snowpark_args[0])
|
|
3048
|
+
divisor_total = _calculate_total_seconds(snowpark_args[1])
|
|
3049
|
+
|
|
3050
|
+
# Handle division by zero interval
|
|
3051
|
+
if not spark_sql_ansi_enabled:
|
|
3052
|
+
result_exp = snowpark_fn.when(
|
|
3053
|
+
divisor_total == 0, snowpark_fn.lit(None)
|
|
3054
|
+
).otherwise(snowpark_fn.trunc(dividend_total / divisor_total))
|
|
3055
|
+
else:
|
|
3056
|
+
result_exp = snowpark_fn.trunc(dividend_total / divisor_total)
|
|
3057
|
+
result_type = LongType()
|
|
3058
|
+
else:
|
|
3059
|
+
raise AnalysisException(
|
|
3060
|
+
f"""[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} div {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ({snowpark_typed_args[0].typ} and {snowpark_typed_args[1].typ}).;"""
|
|
3061
|
+
)
|
|
3062
|
+
else:
|
|
3063
|
+
result_exp = snowpark_fn.cast(
|
|
3064
|
+
(snowpark_args[0] - snowpark_args[0] % snowpark_args[1])
|
|
3065
|
+
/ snowpark_args[1],
|
|
3066
|
+
LongType(),
|
|
3067
|
+
)
|
|
3068
|
+
if not spark_sql_ansi_enabled:
|
|
3069
|
+
result_exp = snowpark_fn.when(
|
|
3070
|
+
snowpark_args[1] == 0, snowpark_fn.lit(None)
|
|
3071
|
+
).otherwise(result_exp)
|
|
3072
|
+
result_type = LongType()
|
|
2618
3073
|
case "e":
|
|
2619
3074
|
spark_function_name = "E()"
|
|
2620
3075
|
result_exp = snowpark_fn.lit(math.e)
|
|
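For numeric operands, `div` keeps the identity `(a - a % b) / b`, now cast to LongType and NULL-guarded on division by zero when ANSI mode is off; the new branches apply the same truncating division to matching interval operands by first collapsing them to total months or total seconds. A plain-Python check of the numeric identity (using `math.fmod`, which, like Snowflake's `%` here, keeps the dividend's sign):

    import math

    def spark_div(a, b):
        # (a - a % b) / b truncates toward zero; non-ANSI mode maps b == 0 to NULL.
        if b == 0:
            return None
        return int((a - math.fmod(a, b)) / b)

    assert spark_div(7, 2) == 3
    assert spark_div(-7, 2) == -3  # truncation toward zero, not floor
    assert spark_div(7, 0) is None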
@@ -2637,9 +3092,13 @@ def map_unresolved_function(
                result_exp = snowpark_fn.element_at(data, spark_index)
                result_type = typ.value_type
            case _:
-               raise SnowparkConnectNotImplementedError(
+               exception = SnowparkConnectNotImplementedError(
                    f"Unsupported type {typ} for element_at function"
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.UNSUPPORTED_OPERATION
+               )
+               raise exception
    case "elt":
        n = snowpark_args[0]
        values = snowpark_fn.array_construct(*snowpark_args[1:])
@@ -2725,9 +3184,11 @@ def map_unresolved_function(
            result_type = [input_type.key_type, input_type.value_type]
        else:
            # Throw proper error for types without key_type/value_type attributes
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the ("ARRAY" or "MAP") type, however "{snowpark_arg_names[0]}" has the type "{str(input_type)}".'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
    case "expm1":
        spark_function_name = f"EXPM1({snowpark_arg_names[0]})"
        result_exp = snowpark_fn.exp(*snowpark_args) - 1
@@ -2848,9 +3309,11 @@ def map_unresolved_function(
            if not isinstance(
                snowpark_typed_args[1].typ, IntegerType
            ) and not isinstance(snowpark_typed_args[1].typ, LongType):
-               raise AnalysisException(
+               exception = AnalysisException(
                    "The 'scale' parameter of function 'floor' needs to be a int literal."
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception
            spark_function_name = (
                f"floor({snowpark_arg_names[0]}, {snowpark_arg_names[1]})"
            )
@@ -2867,17 +3330,23 @@ def map_unresolved_function(
            else:
                result_exp = TypedColumn(result_exp, lambda: [result_type])
        else:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `floor` requires 2 parameters but the actual number is {len(snowpark_args)}."
            )
+           attach_custom_error_code(
+               exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+           )
+           raise exception
    case "format_number":
        col, scale = snowpark_args
        col_type = snowpark_typed_args[0].typ
 
        if not isinstance(col_type, _NumericType):
-           raise TypeError(
+           exception = TypeError(
                f'Data type mismatch: Parameter 1 of format_number requires the "NUMERIC" type, however was {col_type}.'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        @cached_udf(
            input_types=[StringType(), LongType()],
@@ -2953,7 +3422,7 @@ def map_unresolved_function(
            if options is not None:
                if not isinstance(options, dict):
                    raise TypeError(
-                       "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
+                       "[snowpark_connect::invalid_input] [INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                    )
 
                max_chars_per_column = options.get(
@@ -2970,7 +3439,7 @@ def map_unresolved_function(
                        type(v).__name__, type(v).__name__.upper()
                    )
                    raise TypeError(
-                       f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
+                       f'[snowpark_connect::type_mismatch] [INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
                    )
 
            csv_data = csv_data.split(sep)
@@ -2993,7 +3462,7 @@ def map_unresolved_function(
                    and len(str(csv_data[i])) > max_chars_per_column
                ):
                    raise ValueError(
-                       f"Max chars per column exceeded {max_chars_per_column}: {str(csv_data[i])}"
+                       f"[snowpark_connect::invalid_input] Max chars per column exceeded {max_chars_per_column}: {str(csv_data[i])}"
                    )
 
            return results
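Inside UDF bodies the exception object never leaves the sandbox, so the hunks above embed the error code directly in the message text (the bracketed `[snowpark_connect::...]` prefix) rather than calling `attach_custom_error_code`. A sketch of the same convention, with a hypothetical helper name:

    # Sketch: in-UDF errors carry the code as a message prefix.
    def check_max_chars(value, max_chars_per_column: int) -> None:
        if len(str(value)) > max_chars_per_column:
            raise ValueError(
                f"[snowpark_connect::invalid_input] Max chars per column "
                f"exceeded {max_chars_per_column}: {value}"
            )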
@@ -3004,9 +3473,11 @@ def map_unresolved_function(
        if len(snowpark_arg_names) > 2 and snowpark_arg_names[2].startswith(
            "named_struct"
        ):
-           raise TypeError(
+           exception = TypeError(
                "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
            )
+           attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+           raise exception
 
        match snowpark_args:
            case [csv_data, schemas]:
@@ -3020,7 +3491,11 @@ def map_unresolved_function(
                    snowpark_fn.cast(csv_data, StringType()), schemas, options
                )
            case _:
-               raise ValueError("Unrecognized from_csv parameters")
+               exception = ValueError("Unrecognized from_csv parameters")
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
 
        result_exp = snowpark_fn.when(
            snowpark_args[0].is_null(), snowpark_fn.lit(None)
@@ -3029,15 +3504,19 @@ def map_unresolved_function(
        # TODO: support options.
        if len(snowpark_args) > 2:
            if not isinstance(snowpark_typed_args[2].typ, MapType):
-               raise AnalysisException(
+               exception = AnalysisException(
                    "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception
            if not isinstance(
                snowpark_typed_args[2].typ.key_type, StringType
            ) or not isinstance(snowpark_typed_args[2].typ.value_type, StringType):
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"""[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "{snowpark_typed_args[2].typ.simpleString().upper()}"."""
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
 
        spark_function_name = f"from_json({snowpark_arg_names[0]})"
        lit_schema = unwrap_literal(exp.unresolved_function.arguments[1])
@@ -3172,9 +3651,11 @@ def map_unresolved_function(
            input_arg_type: DataType,
            format: str = "yyyy-MM-dd HH:mm:ss",
        ):
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "from_unixtime({input_arg_name}, {format})" due to data type mismatch: Parameter 1 requires the "BIGINT" type, however "{input_arg_name}" has the type "{input_arg_type}"'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        # Strip decimal part of the number to ensure proper result after calling snowflake counterparts
        match snowpark_typed_args[0].typ:
@@ -3240,15 +3721,20 @@ def map_unresolved_function(
                        timestamp_format,
                    )
                except AnalysisException as e:
+                   attach_custom_error_code(e, ErrorCodes.INVALID_INPUT)
                    raise e
                except Exception:
                    # The second argument must either be a string or none. It can't be a column.
                    # So if it's anything that isn't a literal, we catch the error and just return NULL
                    result_exp = snowpark_fn.lit(None)
            case _:
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `from_unixtime` requires [1, 2] parameters but the actual number is {len(snowpark_args)}."
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
        result_type = StringType()
    case "from_utc_timestamp":
        target_tz = _map_from_spark_tz(snowpark_args[1])
@@ -3323,10 +3809,12 @@ def map_unresolved_function(
            for sp_col in snowpark_args
        ]
        if current_grouping_cols != spark_col_args:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"[GROUPING_ID_COLUMN_MISMATCH] Columns of grouping_id: {spark_col_args} doesnt match "
                f"Grouping columns: {current_grouping_cols}"
            )
+           attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+           raise exception
        if function_name == "grouping_id":
            result_exp = snowpark_fn.grouping_id(*snowpark_args)
        else:
@@ -3344,12 +3832,14 @@ def map_unresolved_function(
        if not snowflake_compat and not spark_sql_legacy_allow_hash_on_map_type:
            for arg in snowpark_typed_args:
                if any(isinstance(t, MapType) for t in arg.types):
-                   raise AnalysisException(
+                   exception = AnalysisException(
                        '[DATATYPE_MISMATCH.HASH_MAP_TYPE] Cannot resolve "hash(value)" due to data type mismatch: '
                        'Input to the function `hash` cannot contain elements of the "MAP" type. '
                        'In Spark, same maps may have different hashcode, thus hash expressions are prohibited on "MAP" elements. '
                        'To restore previous behavior set "spark.sql.legacy.allowHashOnMapType" to "true".'
                    )
+                   attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                   raise exception
        result_exp = snowpark_fn.hash(*snowpark_args)
        result_type = LongType()
    case "hex":
@@ -3388,9 +3878,11 @@ def map_unresolved_function(
        if isinstance(aggregate_input_typ, DecimalType):
            # mimic bug from Spark 3.5.3.
            # In 3.5.5 it's fixed and this exception shouldn't be thrown
-           raise ValueError(
+           exception = ValueError(
                "class org.apache.spark.sql.types.Decimal cannot be cast to class java.lang.Number (org.apache.spark.sql.types.Decimal is in unnamed module of loader 'app'; java.lang.Number is in module java.base of loader 'bootstrap')"
            )
+           attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+           raise exception
 
        histogram_return_type = ArrayType(
            StructType(
@@ -3409,8 +3901,6 @@ def map_unresolved_function(
            result should be either way good enough.
            """
 
-           from datetime import date, datetime, time, timedelta
-
            def __init__(self) -> None:
 
                # init the RNG for breaking ties in histogram merging. A fixed seed is specified here
@@ -3554,7 +4044,8 @@ def map_unresolved_function(
                # just increment 'bin'. This is not done now because we don't want to make any
                # assumptions about the range of numeric data being analyzed.
                if bin < self.n_used_bins and self.bins[bin][0] == v:
-                   self.bins[bin]
+                   bin_x, bin_y = self.bins[bin]
+                   self.bins[bin] = (bin_x, bin_y + 1)
                else:
                    self.bins.insert(bin + 1, (v, 1.0))
                    self.n_used_bins += 1
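The replaced line `self.bins[bin]` was a bare expression with no effect; since each bin is stored as an immutable `(value, count)` tuple, incrementing the count means rebuilding the pair, which is exactly what the fix does. The bug in isolation:

    # Tuples are immutable, so the count is bumped by replacing
    # the whole (value, count) entry rather than mutating it.
    bins = [(1.0, 2.0), (3.0, 1.0)]
    bin, v = 0, 1.0
    if bins[bin][0] == v:
        bin_x, bin_y = bins[bin]
        bins[bin] = (bin_x, bin_y + 1)
    assert bins[0] == (1.0, 3.0)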
@@ -3604,13 +4095,12 @@ def map_unresolved_function(
            input_types=[aggregate_input_typ, IntegerType()],
        )
 
-       result_exp =
+       result_exp = _resolve_aggregate_exp(
            _histogram_numeric_udaf(
                snowpark_args[0], snowpark_fn.lit(snowpark_args[1])
            ),
            histogram_return_type,
        )
-       result_type = histogram_return_type
    case "hll_sketch_agg":
        # check if input type is correct
        if type(snowpark_typed_args[0].typ) not in [
@@ -3620,9 +4110,11 @@ def map_unresolved_function(
            BinaryType,
        ]:
            type_str = snowpark_typed_args[0].typ.simpleString().upper()
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT" or "STRING" or "BINARY") type, however "{snowpark_arg_names[0]}" has the type "{type_str}".'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        match snowpark_args:
            case [sketch]:
@@ -3761,10 +4253,12 @@ def map_unresolved_function(
                type_mismatched = True
 
        if type_mismatched:
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.DATA_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: '
                f'Input to `in` should all be the same type, but it\'s [{", ".join(type_names)}].'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        try:
            result_exp = snowpark_args[0].in_(snowpark_args[1:])
@@ -3797,9 +4291,11 @@ def map_unresolved_function(
        except Exception:
            type_str = str(input_type)
 
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "inline({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "ARRAY<STRUCT>" type, however "{snowpark_arg_names[0]}" has the type {type_str}.'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        is_outer = function_name == "inline_outer"
 
@@ -3891,11 +4387,13 @@ def map_unresolved_function(
            if arg_type not in allowed_arg_types:
                spark_type = map_snowpark_to_pyspark_types(arg_type)
 
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: """
                    f"""Parameter {arg_idx+3} requires the ("BOOLEAN" or "TINYINT" or "SMALLINT" or "INT" or "BIGINT" or "FLOAT" or "DOUBLE" or "STRING") type, """
                    f"""however "{snowpark_arg_names[arg_idx+2]}" has the type "{spark_type.simpleString()}"."""
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
 
        arg_values = snowpark_fn.cast(
            snowpark_fn.array_construct(
@@ -3924,9 +4422,11 @@ def map_unresolved_function(
        if not isinstance(
            snowpark_typed_args[0].typ, StringType
        ) and not isinstance(snowpark_typed_args[0].typ, NullType):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "json_array_length({",".join(snowpark_arg_names)})" due to data type mismatch: Parameter 1 requires the "STRING" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        arr_exp = snowpark_fn.function("TRY_PARSE_JSON")(snowpark_args[0])
        result_exp = snowpark_fn.array_size(arr_exp)
        result_type = LongType()
@@ -3934,9 +4434,11 @@ def map_unresolved_function(
        if not isinstance(
            snowpark_typed_args[0].typ, StringType
        ) and not isinstance(snowpark_typed_args[0].typ, NullType):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "json_object_keys({",".join(snowpark_arg_names)})" due to data type mismatch: Parameter 1 requires the "STRING" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ.simpleString().upper()}"."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        obj_exp = snowpark_fn.function("TRY_PARSE_JSON")(
            snowpark_args[0], snowpark_fn.lit("d")
        )
@@ -4080,9 +4582,11 @@ def map_unresolved_function(
                else snowpark_fn.builtin("try_to_date")(*snowpark_args)
            )
            case _:
-               raise AnalysisException(
+               exception = AnalysisException(
                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "last_day({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
 
        result_exp = snowpark_fn.last_day(result_exp)
        result_type = DateType()
@@ -4156,7 +4660,13 @@ def map_unresolved_function(
                    snowpark_fn.lit(None),
                ).otherwise(snowpark_fn.lit(-1))
            case _:
-               raise ValueError(f"Invalid number of arguments to {function_name}")
+               exception = ValueError(
+                   f"Invalid number of arguments to {function_name}"
+               )
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
        result_type = LongType()
    case "like":
        result_exp = snowpark_fn.call_function("like", *snowpark_args)
@@ -4329,6 +4839,17 @@ def map_unresolved_function(
        date_str_exp = snowpark_fn.concat(y, dash, m, dash, d)
        result_exp = snowpark_fn.builtin(snowpark_function)(date_str_exp)
        result_type = DateType()
+   case "make_dt_interval":
+       # Pad argument names for display purposes
+       padded_arg_names = snowpark_arg_names.copy()
+       while len(padded_arg_names) < 3:  # days, hours, minutes are integers
+           padded_arg_names.append("0")
+       if len(padded_arg_names) < 4:  # seconds can be decimal
+           padded_arg_names.append("0.000000")
+
+       spark_function_name = f"make_dt_interval({', '.join(padded_arg_names)})"
+       result_exp = snowpark_fn.interval_day_time_from_parts(*snowpark_args)
+       result_type = DayTimeIntervalType()
    case "make_timestamp" | "make_timestamp_ltz" | "make_timestamp_ntz":
        y, m, d, h, mins = map(lambda col: col.cast(LongType()), snowpark_args[:5])
        y_abs = snowpark_fn.abs(y)
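The padding in the new `make_dt_interval` branch only affects the displayed function name: Spark renders omitted arguments as their defaults (0 for days/hours/minutes, 0.000000 for seconds). The padding logic in isolation:

    def padded_dt_interval_name(arg_names):
        # days, hours, minutes default to "0"; seconds to "0.000000".
        padded = list(arg_names)
        while len(padded) < 3:
            padded.append("0")
        if len(padded) < 4:
            padded.append("0.000000")
        return f"make_dt_interval({', '.join(padded)})"

    assert padded_dt_interval_name(["1"]) == "make_dt_interval(1, 0, 0, 0.000000)"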
@@ -4382,6 +4903,15 @@ def map_unresolved_function(
        result_exp = snowpark_fn.when(
            snowpark_fn.is_null(parsed_str_exp), snowpark_fn.lit(None)
        ).otherwise(make_timestamp_res)
+   case "make_ym_interval":
+       # Pad argument names for display purposes
+       padded_arg_names = snowpark_arg_names.copy()
+       while len(padded_arg_names) < 2:  # years, months
+           padded_arg_names.append("0")
+
+       spark_function_name = f"make_ym_interval({', '.join(padded_arg_names)})"
+       result_exp = snowpark_fn.interval_year_month_from_parts(*snowpark_args)
+       result_type = YearMonthIntervalType()
    case "map":
        allow_duplicate_keys = (
            global_config.spark_sql_mapKeyDedupPolicy == "LAST_WIN"
@@ -4400,13 +4930,21 @@ def map_unresolved_function(
            )
            result_type = MapType(NullType(), NullType())
        elif (num_args % 2) == 1:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `map` requires 2n (n > 0) parameters but the actual number is {num_args}"
            )
+           attach_custom_error_code(
+               exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+           )
+           raise exception
        elif key_type is None or isinstance(key_type, NullType):
-           raise SparkRuntimeException(
+           exception = SparkRuntimeException(
                "[NULL_MAP_KEY] Cannot use null as map key."
            )
+           attach_custom_error_code(
+               exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+           )
+           raise exception
        else:
            value_type = value_type if value_type else NullType()
 
@@ -4452,7 +4990,7 @@ def map_unresolved_function(
                for key, value in m.items():
                    if key in new_map and not allow_dups:
                        raise ValueError(
-                           DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
+                           f"[snowpark_connect::invalid_operation] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
                        )
                    else:
                        new_map[key] = value
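Duplicate handling follows `spark.sql.mapKeyDedupPolicy`: with `LAST_WIN` a later value silently overwrites, otherwise the prefixed error is raised. The merge loop reduced to plain Python (the template's exact wording is an assumption here):

    DUPLICATE_KEY_TEMPLATE = "Duplicate map key {key} was found"  # assumed wording

    def merge_entries(entries, allow_dups):
        new_map = {}
        for key, value in entries:
            if key in new_map and not allow_dups:
                raise ValueError(
                    f"[snowpark_connect::invalid_operation] "
                    f"{DUPLICATE_KEY_TEMPLATE.format(key=key)}"
                )
            new_map[key] = value  # LAST_WIN: the later value overwrites
        return new_map

    assert merge_entries([("a", 1), ("a", 2)], allow_dups=True) == {"a": 2}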
@@ -4483,13 +5021,17 @@ def map_unresolved_function(
        result_type = MapType(key_type, value_type)
    case "map_contains_key":
        if isinstance(snowpark_typed_args[0].typ, NullType):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.MAP_FUNCTION_DIFF_TYPES] Cannot resolve "map_contains_key({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Input to `map_contains_key` should have been "MAP" followed by a value with same key type, but it's ["VOID", "INT"]."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        if isinstance(snowpark_typed_args[1].typ, NullType):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.NULL_TYPE] Cannot resolve "map_contains_key({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: Null typed values cannot be used as arguments of `map_contains_key`."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        args = (
            [snowpark_args[1], snowpark_args[0]]
            if isinstance(snowpark_typed_args[0].typ, MapType)
@@ -4499,23 +5041,29 @@ def map_unresolved_function(
        result_type = BooleanType()
    case "map_entries":
        if not isinstance(snowpark_typed_args[0].typ, MapType):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "map_entries({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "MAP" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        key_type = snowpark_typed_args[0].typ.key_type
        value_type = snowpark_typed_args[0].typ.value_type
 
        # SNOW-2040715
        def _map_entries(obj: dict):
            if obj is None:
-               raise TypeError(
+               raise TypeError(
+                   f"[snowpark_connect::type_mismatch] Expected MapType but received {obj} instead."
+               )
            return [{"key": key, "value": value} for key, value in obj.items()]
 
        arg_type = snowpark_typed_args[0].typ
        if not isinstance(arg_type, MapType):
-           raise TypeError(
+           exception = TypeError(
                f"map_entries requires a MapType argument, got {arg_type}"
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        map_entries = snowpark_fn.udf(
            _map_entries,
@@ -4545,9 +5093,11 @@ def map_unresolved_function(
        if not isinstance(keys_type, ArrayType) or not isinstance(
            values_type, ArrayType
        ):
-           raise TypeError(
+           exception = TypeError(
                f"map_from_arrays requires two arguments of type ArrayType, got {keys_type} and {values_type}"
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        key_type = keys_type.element_type if keys_type.structured else VariantType()
        value_type = (
            values_type.element_type if values_type.structured else VariantType()
@@ -4562,7 +5112,7 @@ def map_unresolved_function(
                return None
            if len(keys) != len(values):
                raise ValueError(
-                   "The key array and value array of must have the same length"
+                   "[snowpark_connect::internal_error] The key array and value array of must have the same length"
                )
 
            if not allow_dups and len(set(keys)) != len(keys):
@@ -4570,7 +5120,7 @@ def map_unresolved_function(
                for key in keys:
                    if key in seen:
                        raise ValueError(
-                           DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
+                           f"[snowpark_connect::invalid_operation] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
                        )
                    seen.add(key)
            # will overwrite the last occurrence if there are duplicates.
@@ -4592,9 +5142,11 @@ def map_unresolved_function(
        result_type = MapType(key_type, value_type)
    case "map_from_entries":
        if not isinstance(snowpark_typed_args[0].typ, ArrayType):
-           raise TypeError(
+           exception = TypeError(
                f"map_from_entries requires an argument of type ArrayType, got {snowpark_typed_args[0].typ}"
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        entry_type = snowpark_typed_args[0].typ.element_type
 
@@ -4613,9 +5165,11 @@ def map_unresolved_function(
                value_type = entry_type.fields[1].datatype
                [key_field, value_field] = entry_type.names
            case _:
-               raise TypeError(
+               exception = TypeError(
                    f"map_from_entries requires an array of StructType, got array of {entry_type}"
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
 
        last_win_dedup = global_config.spark_sql_mapKeyDedupPolicy == "LAST_WIN"
 
@@ -4656,7 +5210,11 @@ def map_unresolved_function(
    case "map_keys":
        arg_type = snowpark_typed_args[0].typ
        if not isinstance(arg_type, MapType):
-           raise TypeError(f"map_keys requires a MapType argument, got {arg_type}")
+           exception = TypeError(
+               f"map_keys requires a MapType argument, got {arg_type}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        if arg_type.structured:
            result_exp = snowpark_fn.map_keys(snowpark_args[0])
@@ -4669,9 +5227,11 @@ def map_unresolved_function(
        # technically this could be done with a lateral join, but it's probably not worth the effort
        arg_type = snowpark_typed_args[0].typ
        if not isinstance(arg_type, (MapType, NullType)):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"map_values requires a MapType argument, got {arg_type}"
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
 
        def _map_values(obj: dict) -> list:
            if obj is None:
@@ -4770,20 +5330,30 @@ def map_unresolved_function(
            ):
                pass
            elif not isinstance(arg_type, StringType):
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter {i + 1} requires the "STRING" type, however "{arg_name}" has the type "{arg_type.simpleString().upper()}".;"""
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
            elif (
                exp.unresolved_function.arguments[i].WhichOneof("expr_type")
                != "literal"
            ):
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "{spark_function_name}" due to data type mismatch: the input {col_arg_names[i]} should be a foldable "STRING" expression; however, got "{arg_name}"."""
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
            elif len(arg_name) != 1:
-               raise AnalysisException(
+               exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.INPUT_SIZE_NOT_ONE] Cannot resolve "{spark_function_name}" due to data type mismatch: Length of {col_arg_names[i]} should be 1."""
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
 
        random_tag_suffix = "".join(random.sample(string.ascii_uppercase, 6))
        tags = [
@@ -4823,17 +5393,17 @@ def map_unresolved_function(
        # MD5 in Spark only accepts BinaryType or types that can be implicitly cast to it (StringType)
        if not snowflake_compat:
            if not isinstance(snowpark_typed_args[0].typ, (BinaryType, StringType)):
-               raise AnalysisException(
+               exception = AnalysisException(
                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "md5({snowpark_arg_names[0]})" due to data type mismatch: '
                    f'Parameter 1 requires the "BINARY" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
                )
+               attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+               raise exception
        result_exp = snowpark_fn.md5(snowpark_args[0])
        result_type = StringType(32)
    case "median":
-       result_exp =
-
-       result_exp = TypedColumn(
-           snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
+       result_exp = _resolve_aggregate_exp(
+           snowpark_fn.median(snowpark_args[0]), DoubleType()
        )
    case "min":
        result_exp = _handle_structured_aggregate_result(
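`median`, `histogram_numeric`, and the percentile branches now funnel through `_resolve_aggregate_exp` instead of hand-casting the aggregate to FloatType at each call site. The diff only shows the call sites, not the helper; a hypothetical reading, assuming it pairs the aggregate expression with its declared Spark result type:

    # Hypothetical sketch only -- the helper's body is not part of this diff.
    # TypedColumn here stands in for the wrapper used elsewhere in this module.
    class TypedColumn:
        def __init__(self, col, types_fn):
            self.col, self.types_fn = col, types_fn

    def _resolve_aggregate_exp(result_exp, result_type):
        # Associate the aggregate expression with its Spark-facing type
        # (e.g. DoubleType for median) in one place.
        return TypedColumn(result_exp, lambda: [result_type])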
@@ -4924,9 +5494,13 @@ def map_unresolved_function(
            expanded_args.append(arg_typed_column.col)
 
        if len(expanded_args) % 2 != 0:
-           raise ValueError(
+           exception = ValueError(
                "Number of arguments must be even (a list of key-value pairs)."
            )
+           attach_custom_error_code(
+               exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+           )
+           raise exception
 
        # field types for the schema
        field_names = []
@@ -4992,27 +5566,37 @@ def map_unresolved_function(
            spark_function_name = f"(- {snowpark_arg_names[0]})"
        else:
            spark_function_name = f"negative({snowpark_arg_names[0]})"
-       if
+       if (
+           isinstance(arg_type, _NumericType)
+           or isinstance(arg_type, YearMonthIntervalType)
+           or isinstance(arg_type, DayTimeIntervalType)
+       ):
            # Instead of using snowpark_fn.negate which can generate invalid SQL for nested minus operations,
            # use a direct multiplication by -1 which generates cleaner SQL
            result_exp = snowpark_args[0] * snowpark_fn.lit(-1)
        elif isinstance(arg_type, StringType):
            if spark_sql_ansi_enabled:
-               raise NumberFormatException(
+               exception = NumberFormatException(
                    f'The value \'{snowpark_args[0]}\' of the type {arg_type} cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+               raise exception
            else:
                result_exp = snowpark_fn.lit(None)
        elif isinstance(arg_type, NullType):
            result_exp = snowpark_fn.lit(None)
        else:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve {spark_function_name} due to data type mismatch: "
                f'Parameter 1 requires the ("NUMERIC") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_type = (
            snowpark_typed_args[0].types
            if isinstance(arg_type, _NumericType)
+           or isinstance(arg_type, YearMonthIntervalType)
+           or isinstance(arg_type, DayTimeIntervalType)
            else DoubleType()
        )
    case "next_day":
@@ -5020,9 +5604,11 @@ def map_unresolved_function(
        date = unwrap_literal(exp.unresolved_function.arguments[1])
        if date is None or date.lower() not in dates:
            if spark_sql_ansi_enabled:
-               raise IllegalArgumentException(
+               exception = IllegalArgumentException(
                    """Illegal input for day of week. If necessary set "spark.sql.ansi.enabled" to false to bypass this error."""
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception
            else:
                result_exp = snowpark_fn.lit(None)
        else:
@@ -5081,9 +5667,11 @@ def map_unresolved_function(
        )
    case "octet_length":
        if isinstance(snowpark_typed_args[0].typ, (ArrayType, MapType)):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "octet_length({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the ("STRING" or "BINARY") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}"."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_exp = snowpark_fn.octet_length(snowpark_args[0])
        if isinstance(snowpark_typed_args[0].typ, _FractionalType):
            # All decimal types have to have 3 characters at a minimum.
@@ -5215,9 +5803,11 @@ def map_unresolved_function(
        )
 
        if not isinstance(snowpark_typed_args[0].typ, (_NumericType, StringType)):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})" due to data type mismatch: Parameter 1 requires the "NUMERIC" type, however "value" has the type "{snowpark_typed_args[0].typ}".;"""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        elif len(snowpark_args) == 3:
 
            class PercentileUDAF:
@@ -5237,7 +5827,9 @@ def map_unresolved_function(
                def accumulate(self, value, percentages, frequency: int):
 
                    if frequency < 0:
-                       raise ValueError(
+                       raise ValueError(
+                           f"[snowpark_connect::invalid_input] Negative values found in {frequency}"
+                       )
 
                    if not self.percentages:
                        self.percentages = percentages
@@ -5247,7 +5839,7 @@ def map_unresolved_function(
                        for percentage in self.percentages
                    ):
                        raise ValueError(
-                           "The percentage must be between [0.0, 1.0]"
+                           "[snowpark_connect::invalid_input] The percentage must be between [0.0, 1.0]"
                        )
 
                    if value is None:
@@ -5293,7 +5885,9 @@ def map_unresolved_function(
                    Algorithm based on Spark code: https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala#L194
                    """
                    if not accumulated_counts:
-                       raise ValueError(
+                       raise ValueError(
+                           "[snowpark_connect::internal_error] accumulated_counts cannot be empty"
+                       )
 
                    total_count = accumulated_counts[-1][1]
                    position = (total_count - 1) * percentile
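The interpolation follows the linked Spark algorithm: the target position is `(total_count - 1) * percentile`, and the result is interpolated between the values whose accumulated counts bracket that position. The same math on plain lists:

    import math

    def percentile_from_counts(accumulated_counts, percentile):
        # accumulated_counts: sorted [(value, cumulative_count), ...]
        if not accumulated_counts:
            raise ValueError("accumulated_counts cannot be empty")
        total_count = accumulated_counts[-1][1]
        position = (total_count - 1) * percentile
        lower, higher = math.floor(position), math.ceil(position)

        def value_at(row_index):
            for value, cum in accumulated_counts:
                if cum > row_index:
                    return value
            return accumulated_counts[-1][0]

        if lower == higher:
            return float(value_at(lower))
        below, above = value_at(lower), value_at(higher)
        return below + (above - below) * (position - lower)

    # Median of [1, 2, 3, 4]: position 1.5, halfway between 2 and 3.
    assert percentile_from_counts([(1, 1), (2, 2), (3, 3), (4, 4)], 0.5) == 2.5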
@@ -5353,7 +5947,7 @@ def map_unresolved_function(
            )
            result_type = DoubleType()
 
-           result_exp =
+           result_exp = _resolve_aggregate_exp(
                _percentile_udaf(column_value, percentage, snowpark_args[2]),
                result_type,
            )
@@ -5371,26 +5965,45 @@ def map_unresolved_function(
                    for arg in array_func.arguments
                ]
            )
-           result_exp = snowpark_fn.cast(
-               result_exp,
-               ArrayType(element_type=FloatType(), contains_null=False),
-           )
            result_type = ArrayType(element_type=DoubleType(), contains_null=False)
+           result_exp = _resolve_aggregate_exp(result_exp, result_type)
            spark_function_name = f"{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, 1)"
        else:
            result_exp = snowpark_fn.function("percentile_cont")(
                _check_percentile_percentage(exp.unresolved_function.arguments[1])
            ).within_group(column_value)
-           result_exp =
-               snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
-           )
+           result_exp = _resolve_aggregate_exp(result_exp, DoubleType())
            spark_function_name = f"{function_name}({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, 1)"
    case "percentile_cont" | "percentiledisc":
        if function_name == "percentiledisc":
            function_name = "percentile_disc"
+       order_by_col = snowpark_args[0]
+       args = exp.unresolved_function.arguments
+       if len(args) != 3:
+           exception = AssertionError(
+               f"{function_name} expected 3 args but got {len(args)}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+           raise exception
+       # literal value 0.0 - 1.0
+       percentage_arg = args[1]
+       sort_direction = args[2].sort_order.direction
+       direction_str = ""  # defaultValue
+       if (
+           sort_direction
+           == expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+       ):
+           direction_str = "DESC"
+
+       # Apply sort direction to the order_by column
+       if direction_str == "DESC":
+           order_by_col_with_direction = order_by_col.desc()
+       else:
+           order_by_col_with_direction = order_by_col.asc()
+
        result_exp = snowpark_fn.function(function_name)(
-           _check_percentile_percentage(
-       ).within_group(
+           _check_percentile_percentage(percentage_arg)
+       ).within_group(order_by_col_with_direction)
        result_exp = (
            TypedColumn(
                snowpark_fn.cast(result_exp, FloatType()), lambda: [DoubleType()]
@@ -5399,7 +6012,8 @@ def map_unresolved_function(
            else TypedColumnWithDeferredCast(result_exp, lambda: [DoubleType()])
        )
 
-
+       direction_part = f" {direction_str}" if direction_str else ""
+       spark_function_name = f"{function_name}({unwrap_literal(percentage_arg)}) WITHIN GROUP (ORDER BY {snowpark_arg_names[0]}{direction_part})"
    case "pi":
        spark_function_name = "PI()"
        result_exp = snowpark_fn.lit(math.pi)
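Previously the sort direction on `percentile_cont`/`percentile_disc` was dropped; the new code threads it into `within_group` and into the displayed name. For the continuous variant, a DESC ordering is the mirror image of ASC, so the result at fraction p equals the ASC result at 1 - p:

    def percentile_cont(values, p, descending=False):
        # Continuous percentile over an explicit ordering.
        ordered = sorted(values, reverse=descending)
        pos = (len(ordered) - 1) * p
        lo = int(pos)
        hi = min(lo + 1, len(ordered) - 1)
        return ordered[lo] + (ordered[hi] - ordered[lo]) * (pos - lo)

    assert percentile_cont([10, 20, 30, 40], 0.25) == 17.5
    assert percentile_cont([10, 20, 30, 40], 0.25, descending=True) == 32.5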
@@ -5428,9 +6042,11 @@ def map_unresolved_function(
            result_exp = snowpark_fn.cast(result_exp, result_type)
            result_exp = TypedColumn(result_exp, lambda: [result_type])
        else:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""pyspark.errors.exceptions.captured.AnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{dividend_type}" and "{divisor_type}")."""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
    case "posexplode" | "posexplode_outer":
        input_type = snowpark_typed_args[0].typ
        is_nullable = function_name == "posexplode_outer"
@@ -5510,9 +6126,11 @@ def map_unresolved_function(
                input_type.value_type,
            ]
        else:
-           raise TypeError(
+           exception = TypeError(
                f"Data type mismatch: {function_name} requires an array or map input, but got {input_type}."
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_exp = snowpark_fn.call_table_function(
            posexplode_udtf.name, snowpark_args[0], snowpark_fn.lit(function_name)
        )
@@ -5535,25 +6153,35 @@ def map_unresolved_function(
    case "positive":
        arg_type = snowpark_typed_args[0].typ
        spark_function_name = f"(+ {snowpark_arg_names[0]})"
-       if
+       if (
+           isinstance(arg_type, _NumericType)
+           or isinstance(arg_type, YearMonthIntervalType)
+           or isinstance(arg_type, DayTimeIntervalType)
+       ):
            result_exp = snowpark_args[0]
        elif isinstance(arg_type, StringType):
            if spark_sql_ansi_enabled:
-               raise NumberFormatException(
+               exception = NumberFormatException(
                    f'The value \'{snowpark_args[0]}\' of the type {arg_type} cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+               raise exception
            else:
                result_exp = snowpark_fn.lit(None)
        elif isinstance(arg_type, NullType):
            result_exp = snowpark_fn.lit(None)
        else:
-           raise AnalysisException(
+           exception = AnalysisException(
                f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "(+ {snowpark_arg_names[0]}" due to data type mismatch: '
                f'Parameter 1 requires the ("NUMERIC") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0]}".'
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_type = (
            snowpark_typed_args[0].types
            if isinstance(arg_type, _NumericType)
+           or isinstance(arg_type, YearMonthIntervalType)
+           or isinstance(arg_type, DayTimeIntervalType)
            else DoubleType()
        )
 
@@ -5616,9 +6244,11 @@ def map_unresolved_function(
        if not isinstance(
            snowpark_typed_args[0].typ, (IntegerType, LongType, NullType)
        ):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT") type, however {snowpark_arg_names[0]} has the type "{snowpark_typed_args[0].typ}"""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_exp = snowpark_fn.random(unwrap_literal(args[0]))
    else:
        result_exp = snowpark_fn.random()
@@ -6006,9 +6636,11 @@ def map_unresolved_function(
            ),
            snowpark_typed_args[0].typ,
        ):
-           raise ArithmeticException(
+           exception = ArithmeticException(
                '[ARITHMETIC_OVERFLOW] Overflow. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.'
            )
+           attach_custom_error_code(exception, ErrorCodes.ARITHMETIC_ERROR)
+           raise exception
        if len(snowpark_args) == 1:
            spark_function_name = f"{function_name}({snowpark_arg_names[0]}, 0)"
            result_exp = snowpark_fn.round(snowpark_args[0], snowpark_fn.lit(0))
@@ -6063,21 +6695,25 @@ def map_unresolved_function(
            exp.unresolved_function.arguments[0].WhichOneof("expr_type")
            != "literal"
        ):
-           raise AnalysisException(
+           exception = AnalysisException(
                "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
                f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
                'the input csv should be a foldable "STRING" expression; however, '
                f'got "{snowpark_arg_names[0]}".'
            )
+           attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+           raise exception
 
        if isinstance(snowpark_typed_args[0].typ, StringType):
            if exp.unresolved_function.arguments[0].literal.string == "":
-               raise AnalysisException(
+               exception = AnalysisException(
                    "[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "
                    f'"schema_of_csv({snowpark_arg_names[0]})" due to data type mismatch: '
                    'the input csv should be a foldable "STRING" expression; however, '
                    f'got "{snowpark_arg_names[0]}".'
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception
 
            snowpark_args = [
                typed_arg.column(to_semi_structure=True)
@@ -6132,7 +6768,11 @@ def map_unresolved_function(
            case [csv_data, options]:
                result_exp = _schema_of_csv(csv_data, options)
            case _:
-               raise ValueError("Unrecognized from_csv parameters")
+               exception = ValueError("Unrecognized from_csv parameters")
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+               )
+               raise exception
        result_type = StringType()
    case "schema_of_json":
 
@@ -6250,15 +6890,19 @@ def map_unresolved_function(
                obj = json.loads(json_str)
                return _infer_pyspark_type(obj)
            except json.JSONDecodeError as e:
-               raise ValueError(
+               raise ValueError(
+                   f"[snowpark_connect::invalid_input] Invalid JSON: {e}"
+               )
 
        if (
            exp.unresolved_function.arguments[0].WhichOneof("expr_type")
            != "literal"
        ):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.NON_FOLDABLE_INPUT] Cannot resolve "schema_of_json({",".join(snowpark_arg_names)})" due to data type mismatch: the input json should be a foldable "STRING" expression; however, got "{",".join(snowpark_arg_names)}"."""
            )
+           attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+           raise exception
        result_exp = _infer_schema(snowpark_args[0])
        result_type = StringType()
    case "sec":
@@ -6299,12 +6943,14 @@ def map_unresolved_function(
            not isinstance(snowpark_typed_args[0].typ, _IntegralType)
            or not isinstance(snowpark_typed_args[1].typ, _IntegralType)
        ):
-           raise AnalysisException(
+           exception = AnalysisException(
                f"""[DATATYPE_MISMATCH.SEQUENCE_WRONG_INPUT_TYPES] Cannot resolve "sequence({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: `sequence` uses the wrong parameter type. The parameter type must conform to:
1. The start and stop expressions must resolve to the same type.
2. Otherwise, if start and stop expressions resolve to the "INTEGRAL" type, then the step expression must resolve to the same type.
"""
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        result_exp = snowpark_fn.cast(
            snowpark_fn.sequence(*snowpark_args),
            ArrayType(LongType(), contains_null=False),
@@ -6322,15 +6968,21 @@ def map_unresolved_function(
        num_bits = unwrap_literal(exp.unresolved_function.arguments[1])
        if num_bits is None:
            if spark_sql_ansi_enabled:
-               raise NumberFormatException(
+               exception = NumberFormatException(
                    f"""[CAST_INVALID_INPUT] The value {snowpark_arg_names[0]} of the type "{snowpark_typed_args[0].typ}" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type."""
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+               raise exception
            result_exp = snowpark_fn.lit(None)
            result_type = StringType()
        elif num_bits not in bit_values:
-           raise IllegalArgumentException(
+           exception = IllegalArgumentException(
                f"""requirement failed: numBits {num_bits} is not in the permitted values (0, 224, 256, 384, 512)"""
            )
+           attach_custom_error_code(
+               exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+           )
+           raise exception
        else:
            # 0 equivalent to 256 in PySpark, but is not allowed in Snowpark
            num_bits = 256 if num_bits == 0 else num_bits
@@ -6400,11 +7052,43 @@ def map_unresolved_function(
|
|
|
6400
7052
|
fn_name = "sign"
|
|
6401
7053
|
|
|
6402
7054
|
spark_function_name = f"{fn_name}({snowpark_arg_names[0]})"
|
|
6403
|
-
|
|
6404
|
-
|
|
6405
|
-
|
|
6406
|
-
|
|
6407
|
-
|
|
7055
|
+
|
|
7056
|
+
if isinstance(snowpark_typed_args[0].typ, YearMonthIntervalType):
|
|
7057
|
+
# Use SQL expression for zero year-month interval comparison
|
|
7058
|
+
result_exp = (
|
|
7059
|
+
snowpark_fn.when(
|
|
7060
|
+
snowpark_args[0]
|
|
7061
|
+
> snowpark_fn.sql_expr("INTERVAL '0-0' YEAR TO MONTH"),
|
|
7062
|
+
snowpark_fn.lit(1.0),
|
|
7063
|
+
)
|
|
7064
|
+
.when(
|
|
7065
|
+
snowpark_args[0]
|
|
7066
|
+
< snowpark_fn.sql_expr("INTERVAL '0-0' YEAR TO MONTH"),
|
|
7067
|
+
snowpark_fn.lit(-1.0),
|
|
7068
|
+
)
|
|
7069
|
+
.otherwise(snowpark_fn.lit(0.0))
|
|
7070
|
+
)
|
|
7071
|
+
elif isinstance(snowpark_typed_args[0].typ, DayTimeIntervalType):
|
|
7072
|
+
# Use SQL expression for zero day-time interval comparison
|
|
7073
|
+
result_exp = (
|
|
7074
|
+
snowpark_fn.when(
|
|
7075
|
+
snowpark_args[0]
|
|
7076
|
+
> snowpark_fn.sql_expr("INTERVAL '0 0:0:0' DAY TO SECOND"),
|
|
7077
|
+
snowpark_fn.lit(1.0),
|
|
7078
|
+
)
|
|
7079
|
+
.when(
|
|
7080
|
+
snowpark_args[0]
|
|
7081
|
+
< snowpark_fn.sql_expr("INTERVAL '0 0:0:0' DAY TO SECOND"),
|
|
7082
|
+
snowpark_fn.lit(-1.0),
|
|
7083
|
+
)
|
|
7084
|
+
.otherwise(snowpark_fn.lit(0.0))
|
|
7085
|
+
)
|
|
7086
|
+
else:
|
|
7087
|
+
result_exp = snowpark_fn.when(
|
|
7088
|
+
snowpark_args[0] == NAN, snowpark_fn.lit(NAN)
|
|
7089
|
+
).otherwise(
|
|
7090
|
+
snowpark_fn.cast(snowpark_fn.sign(snowpark_args[0]), DoubleType())
|
|
7091
|
+
)
|
|
6408
7092
|
result_type = DoubleType()
|
|
6409
7093
|
case "sin":
|
|
6410
7094
|
spark_function_name = f"SIN({snowpark_arg_names[0]})"
|
|
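Snowflake's numeric SIGN does not accept interval columns, so the new `sign` branch compares the operand against a zero-interval literal instead. A standalone sketch of the same expression shape in Snowpark; the column name `delta` is illustrative only, and no query is issued here since `Column` construction is lazy:

```python
# Illustrative only: the three-way comparison the new "sign" branch builds
# for a day-time interval column named "delta" (name assumed, not from the diff).
import snowflake.snowpark.functions as snowpark_fn

zero = snowpark_fn.sql_expr("INTERVAL '0 0:0:0' DAY TO SECOND")
sign_exp = (
    snowpark_fn.when(snowpark_fn.col("delta") > zero, snowpark_fn.lit(1.0))
    .when(snowpark_fn.col("delta") < zero, snowpark_fn.lit(-1.0))
    .otherwise(snowpark_fn.lit(0.0))
)
```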
@@ -6504,9 +7188,11 @@ def map_unresolved_function(
            if len(snowpark_args) == 2 and not isinstance(
                snowpark_typed_args[1].typ, BooleanType
            ):
-                raise AnalysisException(
+                exception = AnalysisException(
                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the "BOOLEAN" type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ.simpleString().upper()}"'
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            sort_asc = (
                unwrap_literal(exp.unresolved_function.arguments[1])
                if len(snowpark_args) == 2
@@ -6557,10 +7243,10 @@ def map_unresolved_function(
            import re

            try:
-                re.compile(pattern)
+                compiled_pattern = re.compile(pattern)
            except re.error:
                raise ValueError(
-                    f"Failed to split string, provided pattern: {pattern} is invalid"
+                    f"[snowpark_connect::invalid_input] Failed to split string, provided pattern: {pattern} is invalid"
                )

            if limit == 1:
@@ -6579,7 +7265,7 @@ def map_unresolved_function(

            match pattern:
                case "|":
-                    split_result =
+                    split_result = compiled_pattern.split(input, 0)
                    input_limit = limit + 1 if limit > 0 else len(split_result)
                    return (
                        split_result
@@ -6591,7 +7277,7 @@ def map_unresolved_function(
                case "^":
                    return [input]
                case _:
-                    return
+                    return compiled_pattern.split(input, maxsplit)

            def split_string(str_: Column, pattern: Column, limit: Column):
                native_split = _split(str_, pattern, limit)
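The old helper compiled the pattern only to validate it and threw the result away, which left the `"|"` and default branches without a usable regex (note the dangling `split_result =` and bare `return` on the removed lines). The fix binds `compiled_pattern` once and reuses it for every split. A self-contained sketch of that validate-once, reuse-everywhere shape:

```python
# Minimal standalone sketch of the pattern the split helper now follows
# (not the diff's exact code).
import re


def split_once(pattern: str, text: str, maxsplit: int) -> list:
    try:
        compiled_pattern = re.compile(pattern)  # validate and keep the result
    except re.error:
        raise ValueError(
            f"[snowpark_connect::invalid_input] Failed to split string, "
            f"provided pattern: {pattern} is invalid"
        )
    return compiled_pattern.split(text, maxsplit)


print(split_once(",", "a,b,c", 0))  # ['a', 'b', 'c']
```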
@@ -6639,9 +7325,24 @@ def map_unresolved_function(
                case [str_, pattern, limit]:  # noqa: F841
                    result_exp = split_string(str_, pattern, limit)
                case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
        case "split_part":
-            result_exp = snowpark_fn.call_function("split_part", *snowpark_args)
+            # Check for index 0 and throw error to match PySpark behavior
+            raise_error = _raise_error_helper(StringType(), SparkRuntimeException)
+            result_exp = snowpark_fn.when(
+                snowpark_args[2] == 0,
+                raise_error(
+                    snowpark_fn.lit(
+                        "[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall be either < 0 or > 0 (the first element has index 1)."
+                    )
+                ),
+            ).otherwise(snowpark_fn.call_function("split_part", *snowpark_args))
            result_type = StringType()
        case "sqrt":
            spark_function_name = f"SQRT({snowpark_arg_names[0]})"
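The `split_part` change moves the index-zero check into the expression tree with `snowpark_fn.when` plus `_raise_error_helper` (a helper not shown in this diff), so the error fires per row at execution time rather than at plan time. A plain-Python model of the per-row rule being encoded; the out-of-range behavior below is an assumption based on SQL `split_part` conventions:

```python
def split_part(s: str, delimiter: str, part_number: int) -> str:
    # Index 0 is rejected, matching the PySpark error this diff reproduces.
    if part_number == 0:
        raise RuntimeError(
            "[INVALID_INDEX_OF_ZERO] The index 0 is invalid. An index shall "
            "be either < 0 or > 0 (the first element has index 1)."
        )
    parts = s.split(delimiter)
    index = part_number - 1 if part_number > 0 else part_number
    try:
        return parts[index]
    except IndexError:
        return ""  # out-of-range parts come back empty (assumed convention)


assert split_part("a,b,c", ",", 1) == "a"
assert split_part("a,b,c", ",", -1) == "c"
```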
@@ -6649,9 +7350,11 @@ def map_unresolved_function(
            if isinstance(snowpark_typed_args[0].typ, StringType):
                sqrt_arg = snowpark_fn.try_cast(snowpark_args[0], DoubleType())
            elif not isinstance(snowpark_typed_args[0].typ, _NumericType):
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "SQRT({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}"."""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = (
                snowpark_fn.when(sqrt_arg < 0, NAN)
                .when(sqrt_arg.isNull(), snowpark_fn.lit(None))
@@ -6663,16 +7366,22 @@ def map_unresolved_function(
            # will depend on the input specified. All arguments in the input (apart from the first one that specifies
            # `num_rows`) must be the same type.
            if len(exp.unresolved_function.arguments) <= 1:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""
[WRONG_NUM_ARGS.WITHOUT_SUGGESTION] The `stack` requires > 1 parameters but the actual number is {len(exp.unresolved_function.arguments)}.
"""
                )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                )
+                raise exception
            num_rows = unwrap_literal(exp.unresolved_function.arguments[0])
            if not isinstance(snowpark_typed_args[0].typ, IntegerType):
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "INT" type, however "{num_rows}" has the type "{snowpark_typed_args[0].typ}"."""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

            num_arguments = len(snowpark_args) - 1
            num_cols = math.ceil(num_arguments / num_rows)
@@ -6683,9 +7392,11 @@ def map_unresolved_function(
                if arg != spark_col_types[i % num_cols] and not isinstance(
                    arg, NullType
                ):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"""[DATATYPE_MISMATCH.STACK_COLUMN_DIFF_TYPES] Cannot resolve "stack({snowpark_arg_names[0]})" due to data type mismatch: The data type of the column ({snowpark_arg_names[0]}) do not have the same type."""
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                if isinstance(arg, NullType):
                    spark_col_types[i] = VariantType()
                    snowpark_args[i + 1] = snowpark_fn.cast(
@@ -6742,9 +7453,11 @@ def map_unresolved_function(
                    snowpark_args[0], DoubleType()
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = snowpark_fn.stddev(stddev_argument)
            result_type = DoubleType()
        case "stddev_pop":
@@ -6755,9 +7468,11 @@ def map_unresolved_function(
                    snowpark_args[0], DoubleType()
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev_pop({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = snowpark_fn.stddev_pop(stddev_pop_argument)
            result_type = DoubleType()
        case "stddev_samp" | "std":
@@ -6768,9 +7483,11 @@ def map_unresolved_function(
                    snowpark_args[0], DoubleType()
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "stddev_samp({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = snowpark_fn.stddev_samp(stddev_samp_argument)
            result_type = DoubleType()
        case "str_to_map":
@@ -6818,7 +7535,7 @@ def map_unresolved_function(

                if key in result_map and not allow_dups:
                    raise ValueError(
-                        DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)
+                        f"[snowpark_connect::invalid_input] {DUPLICATE_KEY_FOUND_ERROR_TEMPLATE.format(key=key)}"
                    )

                result_map[key] = val
@@ -6957,9 +7674,11 @@ def map_unresolved_function(
            result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
        case "timestamp_millis":
            if not isinstance(snowpark_typed_args[0].typ, _IntegralType):
-                raise AnalysisException(
+                exception = AnalysisException(
                    f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "timestamp_millis({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the "INTEGRAL" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = snowpark_fn.cast(
                snowpark_fn.to_timestamp(snowpark_args[0] * 1_000, 6),
                TimestampType(snowpark.types.TimestampTimeZone.LTZ),
@@ -6970,9 +7689,11 @@ def map_unresolved_function(
            # even though the documentation explicitly says that it does.
            # As a workaround, use integer milliseconds instead of fractional seconds.
            if not isinstance(snowpark_typed_args[0].typ, _NumericType):
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "NUMERIC" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_exp = snowpark_fn.cast(
                snowpark_fn.to_timestamp(
                    snowpark_fn.cast(snowpark_args[0] * 1_000_000, LongType()), 6
@@ -7145,7 +7866,7 @@ def map_unresolved_function(
            if options is not None:
                if not isinstance(options, dict):
                    raise TypeError(
-                        "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
+                        "[snowpark_connect::invalid_input] [INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                    )

            python_to_snowflake_type = {
@@ -7164,7 +7885,7 @@ def map_unresolved_function(
                        type(v).__name__, type(v).__name__.upper()
                    )
                    raise TypeError(
-                        f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
+                        f'[snowpark_connect::type_mismatch] [INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
                    )

            options = options or {}
@@ -7302,7 +8023,7 @@ def map_unresolved_function(
                        result.append(escape_and_quote_string(str_value))
                    case _:
                        raise ValueError(
-                            f"Unable to determine type for value: {python_type}"
+                            f"[snowpark_connect::type_mismatch] Unable to determine type for value: {python_type}"
                        )
            elif isinstance(value, str):
                strip_value = (
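Errors raised inside Python UDF bodies cannot be post-processed by `attach_custom_error_code` before they cross the execution boundary, so these hunks encode the code as a `[snowpark_connect::...]` message prefix instead. A hypothetical consumer-side parse of that convention (the prefix format is taken from the diff; the parser itself is not part of the package):

```python
# Hypothetical parsing of the "[snowpark_connect::<code>]" message prefix
# this release adds to errors raised inside UDF bodies.
import re

_PREFIX = re.compile(r"^\[snowpark_connect::(?P<code>[a-z_]+)\]\s*(?P<msg>.*)$")


def split_error_code(message: str):
    m = _PREFIX.match(message)
    if m:
        return m.group("code"), m.group("msg")
    return None, message


code, msg = split_error_code(
    "[snowpark_connect::invalid_input] Invalid JSON: Expecting value"
)
assert code == "invalid_input"
assert msg == "Invalid JSON: Expecting value"
```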
@@ -7335,9 +8056,11 @@ def map_unresolved_function(
            if len(snowpark_arg_names) > 1 and snowpark_arg_names[1].startswith(
                "named_struct"
            ):
-                raise TypeError(
+                exception = TypeError(
                    "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

            def get_snowpark_type_name(snowpark_type: DataType) -> str:
                return (
@@ -7370,7 +8093,11 @@ def map_unresolved_function(
                case [csv_data, options]:
                    result_exp = _to_csv(csv_data, field_names, field_types, options)
                case _:
-                    raise ValueError("Unrecognized from_csv parameters")
+                    exception = ValueError("Unrecognized from_csv parameters")
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_type = StringType()
        case "to_date":
            if not spark_sql_ansi_enabled:
@@ -7397,23 +8124,29 @@ def map_unresolved_function(
                case NullType():
                    result_exp = snowpark_fn.lit(None)
                case _:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "to_date({snowpark_arg_names[0]}" due to data type mismatch: Parameter 1 requires the ("STRING" or "DATE" or "TIMESTAMP" or "TIMESTAMP_NTZ") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".'
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

            result_type = DateType()
        case "to_json":
            if len(snowpark_args) > 1:
                if not isinstance(snowpark_typed_args[1].typ, MapType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        "[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
                    )
+                    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                    raise exception
                if not isinstance(
                    snowpark_typed_args[1].typ.key_type, StringType
                ) or not isinstance(snowpark_typed_args[1].typ.value_type, StringType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"""[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "{snowpark_typed_args[1].typ.simpleString().upper()}"."""
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
            result_exp = snowpark_fn.to_json(snowpark_fn.to_variant(snowpark_args[0]))
            result_type = StringType()
        case "to_number":
@@ -7468,7 +8201,13 @@ def map_unresolved_function(
                    )
                )
                case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_exp = snowpark_fn.cast(result_exp, get_timestamp_type())
            result_type = get_timestamp_type()

@@ -7486,7 +8225,13 @@ def map_unresolved_function(
                    ),
                )
                case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_exp = snowpark_fn.cast(
                result_exp, TimestampType(snowpark.types.TimestampTimeZone.LTZ)
            )
@@ -7511,7 +8256,13 @@ def map_unresolved_function(
                    ),
                )
                case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_exp = snowpark_fn.cast(
                result_exp, TimestampType(snowpark.types.TimestampTimeZone.NTZ)
            )
@@ -7553,9 +8304,13 @@ def map_unresolved_function(
                    snowpark_fn.lit("YYYY-MM-DD HH24:MI:SS"),
                )
                case _:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                        "to_unix_timestamp expected 1 or 2 arguments."
                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception

            if len(exp.unresolved_function.arguments) == 1:
                spark_function_name = f"to_unix_timestamp({snowpark_arg_names[0]}, {'yyyy-MM-dd HH:mm:ss'})"
@@ -7617,14 +8372,123 @@ def map_unresolved_function(
            )
            result_type = DateType()
        case "try_add":
-            # Check for interval types and throw NotImplementedError
-            for arg in snowpark_typed_args:
-                if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
-                    raise NotImplementedError(
-                        "try_add with interval types is not supported"
+            # Handle interval arithmetic with overflow detection
+            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
+                case (DateType(), t) | (t, DateType()) if isinstance(
+                    t, YearMonthIntervalType
+                ):
+                    result_type = DateType()
+                    result_exp = snowpark_args[0] + snowpark_args[1]
+                case (DateType(), t) | (t, DateType()) if isinstance(
+                    t, DayTimeIntervalType
+                ):
+                    result_type = TimestampType()
+                    result_exp = snowpark_args[0] + snowpark_args[1]
+                case (TimestampType(), t) | (t, TimestampType()) if isinstance(
+                    t, (DayTimeIntervalType, YearMonthIntervalType)
+                ):
+                    result_type = (
+                        snowpark_typed_args[0].typ
+                        if isinstance(snowpark_typed_args[0].typ, TimestampType)
+                        else snowpark_typed_args[1].typ
                    )
-            result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 0)
-            result_exp = _type_with_typer(result_exp)
+                    result_exp = snowpark_args[0] + snowpark_args[1]
+                case (t1, t2) if (
+                    isinstance(t1, YearMonthIntervalType)
+                    and isinstance(t2, (_NumericType, StringType))
+                ) or (
+                    isinstance(t2, YearMonthIntervalType)
+                    and isinstance(t1, (_NumericType, StringType))
+                ):
+                    # YearMonthInterval + numeric/string or numeric/string + YearMonthInterval should throw error
+                    exception = AnalysisException(
+                        f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "try_add({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
+                case (t1, t2) if isinstance(t1, YearMonthIntervalType) and isinstance(
+                    t2, YearMonthIntervalType
+                ):
+                    result_type = YearMonthIntervalType(
+                        min(t1.start_field, t2.start_field),
+                        max(t1.end_field, t2.end_field),
+                    )
+
+                    # For year-month intervals, throw ArithmeticException if operands reach 10+ digits OR result exceeds 9 digits
+                    total1 = _calculate_total_months(snowpark_args[0])
+                    total2 = _calculate_total_months(snowpark_args[1])
+                    ten_digit_limit = snowpark_fn.lit(MAX_10_DIGIT_LIMIT)
+
+                    precision_violation = (
+                        # Check if either operand already reaches 10 digits (parsing limit)
+                        (snowpark_fn.abs(total1) >= ten_digit_limit)
+                        | (snowpark_fn.abs(total2) >= ten_digit_limit)
+                        | (
+                            (total1 > 0)
+                            & (total2 > 0)
+                            & (total1 >= ten_digit_limit - total2)
+                        )
+                        | (
+                            (total1 < 0)
+                            & (total2 < 0)
+                            & (total1 <= -ten_digit_limit - total2)
+                        )
+                    )
+
+                    raise_error = _raise_error_helper(result_type, ArithmeticException)
+                    result_exp = snowpark_fn.when(
+                        precision_violation,
+                        raise_error(
+                            snowpark_fn.lit(
+                                "Year-Month Interval result exceeds Snowflake interval precision limit"
+                            )
+                        ),
+                    ).otherwise(snowpark_args[0] + snowpark_args[1])
+                case (t1, t2) if isinstance(t1, DayTimeIntervalType) and isinstance(
+                    t2, DayTimeIntervalType
+                ):
+                    result_type = DayTimeIntervalType(
+                        min(t1.start_field, t2.start_field),
+                        max(t1.end_field, t2.end_field),
+                    )
+                    # Check for Snowflake's day limit (106751991 days is the cutoff)
+                    days1 = snowpark_fn.date_part("day", snowpark_args[0])
+                    days2 = snowpark_fn.date_part("day", snowpark_args[1])
+                    max_days = snowpark_fn.lit(
+                        MAX_DAY_TIME_DAYS
+                    )  # Snowflake's actual limit
+                    min_days = snowpark_fn.lit(-MAX_DAY_TIME_DAYS)
+
+                    # Check if either operand exceeds the day limit - throw error like Spark does
+                    operand_limit_violation = (snowpark_fn.abs(days1) > max_days) | (
+                        snowpark_fn.abs(days2) > max_days
+                    )
+
+                    # Check if result would exceed day limit (but operands are valid) - return NULL
+                    result_overflow = (
+                        # Check if result would exceed day limit (positive overflow)
+                        ((days1 > 0) & (days2 > 0) & (days1 > max_days - days2))
+                        | ((days1 < 0) & (days2 < 0) & (days1 < min_days - days2))
+                    )
+
+                    raise_error = _raise_error_helper(result_type, ArithmeticException)
+                    result_exp = (
+                        snowpark_fn.when(
+                            operand_limit_violation,
+                            raise_error(
+                                snowpark_fn.lit(
+                                    "Day-Time Interval operand exceeds Snowflake interval precision limit"
+                                )
+                            ),
+                        )
+                        .when(result_overflow, snowpark_fn.lit(None))
+                        .otherwise(snowpark_args[0] + snowpark_args[1])
+                    )
+                case _:
+                    result_exp = _try_arithmetic_helper(
+                        snowpark_typed_args, snowpark_args, 0
+                    )
+                    result_exp = _type_with_typer(result_exp)
        case "try_aes_decrypt":
            result_exp = _aes_helper(
                "TRY_DECRYPT",
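The year-month overflow guard is written as `total1 >= ten_digit_limit - total2` rather than `total1 + total2 >= ten_digit_limit`: rearranging keeps the comparison itself inside the representable range, the standard overflow-safe addition check. A plain-Python illustration; `MAX_10_DIGIT_LIMIT`'s exact value is not shown in the diff, so the smallest 10-digit number is assumed here:

```python
# Overflow-safe addition check, as used by the try_add/try_subtract
# year-month branches. MAX_10_DIGIT_LIMIT's value is assumed (10**9);
# the diff only shows the name.
MAX_10_DIGIT_LIMIT = 10**9


def add_would_overflow(total1: int, total2: int) -> bool:
    if total1 > 0 and total2 > 0:
        return total1 >= MAX_10_DIGIT_LIMIT - total2
    if total1 < 0 and total2 < 0:
        return total1 <= -MAX_10_DIGIT_LIMIT - total2
    return False  # mixed signs move toward zero and cannot overflow


assert add_would_overflow(999_999_999, 1)
assert not add_would_overflow(999_999_998, 1)
```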
@@ -7676,13 +8540,49 @@ def map_unresolved_function(
                    DoubleType(), cleaned, calculating_avg=True
                )
        case "try_divide":
-            # Check for interval types and throw NotImplementedError
-            for arg in snowpark_typed_args:
-                if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
-                    raise NotImplementedError(
-                        "try_divide with interval types is not supported"
-                    )
+            # Handle interval division with overflow detection
            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
+                case (t1, t2) if isinstance(t1, _AnsiIntervalType) and isinstance(
+                    t2, (_NumericType, StringType)
+                ):
+                    # Interval / numeric/string
+                    result_type = t1
+                    interval_arg = snowpark_args[0]
+                    divisor = (
+                        snowpark_args[1]
+                        if isinstance(t2, _NumericType)
+                        else snowpark_fn.cast(snowpark_args[1], "double")
+                    )
+
+                    # Check for division by zero first
+                    zero_check = divisor == 0
+
+                    if isinstance(result_type, YearMonthIntervalType):
+                        # For year-month intervals, check if result exceeds 32-bit signed integer limit
+                        result_type = YearMonthIntervalType()
+                        total_months = _calculate_total_months(interval_arg)
+                        max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
+                        overflow_check = (
+                            snowpark_fn.abs(total_months / divisor) > max_months
+                        )
+                        result_exp = (
+                            snowpark_fn.when(zero_check, snowpark_fn.lit(None))
+                            .when(overflow_check, snowpark_fn.lit(None))
+                            .otherwise(interval_arg / divisor)
+                        )
+                    else:  # DayTimeIntervalType
+                        # For day-time intervals, check if result exceeds day limit
+                        result_type = DayTimeIntervalType()
+                        total_days = _calculate_total_days(interval_arg)
+                        max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
+                        overflow_check = (
+                            snowpark_fn.abs(total_days / divisor) > max_days
+                        )
+                        result_exp = (
+                            snowpark_fn.when(zero_check, snowpark_fn.lit(None))
+                            .when(overflow_check, snowpark_fn.lit(None))
+                            .otherwise(interval_arg / divisor)
+                        )
                case (NullType(), t) | (t, NullType()):
                    result_exp = snowpark_fn.lit(None)
                    result_type = FloatType()
@@ -7742,9 +8642,11 @@ def map_unresolved_function(
                    ).otherwise(cleaned_left / cleaned_right)
                    result_exp = _type_with_typer(result_exp)
                case (_, _):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"Incompatible types: {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}"
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

        case "try_element_at":
            # For structured ArrayType and MapType columns, Snowflake raises an error when an index is out of bounds or a key does not exist.
@@ -7786,17 +8688,82 @@ def map_unresolved_function(
                case _:
                    # Currently we do not handle VariantType columns as the first argument here.
                    # Spark will not support VariantType until 4.0.0, revisit this when the support is added.
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"Expected either (ArrayType, IntegralType) or (MapType, StringType), got {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}."
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
        case "try_multiply":
-            # Check for interval types and throw NotImplementedError
-            for arg in snowpark_typed_args:
-                if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
-                    raise NotImplementedError(
-                        "try_multiply with interval types is not supported"
-                    )
            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
+                case (t1, t2) if isinstance(t1, _AnsiIntervalType) and isinstance(
+                    t2, (_NumericType, StringType)
+                ):
+                    # Interval * numeric/string
+                    result_type = t1
+                    interval_arg = snowpark_args[0]
+                    multiplier = (
+                        snowpark_args[1]
+                        if isinstance(t2, _NumericType)
+                        else snowpark_fn.cast(snowpark_args[1], "double")
+                    )
+
+                    if isinstance(result_type, YearMonthIntervalType):
+                        # For year-month intervals, check if result exceeds 32-bit signed integer limit
+                        result_type = YearMonthIntervalType()
+                        total_months = _calculate_total_months(interval_arg)
+                        max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
+                        overflow_check = (
+                            snowpark_fn.abs(total_months * multiplier) > max_months
+                        )
+                        result_exp = snowpark_fn.when(
+                            overflow_check, snowpark_fn.lit(None)
+                        ).otherwise(interval_arg * multiplier)
+                    else:  # DayTimeIntervalType
+                        # For day-time intervals, check if result exceeds day limit
+                        result_type = DayTimeIntervalType()
+                        total_days = _calculate_total_days(interval_arg)
+                        max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
+                        overflow_check = (
+                            snowpark_fn.abs(total_days * multiplier) > max_days
+                        )
+                        result_exp = snowpark_fn.when(
+                            overflow_check, snowpark_fn.lit(None)
+                        ).otherwise(interval_arg * multiplier)
+
+                case (t1, t2) if isinstance(t2, _AnsiIntervalType) and isinstance(
+                    t1, (_NumericType, StringType)
+                ):
+                    # numeric/string * Interval
+                    result_type = t2
+                    interval_arg = snowpark_args[1]
+                    multiplier = (
+                        snowpark_args[0]
+                        if isinstance(t1, _NumericType)
+                        else snowpark_fn.cast(snowpark_args[0], "double")
+                    )
+
+                    if isinstance(result_type, YearMonthIntervalType):
+                        # For year-month intervals, check if result exceeds 32-bit signed integer limit
+                        result_type = YearMonthIntervalType()
+                        total_months = _calculate_total_months(interval_arg)
+                        max_months = snowpark_fn.lit(MAX_32BIT_SIGNED_INT)
+                        overflow_check = (
+                            snowpark_fn.abs(total_months * multiplier) > max_months
+                        )
+                        result_exp = snowpark_fn.when(
+                            overflow_check, snowpark_fn.lit(None)
+                        ).otherwise(interval_arg * multiplier)
+                    else:  # DayTimeIntervalType
+                        # For day-time intervals, check if result exceeds day limit
+                        result_type = DayTimeIntervalType()
+                        total_days = _calculate_total_days(interval_arg)
+                        max_days = snowpark_fn.lit(MAX_DAY_TIME_DAYS)
+                        overflow_check = (
+                            snowpark_fn.abs(total_days * multiplier) > max_days
+                        )
+                        result_exp = snowpark_fn.when(
+                            overflow_check, snowpark_fn.lit(None)
+                        ).otherwise(interval_arg * multiplier)
                case (NullType(), t) | (t, NullType()):
                    result_exp = snowpark_fn.lit(None)
                    match t:
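`try_divide` keeps the "try" contract: division by zero and representation overflow both yield NULL rather than an error. A plain-Python model of the decision table the `when(...)` chain encodes for year-month intervals (constant value assumed, as above):

```python
from typing import Optional

MAX_32BIT_SIGNED_INT = 2**31 - 1  # name from the diff; value assumed


def try_divide_months(total_months: int, divisor: float) -> Optional[float]:
    if divisor == 0:
        return None  # division by zero -> NULL, never an error
    result = total_months / divisor
    if abs(result) > MAX_32BIT_SIGNED_INT:
        return None  # result no longer fits the year-month representation
    return result


assert try_divide_months(24, 0) is None
assert try_divide_months(24, 2) == 12
```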
@@ -7870,9 +8837,11 @@ def map_unresolved_function(
                    result_exp = cleaned_left * cleaned_right
                    result_exp = _type_with_typer(result_exp)
                case (_, _):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"Incompatible types: {snowpark_typed_args[0].typ}, {snowpark_typed_args[1].typ}"
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
        case "try_sum":
            # Snowflake raises an error when a value that cannot be cast into a numeric is passed to SUM. Spark treats these as NULL values and
            # does not throw an error. Additionally, Spark returns NULL when this calculation results in an overflow, whereas Snowflake raises a "TypeError".
@@ -7894,14 +8863,112 @@ def map_unresolved_function(
                snowpark_typed_args[0].typ, snowpark_args[0]
            )
        case "try_subtract":
-            # Check for interval types and throw NotImplementedError
-            for arg in snowpark_typed_args:
-                if isinstance(arg.typ, (YearMonthIntervalType, DayTimeIntervalType)):
-                    raise NotImplementedError(
-                        "try_subtract with interval types is not supported"
+            # Handle interval arithmetic with overflow detection
+            match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
+                case (DateType(), t) if isinstance(t, YearMonthIntervalType):
+                    result_type = DateType()
+                    result_exp = snowpark_args[0] - snowpark_args[1]
+                case (DateType(), t) if isinstance(t, DayTimeIntervalType):
+                    result_type = TimestampType()
+                    result_exp = snowpark_args[0] - snowpark_args[1]
+                case (TimestampType(), t) if isinstance(
+                    t, (DayTimeIntervalType, YearMonthIntervalType)
+                ):
+                    result_type = snowpark_typed_args[0].typ
+                    result_exp = snowpark_args[0] - snowpark_args[1]
+                case (t1, t2) if (
+                    isinstance(t1, YearMonthIntervalType)
+                    and isinstance(t2, (_NumericType, StringType))
+                ) or (
+                    isinstance(t2, YearMonthIntervalType)
+                    and isinstance(t1, (_NumericType, StringType))
+                ):
+                    # YearMonthInterval - numeric/string or numeric/string - YearMonthInterval should throw error
+                    exception = AnalysisException(
+                        f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "try_subtract({snowpark_arg_names[0]}, {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
+                case (t1, t2) if isinstance(t1, YearMonthIntervalType) and isinstance(
+                    t2, YearMonthIntervalType
+                ):
+                    result_type = YearMonthIntervalType(
+                        min(t1.start_field, t2.start_field),
+                        max(t1.end_field, t2.end_field),
+                    )
+                    # Check for Snowflake's precision limits: 10+ digits for operands, 9+ digits for results
+                    total1 = _calculate_total_months(snowpark_args[0])
+                    total2 = _calculate_total_months(snowpark_args[1])
+                    ten_digit_limit = snowpark_fn.lit(MAX_10_DIGIT_LIMIT)
+
+                    precision_violation = (
+                        # Check if either operand already reaches 10 digits (parsing limit)
+                        (snowpark_fn.abs(total1) >= ten_digit_limit)
+                        | (snowpark_fn.abs(total2) >= ten_digit_limit)
+                        | (
+                            (total1 > 0)
+                            & (total2 < 0)
+                            & (total1 >= ten_digit_limit + total2)
+                        )
+                        | (
+                            (total1 < 0)
+                            & (total2 > 0)
+                            & (total1 <= -ten_digit_limit + total2)
+                        )
                    )
-            result_exp = _try_arithmetic_helper(snowpark_typed_args, snowpark_args, 1)
-            result_exp = _type_with_typer(result_exp)
+
+                    raise_error = _raise_error_helper(result_type, ArithmeticException)
+                    result_exp = snowpark_fn.when(
+                        precision_violation,
+                        raise_error(
+                            snowpark_fn.lit(
+                                "Year-Month Interval result exceeds Snowflake interval precision limit"
+                            )
+                        ),
+                    ).otherwise(snowpark_args[0] - snowpark_args[1])
+                case (t1, t2) if isinstance(t1, DayTimeIntervalType) and isinstance(
+                    t2, DayTimeIntervalType
+                ):
+                    result_type = DayTimeIntervalType(
+                        min(t1.start_field, t2.start_field),
+                        max(t1.end_field, t2.end_field),
+                    )
+                    # Check for Snowflake's day limit (106751991 days is the cutoff)
+                    days1 = snowpark_fn.date_part("day", snowpark_args[0])
+                    days2 = snowpark_fn.date_part("day", snowpark_args[1])
+                    max_days = snowpark_fn.lit(
+                        MAX_DAY_TIME_DAYS
+                    )  # Snowflake's actual limit
+                    min_days = snowpark_fn.lit(-MAX_DAY_TIME_DAYS)
+
+                    # Check if either operand exceeds the day limit - throw error like Spark does
+                    operand_limit_violation = (snowpark_fn.abs(days1) > max_days) | (
+                        snowpark_fn.abs(days2) > max_days
+                    )
+
+                    # Check if result would exceed day limit (but operands are valid) - return NULL
+                    result_overflow = (
+                        (days1 > 0) & (days2 < 0) & (days1 > max_days + days2)
+                    ) | ((days1 < 0) & (days2 > 0) & (days1 < min_days + days2))
+
+                    raise_error = _raise_error_helper(result_type, ArithmeticException)
+                    result_exp = (
+                        snowpark_fn.when(
+                            operand_limit_violation,
+                            raise_error(
+                                snowpark_fn.lit(
+                                    "Day-Time Interval operand exceeds day limit"
+                                )
+                            ),
+                        )
+                        .when(result_overflow, snowpark_fn.lit(None))
+                        .otherwise(snowpark_args[0] - snowpark_args[1])
+                    )
+                case _:
+                    result_exp = _try_arithmetic_helper(
+                        snowpark_typed_args, snowpark_args, 1
+                    )
+                    result_exp = _type_with_typer(result_exp)
        case "try_to_number":
            try_to_number = snowpark_fn.function("try_to_number")
            precision, scale = resolve_to_number_precision_and_scale(exp)
@@ -7924,7 +8991,13 @@ def map_unresolved_function(
                    ),
                )
                case _:
-                    raise ValueError(f"Invalid number of arguments to {function_name}")
+                    exception = ValueError(
+                        f"Invalid number of arguments to {function_name}"
+                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_type = get_timestamp_type()
            result_exp = snowpark_fn.cast(result_exp, result_type)
        case "typeof":
@@ -8036,9 +9109,13 @@ def map_unresolved_function(
                    snowpark_fn.lit("YYYY-MM-DD HH24:MI:SS"),
                )
                case _:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                        "unix_timestamp expected 0, 1 or 2 arguments."
                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                    )
+                    raise exception
            result_type = LongType()
        case "unwrap_udt":
            snowpark_col_name = snowpark_args[0].get_name()
@@ -8055,9 +9132,11 @@ def map_unresolved_function(
            )

            if "__udt_info__" not in metadata:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve '{spark_function_name})' due to data type mismatch: Parameter 1 requires the 'USERDEFINEDTYPE' type"
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

            result_type = map_json_schema_to_snowpark(
                metadata["__udt_info__"]["sqlType"]
@@ -8113,9 +9192,11 @@ def map_unresolved_function(
                    snowpark_args[0], DoubleType()
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_type = DoubleType()
            result_exp = _resolve_aggregate_exp(
                snowpark_fn.var_pop(var_pop_argument), result_type
@@ -8128,9 +9209,11 @@ def map_unresolved_function(
                    snowpark_args[0], DoubleType()
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"""AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{function_name}({snowpark_arg_names[0]})" due to data type mismatch: Parameter 1 requires the "DOUBLE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".;"""
                )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            result_type = DoubleType()
            result_exp = _resolve_aggregate_exp(
                snowpark_fn.var_samp(var_samp_argument), result_type
@@ -8163,10 +9246,12 @@ def map_unresolved_function(
            if len(snowpark_typed_args) > 0:
                condition_type = snowpark_typed_args[0].typ
                if not isinstance(condition_type, BooleanType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
                        f"Parameter 1 requires the 'BOOLEAN' type, however got '{condition_type}'"
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception

            name_components = ["CASE"]
            name_components.append("WHEN")
@@ -8189,10 +9274,12 @@ def map_unresolved_function(
                # Validate each WHEN condition
                condition_type = snowpark_typed_args[i].typ
                if not isinstance(condition_type, BooleanType):
-                    raise AnalysisException(
+                    exception = AnalysisException(
                        f"[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve CASE WHEN condition due to data type mismatch: "
                        f"Parameter {i + 1} requires the 'BOOLEAN' type, however got '{condition_type}'"
                    )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                result_exp = result_exp.when(snowpark_args[i], snowpark_args[i + 1])
                result_type_indexes.append(i + 1)
            name_components.append("END")
@@ -8429,9 +9516,11 @@ def map_unresolved_function(
            # TODO: Add more here as we come across them.
            # Unfortunately the scope of function names are not documented in
            # the proto file.
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported function name {other}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

    def _to_typed_column(
        res: Column | TypedColumn,
@@ -8446,9 +9535,11 @@ def map_unresolved_function(
            # 1. Static type: Assign directly to `result_type` when type is known at resolve time
            # 2. Dynamic type based on function arguments types: Use `snowpark_typed_args` to determine type
            # 3. Use _type_with_typer() as last resort - it calls GS to determine the type
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Result type of function {function_name} not implemented"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        elif type(res_type) is list:
            tc = TypedColumn(res, lambda: res_type)
        else:
@@ -8460,7 +9551,7 @@ def map_unresolved_function(
        spark_col_names if len(spark_col_names) > 0 else [spark_function_name]
    )
    typed_col = _to_typed_column(result_exp, result_type, function_name)
-    typed_col.set_qualifiers(
+    typed_col.set_qualifiers({ColumnQualifier(tuple(qualifier_parts))})
    return spark_col_names, typed_col


@@ -8534,15 +9625,19 @@ def _extract_window_args(fn: expressions_proto.Expression) -> (str, str):
    args = fn.unresolved_function.arguments
    match args:
        case [_, _, _]:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                "the slide_duration parameter is not supported"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        case [_, window_duration, slide_duration, _] if unwrap_literal(
            window_duration
        ) != unwrap_literal(slide_duration):
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                "the slide_duration parameter is not supported"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
        case [_, window_duration, _, start_time]:
            return unwrap_literal(window_duration), unwrap_literal(start_time)
        case [_, window_duration]:
@@ -8610,7 +9705,9 @@ def _find_common_type(
                typ = _common(type1.element_type, type2.element_type)
                return ArrayType(typ)
            case (ArrayType(), _) | (_, ArrayType()) if func_name == "concat":
-                raise AnalysisException(exception_base_message)
+                exception = AnalysisException(exception_base_message)
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
            case (NullType(), t) | (t, NullType()):
                return t
            case (BinaryType(), BinaryType()):
@@ -8639,7 +9736,9 @@ def _find_common_type(
                if [field.name for field in fields1] != [
                    field.name for field in fields2
                ]:
-                    raise AnalysisException(exception_base_message)
+                    exception = AnalysisException(exception_base_message)
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                fields = []
                for idx, field in enumerate(fields1):
                    typ = _common(field.datatype, fields2[idx].datatype)
@@ -8649,8 +9748,24 @@ def _find_common_type(
                key_type = _common(type1.key_type, type2.key_type)
                value_type = _common(type1.value_type, type2.value_type)
                return MapType(key_type, value_type)
+            case (_, _) if isinstance(type1, YearMonthIntervalType) and isinstance(
+                type2, YearMonthIntervalType
+            ):
+                return YearMonthIntervalType(
+                    min(type1.start_field, type2.start_field),
+                    max(type1.end_field, type2.end_field),
+                )
+            case (_, _) if isinstance(type1, DayTimeIntervalType) and isinstance(
+                type2, DayTimeIntervalType
+            ):
+                return DayTimeIntervalType(
+                    min(type1.start_field, type2.start_field),
+                    max(type1.end_field, type2.end_field),
+                )
            case _:
-                raise AnalysisException(exception_base_message)
+                exception = AnalysisException(exception_base_message)
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception

    types = list(filter(lambda tp: tp is not None, types))
    if not types:
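The new `_find_common_type` interval branches widen the field span of the two operands: the smaller start field and the larger end field. A sketch with the field ordinals assumed to follow PySpark's convention (YEAR=0, MONTH=1), since the diff does not show the constants:

```python
def widen_interval_fields(start1: int, end1: int, start2: int, end2: int):
    # min/max widening, exactly as the diff's YearMonthIntervalType /
    # DayTimeIntervalType common-type branches compute it.
    return min(start1, start2), max(end1, end2)


# INTERVAL YEAR (0..0) combined with INTERVAL MONTH (1..1) widens to
# INTERVAL YEAR TO MONTH (0..1), assuming YEAR=0 and MONTH=1 ordinals.
assert widen_interval_fields(0, 0, 1, 1) == (0, 1)
```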
@@ -8663,7 +9778,9 @@ def _find_common_type(
|
|
|
8663
9778
|
func_name_message = f" to `{func_name}`" if func_name else ""
|
|
8664
9779
|
types_message = " or ".join([f'"{type}"' for type in types])
|
|
8665
9780
|
exception_message = f"{exception_base_message} Cannot resolve expression due to data type mismatch: Input{func_name_message} should all be the same type, but it's ({types_message})."
|
|
8666
|
-
|
|
9781
|
+
exception = AnalysisException(exception_message)
|
|
9782
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
9783
|
+
raise exception
|
|
8667
9784
|
else:
|
|
8668
9785
|
raise
|
|
8669
9786
|
|
|
@@ -8821,7 +9938,9 @@ def _resolve_function_with_lambda(
|
|
|
8821
9938
|
case ArrayType():
|
|
8822
9939
|
return VariantType()
|
|
8823
9940
|
case t:
|
|
8824
|
-
|
|
9941
|
+
exception = ValueError(f"Expected array, got {t}")
|
|
9942
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
9943
|
+
raise exception
|
|
8825
9944
|
|
|
8826
9945
|
def _get_map_types(tc: TypedColumn):
|
|
8827
9946
|
match tc.typ:
|
|
@@ -8830,9 +9949,11 @@ def _resolve_function_with_lambda(
|
|
|
8830
9949
|
case MapType():
|
|
8831
9950
|
return VariantType(), VariantType()
|
|
8832
9951
|
case t:
|
|
8833
|
-
|
|
9952
|
+
exception = AnalysisException(
|
|
8834
9953
|
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Parameter 1 requires the "MAP" type, however "id" has the type "{t}".'
|
|
8835
9954
|
)
|
|
9955
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
9956
|
+
raise exception
|
|
8836
9957
|
|
|
8837
9958
|
def _map_to_array(m: dict) -> Optional[list]:
|
|
8838
9959
|
# confirm that m is a dict and not a sqlNullWrapper
|
|
@@ -8898,9 +10019,13 @@ def _resolve_function_with_lambda(
|
|
|
8898
10019
|
result_type = arg4_tc.typ # it's type of 'finish' lambda body
|
|
8899
10020
|
result_exp = snowpark_fn.get(result_exp, snowpark_fn.lit(0))
|
|
8900
10021
|
case _:
|
|
8901
|
-
|
|
10022
|
+
exception = SnowparkConnectNotImplementedError(
|
|
8902
10023
|
f"{function_name} function requires 3 or 4 arguments"
|
|
8903
10024
|
)
|
|
10025
|
+
attach_custom_error_code(
|
|
10026
|
+
exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
|
|
10027
|
+
)
|
|
10028
|
+
raise exception
|
|
8904
10029
|
|
|
8905
10030
|
snowpark_arg_names = [
|
|
8906
10031
|
arg1_name,
|
|
@@ -9124,9 +10249,13 @@ def _resolve_function_with_lambda(
|
|
|
9124
10249
|
f"lambdafunction({lambda_body_name}, namedlambdavariable(), namedlambdavariable())",
|
|
9125
10250
|
]
|
|
9126
10251
|
case _:
|
|
9127
|
-
|
|
10252
|
+
exception = SnowparkConnectNotImplementedError(
|
|
9128
10253
|
f"{function_name} function requires lambda function with 1 or 2 arguments"
|
|
9129
10254
|
)
|
|
10255
|
+
attach_custom_error_code(
|
|
10256
|
+
exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
|
|
10257
|
+
)
|
|
10258
|
+
raise exception
|
|
9130
10259
|
case "transform_keys":
|
|
9131
10260
|
_map_to_array_udf = cached_udf(
|
|
9132
10261
|
_map_to_array,
|
|
@@ -9280,9 +10409,11 @@ def _resolve_function_with_lambda(
|
|
|
9280
10409
|
result_exp = TypedColumn(result_exp, lambda: [ArrayType(fn_body.typ)])
|
|
9281
10410
|
case other:
|
|
9282
10411
|
# TODO: Add more here as we come across them.
|
|
9283
|
-
|
|
10412
|
+
exception = SnowparkConnectNotImplementedError(
|
|
9284
10413
|
f"Unsupported function name {other}"
|
|
9285
10414
|
)
|
|
10415
|
+
attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
|
|
10416
|
+
raise exception
|
|
9286
10417
|
|
|
9287
10418
|
spark_function_name = f"{function_name}({', '.join(snowpark_arg_names)})"
|
|
9288
10419
|
if not isinstance(result_exp, TypedColumn):
|
|
@@ -9847,9 +10978,11 @@ def _try_arithmetic_helper(
|
|
|
9847
10978
|
) or (
|
|
9848
10979
|
isinstance(arg2, DateType) and not isinstance(arg1, _IntegralType)
|
|
9849
10980
|
):
|
|
9850
|
-
|
|
10981
|
+
exception = AnalysisException(
|
|
9851
10982
|
'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_add(dt, add)" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type'
|
|
9852
10983
|
)
|
|
10984
|
+
attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
|
|
10985
|
+
raise exception
|
|
9853
10986
|
args = (
|
|
9854
10987
|
snowpark_args[::-1]
|
|
9855
10988
|
if isinstance(arg1, _IntegralType)
|
|
@@ -9872,9 +11005,11 @@ def _try_arithmetic_helper(
             elif isinstance(arg1, DateType) and isinstance(arg2, DateType):
                 return snowpark_fn.daydiff(snowpark_args[0], snowpark_args[1])
             else:
-                raise AnalysisException(
+                exception = AnalysisException(
                     '[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "date_sub(dt, sub)" due to data type mismatch: Parameter 1 requires the "DATE" type and parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type'
                 )
+                attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                raise exception
         case (DecimalType(), _IntegralType()) | (_IntegralType(), DecimalType()) | (
             DecimalType(),
             DecimalType(),
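These two hunks sit inside `_try_arithmetic_helper`'s dispatch over operand types: `date ± integral` reorders the arguments if the integer comes first, `date - date` becomes `daydiff`, and anything else is a type-mismatch error. A plain-Python sketch of the same dispatch, using `datetime.date` and `int` as stand-ins for Snowpark's `DateType` and `_IntegralType`:

```python
# Stand-in dispatch mirroring the date branches of _try_arithmetic_helper;
# datetime.date and int substitute for Snowpark's DateType/_IntegralType.
from datetime import date, timedelta


def try_date_arith(op: str, a, b):
    if op == "+" and isinstance(a, date) and isinstance(b, int):
        return a + timedelta(days=b)
    if op == "+" and isinstance(a, int) and isinstance(b, date):
        return b + timedelta(days=a)  # arguments reordered, like snowpark_args[::-1]
    if op == "-" and isinstance(a, date) and isinstance(b, int):
        return a - timedelta(days=b)
    if op == "-" and isinstance(a, date) and isinstance(b, date):
        return (a - b).days  # the daydiff case
    raise TypeError('Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type')


assert try_date_arith("-", date(2024, 3, 1), date(2024, 2, 1)) == 29
```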
@@ -9926,9 +11061,11 @@ def _try_arithmetic_helper(
                 return updated_args[0] - updated_args[1]
 
         case (BooleanType(), _) | (_, BooleanType()):
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"Incompatible types: {typed_args[0].typ}, {typed_args[1].typ}"
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
         case _:
             # Return NULL for incompatible types
             return snowpark_fn.lit(None)
@@ -9968,21 +11105,23 @@ def _get_add_sub_result_type(
                 )
             case _:
                 if global_config.spark_sql_ansi_enabled:
-                    raise AnalysisException(
+                    exception = AnalysisException(
                         f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".',
                     )
+                    attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+                    raise exception
                 else:
                     result_type = DoubleType()
         case BooleanType():
-            raise AnalysisException(
+            exception = AnalysisException(
                 f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "BOOLEAN".',
             )
+            attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+            raise exception
     return result_type, overflow_possible
 
 
-def _get_interval_type_name(
-    interval_type: Union[YearMonthIntervalType, DayTimeIntervalType]
-) -> str:
+def _get_interval_type_name(interval_type: _AnsiIntervalType) -> str:
     """Get the formatted interval type name for error messages."""
     if isinstance(interval_type, YearMonthIntervalType):
         if interval_type.start_field == 0 and interval_type.end_field == 0:
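The `case _:` branch shows how `spark.sql.ansi.enabled` changes `+`/`-` on string operands: ANSI mode raises `DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE`, while legacy mode silently coerces the result type to double. A sketch of the same toggle, with the `global_config` object reduced to a plain boolean:

```python
# Sketch of the ANSI toggle in _get_add_sub_result_type; the config object is
# simplified to a boolean flag for illustration.
def add_sub_result_type_for_string(spark_sql_ansi_enabled: bool) -> str:
    if spark_sql_ansi_enabled:
        raise TypeError(
            '[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] the binary operator '
            'requires a "NUMERIC" or interval input type, not "STRING".'
        )
    return "DOUBLE"  # legacy (non-ANSI) Spark behavior: strings coerce to double


assert add_sub_result_type_for_string(False) == "DOUBLE"
```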
@@ -10009,21 +11148,15 @@ def _check_interval_string_comparison(
 ) -> None:
     """Check for invalid interval-string comparisons and raise AnalysisException if found."""
     if (
-        isinstance(
-            snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
-        )
+        isinstance(snowpark_typed_args[0].typ, _AnsiIntervalType)
         and isinstance(snowpark_typed_args[1].typ, StringType)
         or isinstance(snowpark_typed_args[0].typ, StringType)
-        and isinstance(
-            snowpark_typed_args[1].typ, (YearMonthIntervalType, DayTimeIntervalType)
-        )
+        and isinstance(snowpark_typed_args[1].typ, _AnsiIntervalType)
     ):
         # Format interval type name for error message
         interval_type = (
             snowpark_typed_args[0].typ
-            if isinstance(
-                snowpark_typed_args[0].typ, (YearMonthIntervalType, DayTimeIntervalType)
-            )
+            if isinstance(snowpark_typed_args[0].typ, _AnsiIntervalType)
             else snowpark_typed_args[1].typ
         )
         interval_name = _get_interval_type_name(interval_type)
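Both `_get_interval_type_name` and `_check_interval_string_comparison` now use a single `_AnsiIntervalType` name instead of spelling out `(YearMonthIntervalType, DayTimeIntervalType)` at every `isinstance` call. The diff does not show its definition; one plausible shape that works both as an annotation and in `isinstance` is a PEP 604 union alias (a sketch with toy classes, not the package's actual definition):

```python
# Hypothetical definition of _AnsiIntervalType; the real one is not shown in
# this diff. A PEP 604 union works in annotations and in isinstance() on
# Python 3.10+. The classes below are empty stand-ins.
class YearMonthIntervalType: ...
class DayTimeIntervalType: ...

_AnsiIntervalType = YearMonthIntervalType | DayTimeIntervalType


def _get_interval_type_name(interval_type: _AnsiIntervalType) -> str:
    return (
        "INTERVAL YEAR TO MONTH"
        if isinstance(interval_type, YearMonthIntervalType)
        else "INTERVAL DAY TO SECOND"
    )


assert isinstance(DayTimeIntervalType(), _AnsiIntervalType)
```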
@@ -10039,9 +11172,11 @@ def _check_interval_string_comparison(
             else interval_name
         )
 
-        raise AnalysisException(
+        exception = AnalysisException(
             f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} {operator} {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{left_type}" and "{right_type}").;'
         )
+        attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+        raise exception
 
 
 def _get_spark_function_name(
@@ -10088,12 +11223,18 @@ def _get_spark_function_name(
         case (DateType(), DayTimeIntervalType()) | (
             DateType(),
             YearMonthIntervalType(),
+        ) | (TimestampType(), DayTimeIntervalType()) | (
+            TimestampType(),
+            YearMonthIntervalType(),
         ):
             date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
             return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
         case (DayTimeIntervalType(), DateType()) | (
             YearMonthIntervalType(),
             DateType(),
+        ) | (DayTimeIntervalType(), TimestampType()) | (
+            YearMonthIntervalType(),
+            TimestampType(),
         ):
             date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
             if function_name == "+":
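This hunk only changes how `_get_spark_function_name` renders the expression for error messages and column names: `TimestampType()` operands now get the same infix `a + b` rendering that `DateType()` already had, instead of the default `func(a, b)` form. A toy sketch of that type-pair dispatch with structural pattern matching; the classes are stand-ins and the `+` commutation in the second case is illustrative, since the diff does not show that branch's return:

```python
# Toy dispatch mirroring the hunk: date/timestamp with an interval renders as
# infix, everything else as a function call. Classes are empty stand-ins.
class DateType: ...
class TimestampType: ...
class DayTimeIntervalType: ...
class YearMonthIntervalType: ...


def render(op: str, lt, rt, lname: str, rname: str) -> str:
    match (lt, rt):
        case (DateType() | TimestampType(), DayTimeIntervalType() | YearMonthIntervalType()):
            return f"{lname} {op} {rname}"
        case (DayTimeIntervalType() | YearMonthIntervalType(), DateType() | TimestampType()):
            # Illustrative: put the date/timestamp operand first for "+".
            return f"{rname} {op} {lname}" if op == "+" else f"{lname} {op} {rname}"
        case _:
            return f"{op}({lname}, {rname})"


assert render("+", TimestampType(), DayTimeIntervalType(), "ts", "INTERVAL '1' DAY") == (
    "ts + INTERVAL '1' DAY"
)
```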
@@ -10177,12 +11318,18 @@ def _timestamp_format_sanity_check(ts_value: str, ts_format: str) -> None:
     This is a basic validation to ensure the format matches the string.
     """
     if "yyyyyyy" in ts_format:
-        raise DateTimeException(
+        exception = DateTimeException(
             f"Fail to recognize '{ts_format}' pattern in the DateTimeFormatter."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     if ts_format == "yy":
         if len(ts_value) != 2:
-            raise DateTimeException(f"Fail to parse '{ts_value}' in DateTimeFormatter.")
+            exception = DateTimeException(
+                f"Fail to parse '{ts_value}' in DateTimeFormatter."
+            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
 
     # For parsing, the acceptable fraction length can be [1, the number of contiguous 'S']
     s_contiguous = 0
@@ -10199,7 +11346,11 @@ def _timestamp_format_sanity_check(ts_value: str, ts_format: str) -> None:
             char_count += 1
 
     if s_contiguous + sum(x.isalnum() for x in ts_value) < char_count:
-        raise DateTimeException(f"Fail to parse '{ts_value}' in DateTimeFormatter.")
+        exception = DateTimeException(
+            f"Fail to parse '{ts_value}' in DateTimeFormatter."
+        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
 
 def _bounded_long_floor_expr(expr):
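The final check grants the value a slack of `s_contiguous` characters: a run of N `'S'` letters (fractional seconds) counts N toward the pattern's alphanumeric total, but under this loose check the value may supply fewer fraction digits. A standalone sketch; the counting loop is reconstructed from the partial context (only `s_contiguous = 0` and `char_count += 1` are visible in the diff):

```python
# Standalone rendition of the fractional-seconds slack in
# _timestamp_format_sanity_check; the counting loop is an assumption
# reconstructed from the visible context lines.
def sanity_check(ts_value: str, ts_format: str) -> None:
    s_contiguous = 0  # longest run of contiguous 'S' in the pattern
    run = 0
    char_count = 0    # alphanumeric characters the pattern expects
    for ch in ts_format:
        run = run + 1 if ch == "S" else 0
        s_contiguous = max(s_contiguous, run)
        if ch.isalnum():
            char_count += 1
    if s_contiguous + sum(x.isalnum() for x in ts_value) < char_count:
        raise ValueError(f"Fail to parse '{ts_value}' in DateTimeFormatter.")


# One fraction digit satisfies an 'SSS' pattern under this check.
sanity_check("2024-01-02 03:04:05.1", "yyyy-MM-dd HH:mm:ss.SSS")
```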
@@ -10326,17 +11477,21 @@ def _validate_number_format_string(format_str: str) -> None:
     """
 
     def _unexpected_char(char):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[INVALID_FORMAT.UNEXPECTED_TOKEN] The format is invalid: '{original_format}'. "
             f"Found the unexpected character '{char}' in the format string; "
             "the structure of the format string must match: "
             "`[MI|S]` `[$]` `[0|9|G|,]*` `[.|D]` `[0|9]*` `[$]` `[PR|MI|S]`."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     if not format_str:
-        raise AnalysisException(
+        exception = AnalysisException(
             "[INVALID_FORMAT.EMPTY] The format is invalid: ''. The number format string cannot be empty."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # Create a working copy of the format string
     remaining = format_str
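`_validate_number_format_string` enforces the grammar named in the error message, `[MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]`, by consuming the string and calling `_unexpected_char` on the first token that breaks it. A tiny regex approximation of the same grammar; the real validator walks the string token by token rather than using a regex:

```python
# Regex approximation of the number-format grammar the validator enforces;
# an illustration, not the package's token-by-token implementation.
import re

_NUMBER_FORMAT = re.compile(r"(MI|S)?\$?[09G,]*([.D][09]*)?\$?(PR|MI|S)?")


def is_valid_number_format(fmt: str) -> bool:
    # Empty formats and formats without a digit are rejected, matching the
    # INVALID_FORMAT.EMPTY and WRONG_NUM_DIGIT errors above.
    return (
        bool(fmt)
        and _NUMBER_FORMAT.fullmatch(fmt) is not None
        and any(c in "09" for c in fmt)
    )


assert is_valid_number_format("$99,999.99")
assert not is_valid_number_format("")    # INVALID_FORMAT.EMPTY
assert not is_valid_number_format("$$")  # WRONG_NUM_DIGIT
```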
@@ -10412,9 +11567,11 @@ def _validate_number_format_string(format_str: str) -> None:
             _unexpected_char(char)
 
     # If no invalid character found but no digits, it's still invalid
-    raise AnalysisException(
+    exception = AnalysisException(
         f"[INVALID_FORMAT.WRONG_NUM_DIGIT] The format is invalid: '{format_str}'. The format string requires at least one number digit."
     )
+    attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+    raise exception
 
 
 def _trim_helper(value: Column, trim_value: Column, trim_type: Column) -> Column:
@@ -10465,3 +11622,30 @@ def _map_from_spark_tz(value: Column) -> Column:
         .when(value == "VST", snowpark_fn.lit("Asia/Ho_Chi_Minh"))
         .otherwise(value)  # Return original timezone if no mapping found
     )
+
+
+def _calculate_total_months(interval_arg):
+    """Calculate total months from a year-month interval."""
+    years = snowpark_fn.date_part("year", interval_arg)
+    months = snowpark_fn.date_part("month", interval_arg)
+    return years * 12 + months
+
+
+def _calculate_total_days(interval_arg):
+    """Calculate total days from a day-time interval."""
+    days = snowpark_fn.date_part("day", interval_arg)
+    hours = snowpark_fn.date_part("hour", interval_arg)
+    minutes = snowpark_fn.date_part("minute", interval_arg)
+    seconds = snowpark_fn.date_part("second", interval_arg)
+    # Convert hours, minutes, seconds to fractional days
+    fractional_days = (hours * 3600 + minutes * 60 + seconds) / 86400
+    return days + fractional_days
+
+
+def _calculate_total_seconds(interval_arg):
+    """Calculate total seconds from a day-time interval."""
+    days = snowpark_fn.date_part("day", interval_arg)
+    hours = snowpark_fn.date_part("hour", interval_arg)
+    minutes = snowpark_fn.date_part("minute", interval_arg)
+    seconds = snowpark_fn.date_part("second", interval_arg)
+    return days * 86400 + hours * 3600 + minutes * 60 + seconds
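The three new helpers decompose an interval with `snowpark_fn.date_part` and recombine the fields into one scalar. The arithmetic is easy to check by hand; a plain-Python rendition with the extracted fields passed in directly:

```python
# Plain-Python rendition of the new interval helpers; the fields Snowpark
# extracts with date_part are passed in as arguments here.
def total_months(years: int, months: int) -> int:
    return years * 12 + months


def total_seconds(days: int, hours: int, minutes: int, seconds: float) -> float:
    return days * 86400 + hours * 3600 + minutes * 60 + seconds


assert total_months(2, 3) == 27             # INTERVAL '2-3' YEAR TO MONTH
assert total_seconds(1, 2, 30, 0) == 95400  # 1 day 02:30:00
```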