snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff compares the contents of publicly available package versions as published to their respective public registries, and is provided for informational purposes only.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +131 -34
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -1
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/pandas_udtf_utils.py

```diff
@@ -13,6 +13,10 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import IntegerType, PandasDataFrameType, StructType
 
+# Removed error imports to avoid UDF serialization issues
+# from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+# from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 def get_map_in_arrow_udtf(
     user_function: Callable,
@@ -60,14 +64,14 @@ def get_map_in_arrow_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pyarrow.RecordBatch, but is {type(result_iterator).__name__}"
             )
 
         for batch in result_iterator:
             if not isinstance(batch, pa.RecordBatch):
                 raise RuntimeError(
-                    f"snowpark_connect::
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should "
                     f"be iterator of pyarrow.RecordBatch, but is iterator of {type(batch).__name__}"
                 )
             if batch.num_rows > 0:
@@ -121,7 +125,7 @@ def create_pandas_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
            )
 
@@ -140,7 +144,7 @@ def create_pandas_udtf(
                 if column not in self.output_column_original_names
             ]
             raise RuntimeError(
-                f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                f"[snowpark_connect::invalid_operation] [RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
                 "."
             )
         reordered_df = output_df[self.output_column_original_names]
```
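A recurring change in this release: user-facing error messages gain a machine-readable `[snowpark_connect::<error_code>]` prefix (for example `type_mismatch` and `invalid_operation` above). As an illustration only, not part of the package, a client could strip the tag back out of a message like this; the regex and helper name here are assumptions:

```python
import re

# Tags observed in this diff; the full list lives in
# snowflake/snowpark_connect/error/error_codes.py (not shown here).
_TAG = re.compile(r"^\[snowpark_connect::(?P<code>[a-z_]+)\]\s*(?P<rest>.*)$", re.S)

def split_error_code(message: str) -> tuple[str | None, str]:
    """Return (code, remaining message); code is None for untagged messages."""
    m = _TAG.match(message)
    return (m.group("code"), m.group("rest")) if m else (None, message)

code, rest = split_error_code(
    "[snowpark_connect::type_mismatch] Return type of the user-defined function "
    "should be iterator of pyarrow.RecordBatch, but is list"
)
assert code == "type_mismatch" and rest.startswith("Return type")
```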
snowflake/snowpark_connect/utils/profiling.py

```diff
@@ -10,6 +10,7 @@
 
 import cProfile
 import functools
+import inspect
 import os
 from datetime import datetime
 from typing import Any, Callable
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
         profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
 
         profiler = cProfile.Profile()
-
-
-
-
-
-
-
-
+
+        if inspect.isgeneratorfunction(method):
+
+            def profiled_generator():
+                profiler.enable()
+                try:
+                    generator = method(*args, **kwargs)
+                    for item in generator:
+                        profiler.disable()
+                        yield item
+                        profiler.enable()
+                finally:
+                    profiler.disable()
+                    profiler.dump_stats(profile_filename)
+
+            return profiled_generator()
+        else:
+            profiler.enable()
+            try:
+                result = method(*args, **kwargs)
+                return result
+            finally:
+                profiler.disable()
+                profiler.dump_stats(profile_filename)
 
     return wrapper
```
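This hunk makes `profile_method` generator-aware: for a generator function, the profiler stays enabled only while the generator body is executing, is paused while the consumer holds control between yields, and dumps its stats when the generator finishes. A minimal standalone sketch of the same pattern (the `profile` name is hypothetical; the real decorator writes `.prof` files to `PROFILE_OUTPUT_DIR` instead of printing):

```python
import cProfile
import functools
import inspect

def profile(func):
    """Sketch of a generator-aware profiling decorator, as in the hunk above."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        profiler = cProfile.Profile()
        if inspect.isgeneratorfunction(func):
            def profiled_generator():
                profiler.enable()
                try:
                    for item in func(*args, **kwargs):
                        profiler.disable()  # caller's code between yields is not profiled
                        yield item
                        profiler.enable()
                finally:
                    profiler.disable()
                    profiler.print_stats("cumulative")
            return profiled_generator()
        profiler.enable()
        try:
            return func(*args, **kwargs)
        finally:
            profiler.disable()
            profiler.print_stats("cumulative")
    return wrapper

@profile
def squares(n):
    for i in range(n):
        yield i * i

assert list(squares(3)) == [0, 1, 4]
```

The earlier version only profiled the call that *created* the generator, which returns almost immediately; the rewrite captures the work done lazily on each `next()`.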
snowflake/snowpark_connect/utils/scala_udf_utils.py

```diff
@@ -22,6 +22,8 @@ from typing import List, Union
 
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.udf_utils import (
@@ -467,7 +469,9 @@ def map_type_to_scala_type(
         case snowpark_type.VariantType:
             return "Variant"
         case _:
-
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def map_type_to_snowflake_type(
@@ -533,7 +537,9 @@ def map_type_to_snowflake_type(
         case snowpark_type.VariantType:
             return "VARIANT"
         case _:
-
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def cast_scala_map_args_from_given_type(
@@ -573,7 +579,9 @@ def cast_scala_map_args_from_given_type(
         case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
             return "java.sql.Timestamp.valueOf({arg_name})"
         case _:
-
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
         not is_snowpark_type and input_type.WhichOneof("kind") == "map"
```
snowflake/snowpark_connect/utils/session.py

```diff
@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -191,7 +193,9 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
     if any("," in tag for tag in spark_tags):
-
+        exception = ValueError("Tags cannot contain ','.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
     snowpark_session = get_or_create_snowpark_session()
```
snowflake/snowpark_connect/utils/telemetry.py

```diff
@@ -27,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -346,6 +347,11 @@ class Telemetry:
             summary["error_message"] = str(e)
             summary["error_type"] = type(e).__name__
 
+            if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+                summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+            else:
+                summary["error_code"] = e.custom_error_code
+
             error_location = _error_location(e)
             if error_location:
                 summary["error_location"] = error_location
```
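Nearly every hunk in this release follows one pattern: build the exception, tag it with `attach_custom_error_code(exception, ErrorCodes.X)`, then raise; the telemetry hunk above reads the tag back from a `custom_error_code` attribute. Neither `error_utils` nor the concrete code values appear in this diff, so the sketch below is an assumption consistent with both sides of the pattern (string values are purely illustrative):

```python
class ErrorCodes:
    # Subset of codes that appear in this diff; real values are not shown.
    INTERNAL_ERROR = "internal_error"
    INVALID_INPUT = "invalid_input"
    INVALID_OPERATION = "invalid_operation"
    TYPE_MISMATCH = "type_mismatch"
    UNSUPPORTED_OPERATION = "unsupported_operation"
    UNSUPPORTED_TYPE = "unsupported_type"

def attach_custom_error_code(exception: BaseException, code: str) -> None:
    """Hypothetical implementation: stash the code on the exception object."""
    exception.custom_error_code = code

# Raising side (as in the session.py hunk):
try:
    exc = ValueError("Tags cannot contain ','.")
    attach_custom_error_code(exc, ErrorCodes.INVALID_INPUT)
    raise exc
except ValueError as e:
    # Reading side (as in the telemetry hunk):
    summary = {"error_code": getattr(e, "custom_error_code", None) or ErrorCodes.INTERNAL_ERROR}
    assert summary["error_code"] == "invalid_input"
```

Stashing the code on the exception object, rather than in the message, keeps the tag intact across re-raises and lets telemetry default to `INTERNAL_ERROR` for untagged errors.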
snowflake/snowpark_connect/utils/temporary_view_cache.py

```diff
@@ -7,6 +7,8 @@ from typing import Optional, Tuple
 from pyspark.errors import AnalysisException
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import get_session_id
 
@@ -22,9 +24,11 @@ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None
                 _temp_views.remove(key)
                 break
         else:
-
+            exception = AnalysisException(
                 f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
 
     _temp_views[(name, current_session_id)] = df
 
```
snowflake/snowpark_connect/utils/udf_cache.py

```diff
@@ -118,7 +118,7 @@ def cached_udaf(
 
     if class_type is None:
         raise ValueError(
-            "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+            "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
         )
     else:
         # return udaf
@@ -379,7 +379,9 @@ def register_cached_java_udf(
 
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         if jar_path_in_zip not in zip_ref.namelist():
-            raise FileNotFoundError(
+            raise FileNotFoundError(
+                f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+            )
         zip_ref.extract(jar_path_in_zip, temp_dir)
 
     jar_path = f"{temp_dir}/{jar_path_in_zip}"
@@ -388,7 +390,7 @@ def register_cached_java_udf(
 
     if upload_result[0].status != "UPLOADED":
         raise RuntimeError(
-            f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+            f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
         )
 
     udf_is_cached = function_name in cache
```
snowflake/snowpark_connect/utils/udf_helper.py

```diff
@@ -18,6 +18,8 @@ from snowflake.snowpark import Column, Session
 from snowflake.snowpark.types import DataType, _parse_datatype_json_value
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -233,19 +235,25 @@ def _check_supported_udf(
 
             session = get_or_create_snowpark_session()
             if udf_proto.java_udf.class_name not in session._cached_java_udfs:
-
+                exception = AnalysisException(
                     f"Can not load class {udf_proto.java_udf.class_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
             else:
-
+                exception = ValueError(
                     "Function type java_udf not supported for common inline user-defined function"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         case "scalar_scala_udf":
             pass
         case _ as function_type:
-
+            exception = ValueError(
                 f"Function type {function_type} not supported for common inline user-defined function"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _aggregate_function_check(
@@ -253,9 +261,11 @@ def _aggregate_function_check(
 ):
     name, is_aggregate_function = get_is_aggregate_function()
     if not udf_proto.deterministic and name != "default" and is_aggregate_function:
-
+        exception = AnalysisException(
             f"[AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION] Non-deterministic expression {name}({udf_proto.function_name}) should not appear in the arguments of an aggregate function."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
 
 def _join_checks(snowpark_udf_arg_names: list[str]):
@@ -282,23 +292,27 @@ def _join_checks(snowpark_udf_arg_names: list[str]):
         and is_left_evaluable
         and is_right_evaluable
     ):
-
+        exception = AnalysisException(
             f"Detected implicit cartesian product for {is_evaluating_join_condition[0]} join between logical plans. \n"
             f"Join condition is missing or trivial. \n"
             f"Either: use the CROSS JOIN syntax to allow cartesian products between those relations, or; "
             f"enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=True."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     if (
         is_evaluating_join_condition[0] != "INNER"
         and is_evaluating_join_condition[1]
         and is_left_evaluable
         and is_right_evaluable
     ):
-
+        exception = AnalysisException(
             f"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: "
             f"Python UDF in the ON clause of a {is_evaluating_join_condition[0]} JOIN. "
             f"In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def infer_snowpark_arguments(
```
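The `_join_checks` hunks only tag two existing guards: implicit cartesian products, and Python UDFs in the ON clause of non-INNER joins. As the first error message itself suggests, the cartesian-product guard can be addressed from the client side; a PySpark sketch (assumes a plain local SparkSession for brevity, not a Snowpark Connect session):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
left = spark.createDataFrame([(1,)], ["a"])
right = spark.createDataFrame([(2,)], ["b"])

# Option 1: ask for the cartesian product explicitly.
left.crossJoin(right).show()

# Option 2: opt in globally, as the error message suggests.
spark.conf.set("spark.sql.crossJoin.enabled", "true")
left.join(right).show()  # join without a condition is now allowed
```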
snowflake/snowpark_connect/utils/udf_utils.py

```diff
@@ -103,7 +103,7 @@ class ProcessCommonInlineUserDefinedFunction:
                 )
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
 
     @property
@@ -112,7 +112,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_args
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf args"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf args"
             )
 
     @property
@@ -121,7 +121,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_arg_names
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf arg names"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf arg names"
            )
 
     def _create_python_udf(self):
@@ -291,5 +291,5 @@ class ProcessCommonInlineUserDefinedFunction:
                 return create_scala_udf(self)
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
```
snowflake/snowpark_connect/utils/udtf_helper.py

```diff
@@ -16,6 +16,8 @@ import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark.types import DataType, StructType, _parse_datatype_json_value
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils import pandas_udtf_utils, udtf_utils
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -37,7 +39,9 @@ def udtf_check(
     udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
 ) -> None:
     if udtf_proto.WhichOneof("function") != "python_udtf":
-
+        exception = ValueError(f"Not python udtf {udtf_proto.function}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def require_creating_udtf_in_sproc(
```
snowflake/snowpark_connect/utils/udtf_utils.py

```diff
@@ -108,7 +108,9 @@ def create_udtf(
                 imports=imports,
             )
         case _:
-            raise NotImplementedError(
+            raise NotImplementedError(
+                f"[snowpark_connect::unsupported_operation] {called_from}"
+            )
 
 
 def artifacts_reader_wrapper(user_udtf_cls: type) -> type:
@@ -196,17 +198,19 @@ def _create_convert_table_argument_to_row():
                 # Named access: row["col1"], row["col2"]
                 if key in self._field_to_index:
                     return self._values[self._field_to_index[key]]
-                raise KeyError(key)
+                raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
             else:
-                raise TypeError(
+                raise TypeError(
+                    f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}"
+                )
 
         def __getattr__(self, name):
             # Attribute access: row.col1, row.col2
             if name.startswith("_"):
-                raise AttributeError(name)
+                raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
             if name in self._field_to_index:
                 return self._values[self._field_to_index[name]]
-            raise AttributeError(name)
+            raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
 
         def __len__(self):
             return len(self._values)
```
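The `_create_convert_table_argument_to_row` hunk retags errors in a small Row-like wrapper that resolves both `row["col"]` and `row.col` through a `_field_to_index` map. A self-contained sketch of that access pattern (the constructor is an assumption; the method bodies mirror the hunk):

```python
class Row:
    """Minimal Row-like wrapper: named item access and attribute access."""

    def __init__(self, field_names, values):
        self._field_to_index = {name: i for i, name in enumerate(field_names)}
        self._values = list(values)

    def __getitem__(self, key):
        if isinstance(key, str):
            # Named access: row["col1"], row["col2"]
            if key in self._field_to_index:
                return self._values[self._field_to_index[key]]
            raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
        raise TypeError(f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}")

    def __getattr__(self, name):
        # Attribute access: row.col1; refuse internals to avoid recursion
        if name.startswith("_"):
            raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
        if name in self._field_to_index:
            return self._values[self._field_to_index[name]]
        raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")

    def __len__(self):
        return len(self._values)

row = Row(["col1", "col2"], [1, "a"])
assert row["col1"] == 1 and row.col2 == "a" and len(row) == 2
```

Note that `__getattr__` is only invoked when normal attribute lookup fails, so reading `self._field_to_index` inside it is safe once `__init__` has run; the leading-underscore guard covers lookups before that.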
snowflake/snowpark_connect/utils/udtf_utils.py (continued)

```diff
@@ -280,7 +284,9 @@ def spark_compatible_udtf_wrapper(
             return val
         if isinstance(val, datetime.datetime):
             return val.date()
-        raise AttributeError(
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid date value {val}"
+        )
 
     def _coerce_to_binary(val: object, target_type_name: str = "byte") -> bytes | None:
         if target_type_name == "binary":
@@ -344,7 +350,9 @@
     def _coerce_to_timestamp(val: object) -> datetime.datetime | None:
         if isinstance(val, datetime.datetime):
             return val
-        raise AttributeError(
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid time stamp value {val}"
+        )
 
     SCALAR_COERCERS = {
         "bool": _coerce_to_bool,
@@ -448,7 +456,7 @@
 
         if not isinstance(raw_row_tuple, (tuple, list)):
             raise TypeError(
-                f"[UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
             )
 
         if len(raw_row_tuple) != len(expected_types):
@@ -468,7 +476,7 @@
                 and val is not None
             ):
                 raise RuntimeError(
-                    f"[UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
+                    f"[snowpark_connect::type_mismatch] [UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
                 )
 
         coerced_row_list = [None] * len(expected_types)
@@ -534,7 +542,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
                 return pa.map_(key_type, value_type)
             case _, _:
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
+                    f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
                 )
     elif kind == "array":
         element_type_info = type_marker
@@ -544,7 +552,7 @@
         struct_fields_info = type_marker
         if not isinstance(struct_fields_info, dict):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
             )
         fields = []
         for field_name, field_type_info in struct_fields_info.items():
@@ -553,7 +561,7 @@
         return pa.struct(fields)
     else:
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
+            f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
         )
 
     def _convert_to_arrow_value(
@@ -577,7 +585,7 @@
             ]
         if not isinstance(obj, (list, tuple)):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
             )
         element_type = arrow_type.value_type
         return [_convert_to_arrow_value(e, element_type, "array") for e in obj]
@@ -585,7 +593,7 @@
         if pa.types.is_map(arrow_type):
             if not isinstance(obj, dict):
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
+                    f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
                 )
             key_type = arrow_type.key_type
             value_type = arrow_type.item_type
@@ -611,7 +619,7 @@
             else:
                 # If the UDTF yields a list/tuple (or anything not a dict) for a struct column, it's an error.
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
+                    f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
                 )
 
         # Check if a scalar type is expected and if obj is a collection; if so, error out.
@@ -623,7 +631,7 @@
         ):
             if isinstance(obj, (list, tuple, dict)):
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
+                    f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
                 )
 
         if pa.types.is_boolean(arrow_type):
@@ -639,7 +647,7 @@
             elif obj == 1:
                 return True
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
             )
         if isinstance(obj, str):
             v_str = obj.strip().lower()
@@ -648,7 +656,7 @@
             if v_str == "false":
                 return False
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
             )
 
         if pa.types.is_integer(arrow_type):
@@ -664,7 +672,7 @@
             except ValueError:
                 pass
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
             )
 
         if pa.types.is_floating(arrow_type):
@@ -676,7 +684,7 @@
             except ValueError:
                 pass
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
             )
 
         if pa.types.is_string(arrow_type):
@@ -688,7 +696,7 @@
             if isinstance(obj, str):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
             )
 
         if pa.types.is_binary(arrow_type) or pa.types.is_fixed_size_binary(arrow_type):
@@ -699,21 +707,21 @@
             if isinstance(obj, int):
                 return bytearray([obj])
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
             )
 
         if pa.types.is_date(arrow_type):
             if isinstance(obj, datetime.date):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
             )
 
         if pa.types.is_timestamp(arrow_type):
             if isinstance(obj, datetime.datetime):
                 return obj
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
             )
 
         if pa.types.is_decimal(arrow_type):
@@ -728,11 +736,11 @@
                 pass
 
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
             )
 
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
+            f"[snowpark_connect::unsupported_operation] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
        )
 
     class WrappedUDTF:
```
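The long run of `[UDTF_ARROW_TYPE_CAST_ERROR]` hunks above only prepends error-code tags; the strict coercion logic is unchanged. For reference, a standalone sketch of the boolean rule visible in the hunks (accept bool, 0/1 integers, and "true"/"false" strings; reject everything else):

```python
import pyarrow as pa

def coerce_to_arrow_bool(obj):
    """Sketch of the strict boolean coercion shown in the hunks above."""
    if isinstance(obj, bool):
        return obj
    if isinstance(obj, int):
        if obj == 0:
            return False
        elif obj == 1:
            return True
        raise TypeError(
            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
        )
    if isinstance(obj, str):
        v_str = obj.strip().lower()
        if v_str == "true":
            return True
        if v_str == "false":
            return False
    raise TypeError(
        f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
    )

values = [coerce_to_arrow_bool(v) for v in (1, " True ", False, "false")]
assert pa.array(values, type=pa.bool_()).to_pylist() == [True, True, False, False]
```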
{snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.30.1
+Version: 0.31.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,8 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.
+Requires-Dist: snowflake-snowpark-python[pandas]<1.41.0,==1.40.0
+Requires-Dist: snowflake-connector-python<4.0.0,>=3.18.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0
```
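The METADATA hunk pins `snowflake-snowpark-python[pandas]` to exactly 1.40.0 and adds an explicit `snowflake-connector-python` requirement. A quick standard-library check of what a resolved environment actually installed (the expected values in the comments come from this diff, not a guarantee):

```python
from importlib.metadata import requires, version

print(version("snowpark-connect"))            # 0.31.0 per this diff
print(version("snowflake-snowpark-python"))   # 1.40.0 under the new exact pin
print(version("snowflake-connector-python"))  # >=3.18.0,<4.0.0 per the new requirement

# The declared requirements themselves, straight from the installed METADATA:
print([r for r in (requires("snowpark-connect") or []) if r.startswith("snowflake")])
```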