snowpark-connect 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +54 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +66 -4
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +36 -9
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +25 -6
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +24 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/utils.py +19 -2
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +146 -63
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -2
- snowflake/snowpark_connect/utils/telemetry.py +81 -18
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +81 -78
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.0.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.0.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/pandas_udtf_utils.py

@@ -13,6 +13,10 @@ import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.types import IntegerType, PandasDataFrameType, StructType
 
+# Removed error imports to avoid UDF serialization issues
+# from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+# from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 def get_map_in_arrow_udtf(
     user_function: Callable,
@@ -60,14 +64,14 @@ def get_map_in_arrow_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::…
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pyarrow.RecordBatch, but is {type(result_iterator).__name__}"
             )
 
         for batch in result_iterator:
             if not isinstance(batch, pa.RecordBatch):
                 raise RuntimeError(
-                    f"snowpark_connect::…
+                    f"[snowpark_connect::type_mismatch] Return type of the user-defined function should "
                     f"be iterator of pyarrow.RecordBatch, but is iterator of {type(batch).__name__}"
                 )
             if batch.num_rows > 0:
@@ -121,7 +125,7 @@ def create_pandas_udtf(
             result_iterator, "__iter__"
         ):
             raise RuntimeError(
-                f"snowpark_connect::…
+                f"[snowpark_connect::type_mismatch] Return type of the user-defined function should be "
                 f"iterator of pandas.DataFrame, but is {type(result_iterator).__name__}"
             )
 
@@ -140,7 +144,7 @@ def create_pandas_udtf(
                 if column not in self.output_column_original_names
             ]
             raise RuntimeError(
-                f"[RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
+                f"[snowpark_connect::invalid_operation] [RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF] Column names of the returned pandas.DataFrame do not match specified schema. Missing: {', '.join(sorted(missing_columns))}. Unexpected: {', '.join(sorted(unexpected_columns))}"
                 "."
             )
             reordered_df = output_df[self.output_column_original_names]
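The two type_mismatch checks above enforce the mapInArrow contract: the wrapped user function must return an iterator of pyarrow.RecordBatch, and each yielded element must itself be a RecordBatch. A minimal sketch of a conforming function (the column layout and the mapInArrow call site are illustrative assumptions, not taken from the diff):

    from typing import Iterator

    import pyarrow as pa
    import pyarrow.compute as pc

    def double_first_column(batches: Iterator[pa.RecordBatch]) -> Iterator[pa.RecordBatch]:
        for batch in batches:
            # Yield RecordBatch objects; yielding e.g. a pandas.DataFrame here
            # would trip the [snowpark_connect::type_mismatch] RuntimeError above.
            doubled = pc.multiply(batch.column(0), 2)
            yield pa.RecordBatch.from_arrays([doubled], names=[batch.schema.names[0]])

    # df.mapInArrow(double_first_column, schema="id long")  # hypothetical call site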
snowflake/snowpark_connect/utils/profiling.py

@@ -10,6 +10,7 @@
 
 import cProfile
 import functools
+import inspect
 import os
 from datetime import datetime
 from typing import Any, Callable
@@ -35,13 +36,29 @@ def profile_method(method: Callable) -> Callable:
         profile_filename = f"{PROFILE_OUTPUT_DIR}/{method_name}_{timestamp}.prof"
 
         profiler = cProfile.Profile()
-
-        profiler.enable()
-        try:
-            result = method(*args, **kwargs)
-            return result
-        finally:
-            profiler.disable()
-            profiler.dump_stats(profile_filename)
+
+        if inspect.isgeneratorfunction(method):
+
+            def profiled_generator():
+                profiler.enable()
+                try:
+                    generator = method(*args, **kwargs)
+                    for item in generator:
+                        profiler.disable()
+                        yield item
+                        profiler.enable()
+                finally:
+                    profiler.disable()
+                    profiler.dump_stats(profile_filename)
+
+            return profiled_generator()
+        else:
+            profiler.enable()
+            try:
+                result = method(*args, **kwargs)
+                return result
+            finally:
+                profiler.disable()
+                profiler.dump_stats(profile_filename)
 
     return wrapper
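The new generator branch matters because a naive wrapper would enable the profiler, get the generator object back immediately, and dump empty stats before any item was produced; suspending the profiler around each yield also keeps the consumer's time out of the profile. A self-contained sketch of the same pattern (names are illustrative; stats are printed instead of dumped to a file):

    import cProfile
    import functools
    import inspect

    def profile_calls(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            profiler = cProfile.Profile()
            if inspect.isgeneratorfunction(func):
                def profiled_generator():
                    profiler.enable()
                    try:
                        for item in func(*args, **kwargs):
                            profiler.disable()  # pause while the caller consumes the item
                            yield item
                            profiler.enable()   # resume when iteration continues
                    finally:
                        profiler.disable()
                        profiler.print_stats("cumulative")
                return profiled_generator()
            profiler.enable()
            try:
                return func(*args, **kwargs)
            finally:
                profiler.disable()
                profiler.print_stats("cumulative")
        return wrapper

    @profile_calls
    def stream_rows():
        yield from range(3)

    list(stream_rows())  # stats appear once the generator is exhausted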
snowflake/snowpark_connect/utils/scala_udf_utils.py

@@ -22,6 +22,8 @@ from typing import List, Union
 
 import snowflake.snowpark.types as snowpark_type
 import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.udf_utils import (
@@ -467,7 +469,9 @@ def map_type_to_scala_type(
         case snowpark_type.VariantType:
             return "Variant"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def map_type_to_snowflake_type(
@@ -533,7 +537,9 @@ def map_type_to_snowflake_type(
         case snowpark_type.VariantType:
             return "VARIANT"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
 
 def cast_scala_map_args_from_given_type(
@@ -573,7 +579,9 @@ def cast_scala_map_args_from_given_type(
         case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
             return "java.sql.Timestamp.valueOf({arg_name})"
         case _:
-            raise ValueError(f"Unsupported Snowpark type: {t}")
+            exception = ValueError(f"Unsupported Snowpark type: {t}")
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
 
     if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
         not is_snowpark_type and input_type.WhichOneof("kind") == "map"
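This construct/attach/raise sequence recurs throughout the release. The diff never shows attach_custom_error_code itself, but the telemetry change below reads e.custom_error_code, so the simplest implementation consistent with both sides is an attribute set on the exception. A speculative sketch (the code value is hypothetical):

    class ErrorCodes:
        UNSUPPORTED_TYPE = "unsupported_type"  # hypothetical value, not from the diff

    def attach_custom_error_code(exception: Exception, code: str) -> None:
        # Assumption: the helper just records the code on the exception object,
        # which is what the telemetry path below reads back.
        exception.custom_error_code = code

    try:
        exception = ValueError("Unsupported Snowpark type: GeographyType")
        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
        raise exception
    except ValueError as e:
        print(getattr(e, "custom_error_code", None))  # -> "unsupported_type"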
snowflake/snowpark_connect/utils/session.py

@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.describe_query_cache import (
     instrument_session_for_describe_cache,
 )
@@ -120,7 +122,6 @@ def configure_snowpark_session(session: snowpark.Session):
         "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
         "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
         "QUERY_TAG": f"'{query_tag}'",
-        "FEATURE_INTERVAL_TYPES": "enabled",
     }
 
     session.sql(
@@ -192,7 +193,9 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     """Sets Snowpark session query_tag value to the tag from the Spark request."""
 
     if any("," in tag for tag in spark_tags):
-        raise ValueError("Tags cannot contain ','.")
+        exception = ValueError("Tags cannot contain ','.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
 
     # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
     snowpark_session = get_or_create_snowpark_session()
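Per the set_query_tags docstring, the tags come from the Spark request and land in Snowflake's QUERY_TAG, which is presumably why a comma inside a single tag is rejected (tags are comma-joined). A hypothetical client-side view, assuming Spark Connect session tags are what gets forwarded (SparkSession.addTag exists in PySpark 3.5+):

    spark.addTag("nightly-etl")    # forwarded into the Snowflake QUERY_TAG
    spark.addTag("team:finance")   # fine: no comma
    spark.addTag("team,finance")   # rejected: tags cannot contain ','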
snowflake/snowpark_connect/utils/telemetry.py

@@ -11,8 +11,8 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterable
 from contextvars import ContextVar
+from dataclasses import dataclass
 from enum import Enum, unique
-from typing import Dict
 
 import google.protobuf.message
 import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -27,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -57,6 +58,7 @@ class TelemetryType(Enum):
 
 class EventType(Enum):
     SERVER_STARTED = "scos_server_started"
+    WARNING = "scos_warning"
 
 
 # global labels
@@ -106,7 +108,16 @@ REDACTED_PLAN_SUFFIXES = [
 ]
 
 
-def _basic_telemetry_data() -> dict:
+@dataclass
+class TelemetryMessage:
+    """Container for telemetry messages in the processing queue."""
+
+    message: dict
+    timestamp: int
+    is_warning: bool
+
+
+def _basic_telemetry_data() -> dict:
     return {
         **STATIC_TELEMETRY_DATA,
         TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
@@ -123,9 +134,11 @@ def safe(func):
     def wrap(*args, **kwargs):
         try:
             func(*args, **kwargs)
-        except Exception:
-            # …
-            …
+        except Exception as e:
+            # report failed operation to telemetry
+            telemetry.send_warning_msg(
+                f"Telemetry operation {func} failed due to exception", e
+            )
 
     return wrap
 
@@ -334,6 +347,11 @@ class Telemetry:
             summary["error_message"] = str(e)
             summary["error_type"] = type(e).__name__
 
+            if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+                summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+            else:
+                summary["error_code"] = e.custom_error_code
+
             error_location = _error_location(e)
             if error_location:
                 summary["error_location"] = error_location
@@ -528,8 +546,8 @@ class Telemetry:
     @safe
     def send_request_summary_telemetry(self):
         if self._not_in_request():
-            …
-                "…
+            self.send_warning_msg(
+                "Trying to send request summary telemetry without initializing it"
             )
             return
 
@@ -541,14 +559,56 @@ class Telemetry:
         }
         self._send(message)
 
-    def _send(self, msg: dict) -> None:
+    def send_warning_msg(self, msg: str, e: Exception = None) -> None:
+        # using this within @safe decorator may result in recursive loop
+        try:
+            message = self._build_warning_message(msg, e)
+            if not message:
+                return
+
+            self._send(message, is_warning=True)
+        except Exception:
+            # if there's an exception here, there's nothing we can really do about it
+            pass
+
+    def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
+        try:
+            data = {"warning_message": warning_msg}
+            if e is not None:
+                data["exception"] = repr(e)
+
+            # add session and operation id if available
+            spark_session_id = self._request_summary.get().get("spark_session_id", None)
+            if spark_session_id is not None:
+                data["spark_session_id"] = spark_session_id
+
+            spark_operation_id = self._request_summary.get().get(
+                "spark_operation_id", None
+            )
+            if spark_operation_id is not None:
+                data["spark_operation_id"] = spark_operation_id
+
+            message = {
+                **_basic_telemetry_data(),
+                TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
+                TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
+                TelemetryField.KEY_DATA.value: data,
+            }
+            return message
+        except Exception:
+            return {}
+
+    def _send(self, msg: dict, is_warning: bool = False) -> None:
         """Queue a telemetry message for asynchronous processing."""
         if not self._is_enabled:
             return
 
         timestamp = get_time_millis()
         try:
-            self._message_queue.put_nowait(…
+            telemetry_msg = TelemetryMessage(
+                message=msg, timestamp=timestamp, is_warning=is_warning
+            )
+            self._message_queue.put_nowait(telemetry_msg)
         except queue.Full:
             # If queue is full, drop the message to avoid blocking
             logger.warning("Telemetry queue is full, dropping message")
@@ -566,13 +626,16 @@ class Telemetry:
         while True:
             try:
                 # block to allow the GIL to switch threads
-                …
-                if …
-                    # shutdown
+                telemetry_msg = self._message_queue.get()
+                if telemetry_msg is None:
+                    # shutdown signal
                     break
-                self._sink.add_telemetry_data(
-                    …
-                …
+                self._sink.add_telemetry_data(
+                    telemetry_msg.message, telemetry_msg.timestamp
+                )
+            except Exception as e:
+                if not telemetry_msg.is_warning:
+                    self.send_warning_msg("Failed to add telemetry message to sink", e)
             finally:
                 self._message_queue.task_done()
 
@@ -585,7 +648,7 @@ class Telemetry:
             return
 
         try:
-            self._message_queue.put_nowait(…
+            self._message_queue.put_nowait(None)
             # Wait for worker thread to finish
             self._worker_thread.join(timeout=3.0)
         except Exception:
@@ -594,7 +657,7 @@ class Telemetry:
         )
 
 
-def _error_location(e: Exception) -> …
+def _error_location(e: Exception) -> dict | None:
     """
     Inspect the exception traceback and extract the file name, line number, and function name
     from the last frame (the one that raised the exception).
@@ -675,7 +738,7 @@ def _protobuf_to_json_with_redaction(
     """Recursively convert protobuf message to dict"""
 
     if not isinstance(msg, google.protobuf.message.Message):
-        …
+        telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
        return {}
 
     result = {}
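Taken together, the telemetry changes implement a bounded-queue worker with a None shutdown sentinel, now carrying a structured TelemetryMessage instead of loose values. A self-contained sketch of the pattern (the sink is faked with print; the names mirror the diff but the wiring is illustrative):

    import queue
    import threading
    from dataclasses import dataclass

    @dataclass
    class TelemetryMessage:
        message: dict
        timestamp: int
        is_warning: bool

    msg_queue: queue.Queue = queue.Queue(maxsize=100)

    def worker() -> None:
        while True:
            try:
                item = msg_queue.get()  # blocking get lets the GIL switch threads
                if item is None:        # None is the shutdown sentinel
                    break
                print("sink <-", item.message, item.timestamp)  # stand-in for the real sink
            finally:
                msg_queue.task_done()

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
    try:
        msg_queue.put_nowait(TelemetryMessage({"event": "demo"}, 0, False))
    except queue.Full:
        pass  # drop rather than block, as _send does
    msg_queue.put_nowait(None)  # request shutdown
    thread.join(timeout=3.0)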
snowflake/snowpark_connect/utils/temporary_view_cache.py

@@ -7,6 +7,8 @@ from typing import Optional, Tuple
 from pyspark.errors import AnalysisException
 
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.context import get_session_id
 
@@ -22,9 +24,11 @@ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None
             _temp_views.remove(key)
             break
     else:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
     _temp_views[(name, current_session_id)] = df
 
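The replace branch corresponds to createOrReplaceTempView, while the AnalysisException path matches Spark's createTempView semantics. A hypothetical client-side repro:

    df = spark.range(3)
    df.createTempView("v")
    df.createOrReplaceTempView("v")  # fine: takes the replace path
    df.createTempView("v")           # AnalysisException: [TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] ...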
snowflake/snowpark_connect/utils/udf_cache.py

@@ -118,7 +118,7 @@ def cached_udaf(
 
     if class_type is None:
         raise ValueError(
-            "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+            "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
         )
     else:
         # return udaf
@@ -379,7 +379,9 @@ def register_cached_java_udf(
 
     with zipfile.ZipFile(zip_path, "r") as zip_ref:
         if jar_path_in_zip not in zip_ref.namelist():
-            raise FileNotFoundError(…
+            raise FileNotFoundError(
+                f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+            )
         zip_ref.extract(jar_path_in_zip, temp_dir)
 
     jar_path = f"{temp_dir}/{jar_path_in_zip}"
@@ -388,7 +390,7 @@ def register_cached_java_udf(
 
     if upload_result[0].status != "UPLOADED":
         raise RuntimeError(
-            f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+            f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
         )
 
     udf_is_cached = function_name in cache
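A convention visible across this release: exceptions that cross serialization boundaries (and so cannot reliably carry a custom_error_code attribute) embed a machine-readable [snowpark_connect::<code>] prefix in the message instead. A sketch of how a client could route on that prefix (the helper is hypothetical):

    import re

    _CODE_PREFIX = re.compile(r"^\[snowpark_connect::([a-z_]+)\]")

    def parse_error_code(message: str) -> str | None:
        match = _CODE_PREFIX.match(message)
        return match.group(1) if match else None

    assert parse_error_code("[snowpark_connect::invalid_input] udfs.jar not found") == "invalid_input"
    assert parse_error_code("unrelated message") is None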
snowflake/snowpark_connect/utils/udf_helper.py

@@ -18,6 +18,8 @@ from snowflake.snowpark import Column, Session
 from snowflake.snowpark.types import DataType, _parse_datatype_json_value
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.config import global_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.map_expression import (
     map_single_column_expression,
 )
@@ -233,19 +235,25 @@ def _check_supported_udf(
 
             session = get_or_create_snowpark_session()
             if udf_proto.java_udf.class_name not in session._cached_java_udfs:
-                raise AnalysisException(
+                exception = AnalysisException(
                     f"Can not load class {udf_proto.java_udf.class_name}"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
             else:
-                raise ValueError(
+                exception = ValueError(
                     "Function type java_udf not supported for common inline user-defined function"
                 )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+                raise exception
         case "scalar_scala_udf":
             pass
         case _ as function_type:
-            raise ValueError(
+            exception = ValueError(
                 f"Function type {function_type} not supported for common inline user-defined function"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def _aggregate_function_check(
@@ -253,9 +261,11 @@ def _aggregate_function_check(
 ):
     name, is_aggregate_function = get_is_aggregate_function()
     if not udf_proto.deterministic and name != "default" and is_aggregate_function:
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION] Non-deterministic expression {name}({udf_proto.function_name}) should not appear in the arguments of an aggregate function."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
 
 
 def _join_checks(snowpark_udf_arg_names: list[str]):
@@ -282,23 +292,27 @@ def _join_checks(snowpark_udf_arg_names: list[str]):
         and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
            f"Detected implicit cartesian product for {is_evaluating_join_condition[0]} join between logical plans. \n"
             f"Join condition is missing or trivial. \n"
             f"Either: use the CROSS JOIN syntax to allow cartesian products between those relations, or; "
             f"enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=True."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
     if (
         is_evaluating_join_condition[0] != "INNER"
         and is_evaluating_join_condition[1]
        and is_left_evaluable
         and is_right_evaluable
     ):
-        raise AnalysisException(
+        exception = AnalysisException(
             f"[UNSUPPORTED_FEATURE.PYTHON_UDF_IN_ON_CLAUSE] The feature is not supported: "
             f"Python UDF in the ON clause of a {is_evaluating_join_condition[0]} JOIN. "
             f"In case of an INNNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def infer_snowpark_arguments(
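The first _join_checks branch enforces the implicit-cartesian-product guard for joins whose condition involves a UDF; the error message itself names the two ways out. A hypothetical client-side view:

    left = spark.range(3)
    right = spark.range(3)

    # A missing or trivial UDF join condition raises:
    #   AnalysisException: Detected implicit cartesian product for INNER join ...
    spark.conf.set("spark.sql.crossJoin.enabled", True)  # opt in globally, or:
    left.crossJoin(right).count()                        # state the intent explicitly -> 9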
snowflake/snowpark_connect/utils/udf_utils.py

@@ -103,7 +103,7 @@ class ProcessCommonInlineUserDefinedFunction:
                 )
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
 
     @property
@@ -112,7 +112,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_args
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf args"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf args"
             )
 
     @property
@@ -121,7 +121,7 @@ class ProcessCommonInlineUserDefinedFunction:
             return self._snowpark_udf_arg_names
         else:
             raise ValueError(
-                "Column mapping is not provided, cannot get snowpark udf arg names"
+                "[snowpark_connect::internal_error] Column mapping is not provided, cannot get snowpark udf arg names"
             )
 
     def _create_python_udf(self):
@@ -291,5 +291,5 @@ class ProcessCommonInlineUserDefinedFunction:
                 return create_scala_udf(self)
             case _:
                 raise ValueError(
-                    f"Function type {self._function_type} not supported for common inline user-defined function"
+                    f"[snowpark_connect::unsupported_operation] Function type {self._function_type} not supported for common inline user-defined function"
                 )
snowflake/snowpark_connect/utils/udtf_helper.py

@@ -16,6 +16,8 @@ import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
 from snowflake.snowpark.types import DataType, StructType, _parse_datatype_json_value
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
 from snowflake.snowpark_connect.utils import pandas_udtf_utils, udtf_utils
 from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
@@ -37,7 +39,9 @@ def udtf_check(
     udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
 ) -> None:
     if udtf_proto.WhichOneof("function") != "python_udtf":
-        raise ValueError(f"Not python udtf {udtf_proto.function}")
+        exception = ValueError(f"Not python udtf {udtf_proto.function}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
 
 def require_creating_udtf_in_sproc(