snowpark-connect 0.21.0__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/config.py +19 -3
- snowflake/snowpark_connect/error/error_utils.py +25 -0
- snowflake/snowpark_connect/expression/map_udf.py +4 -4
- snowflake/snowpark_connect/expression/map_unresolved_function.py +203 -128
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +102 -18
- snowflake/snowpark_connect/relation/map_column_ops.py +21 -2
- snowflake/snowpark_connect/relation/map_map_partitions.py +3 -1
- snowflake/snowpark_connect/relation/map_sql.py +18 -191
- snowflake/snowpark_connect/relation/map_udtf.py +4 -4
- snowflake/snowpark_connect/relation/read/map_read_json.py +12 -1
- snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
- snowflake/snowpark_connect/relation/write/map_write.py +68 -24
- snowflake/snowpark_connect/server.py +9 -0
- snowflake/snowpark_connect/type_mapping.py +4 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
- snowflake/snowpark_connect/utils/session.py +0 -4
- snowflake/snowpark_connect/utils/telemetry.py +213 -61
- snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/__init__.py +0 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
- snowflake/snowpark_decoder/dp_session.py +111 -0
- snowflake/snowpark_decoder/spark_decoder.py +76 -0
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +2 -2
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +40 -29
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
- spark/__init__.py +0 -0
- spark/connect/__init__.py +0 -0
- spark/connect/envelope_pb2.py +31 -0
- spark/connect/envelope_pb2.pyi +46 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.21.0.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.21.0.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/NOTICE-binary +0 -0
|
@@ -168,6 +168,9 @@ class GlobalConfig:
|
|
|
168
168
|
"snowpark.connect.udf.packages": lambda session, packages: session.add_packages(
|
|
169
169
|
*packages.strip("[] ").split(",")
|
|
170
170
|
),
|
|
171
|
+
"snowpark.connect.udf.imports": lambda session, imports: parse_imports(
|
|
172
|
+
session, imports
|
|
173
|
+
),
|
|
171
174
|
}
|
|
172
175
|
|
|
173
176
|
float_config_list = []
|
|
@@ -332,7 +335,7 @@ def route_config_proto(
|
|
|
332
335
|
match op_type:
|
|
333
336
|
case "set":
|
|
334
337
|
logger.info("SET")
|
|
335
|
-
|
|
338
|
+
telemetry.report_config_set(config.operation.set.pairs)
|
|
336
339
|
for pair in config.operation.set.pairs:
|
|
337
340
|
# Check if the value field is present, not present when invalid fields are set in conf.
|
|
338
341
|
if not pair.HasField("value"):
|
|
@@ -342,7 +345,6 @@ def route_config_proto(
|
|
|
342
345
|
f"Cannot set config '{pair.key}' to None"
|
|
343
346
|
)
|
|
344
347
|
|
|
345
|
-
telemetry.report_config_set(pair.key, pair.value)
|
|
346
348
|
set_config_param(
|
|
347
349
|
config.session_id, pair.key, pair.value, snowpark_session
|
|
348
350
|
)
|
|
@@ -350,14 +352,15 @@ def route_config_proto(
|
|
|
350
352
|
return proto_base.ConfigResponse(session_id=config.session_id)
|
|
351
353
|
case "unset":
|
|
352
354
|
logger.info("UNSET")
|
|
355
|
+
telemetry.report_config_unset(config.operation.unset.keys)
|
|
353
356
|
for key in config.operation.unset.keys:
|
|
354
|
-
telemetry.report_config_unset(key)
|
|
355
357
|
unset_config_param(config.session_id, key, snowpark_session)
|
|
356
358
|
|
|
357
359
|
return proto_base.ConfigResponse(session_id=config.session_id)
|
|
358
360
|
case "get":
|
|
359
361
|
logger.info("GET")
|
|
360
362
|
res = proto_base.ConfigResponse(session_id=config.session_id)
|
|
363
|
+
telemetry.report_config_get(config.operation.get.keys)
|
|
361
364
|
for key in config.operation.get.keys:
|
|
362
365
|
pair = res.pairs.add()
|
|
363
366
|
pair.key = key
|
|
@@ -367,6 +370,9 @@ def route_config_proto(
|
|
|
367
370
|
return res
|
|
368
371
|
case "get_with_default":
|
|
369
372
|
logger.info("GET_WITH_DEFAULT")
|
|
373
|
+
telemetry.report_config_get(
|
|
374
|
+
[pair.key for pair in config.operation.get_with_default.pairs]
|
|
375
|
+
)
|
|
370
376
|
result_pairs = [
|
|
371
377
|
proto_base.KeyValue(
|
|
372
378
|
key=pair.key,
|
|
@@ -383,6 +389,7 @@ def route_config_proto(
|
|
|
383
389
|
case "get_option":
|
|
384
390
|
logger.info("GET_OPTION")
|
|
385
391
|
res = proto_base.ConfigResponse(session_id=config.session_id)
|
|
392
|
+
telemetry.report_config_get(config.operation.get_option.keys)
|
|
386
393
|
for key in config.operation.get_option.keys:
|
|
387
394
|
pair = res.pairs.add()
|
|
388
395
|
pair.key = key
|
|
@@ -411,6 +418,7 @@ def route_config_proto(
|
|
|
411
418
|
case "is_modifiable":
|
|
412
419
|
logger.info("IS_MODIFIABLE")
|
|
413
420
|
res = proto_base.ConfigResponse(session_id=config.session_id)
|
|
421
|
+
telemetry.report_config_get(config.operation.is_modifiable.keys)
|
|
414
422
|
for key in config.operation.is_modifiable.keys:
|
|
415
423
|
pair = res.pairs.add()
|
|
416
424
|
pair.key = key
|
|
@@ -588,3 +596,11 @@ def auto_uppercase_non_column_identifiers() -> bool:
|
|
|
588
596
|
return session_config[
|
|
589
597
|
"snowpark.connect.sql.identifiers.auto-uppercase"
|
|
590
598
|
].lower() in ("all", "all_except_columns")
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def parse_imports(session: snowpark.Session, imports: str | None) -> None:
|
|
602
|
+
if not imports:
|
|
603
|
+
return
|
|
604
|
+
|
|
605
|
+
for udf_import in imports.strip("[] ").split(","):
|
|
606
|
+
session.add_import(udf_import)
|
|
@@ -28,7 +28,9 @@ from pyspark.errors.exceptions.base import (
|
|
|
28
28
|
PySparkException,
|
|
29
29
|
PythonException,
|
|
30
30
|
SparkRuntimeException,
|
|
31
|
+
UnsupportedOperationException,
|
|
31
32
|
)
|
|
33
|
+
from pyspark.errors.exceptions.connect import SparkConnectGrpcException
|
|
32
34
|
from snowflake.core.exceptions import NotFoundError
|
|
33
35
|
|
|
34
36
|
from snowflake.connector.errors import ProgrammingError
|
|
@@ -49,7 +51,9 @@ SPARK_PYTHON_TO_JAVA_EXCEPTION = {
|
|
|
49
51
|
ArrayIndexOutOfBoundsException: "java.lang.ArrayIndexOutOfBoundsException",
|
|
50
52
|
NumberFormatException: "java.lang.NumberFormatException",
|
|
51
53
|
SparkRuntimeException: "org.apache.spark.SparkRuntimeException",
|
|
54
|
+
SparkConnectGrpcException: "pyspark.errors.exceptions.connect.SparkConnectGrpcException",
|
|
52
55
|
PythonException: "org.apache.spark.api.python.PythonException",
|
|
56
|
+
UnsupportedOperationException: "java.lang.UnsupportedOperationException",
|
|
53
57
|
}
|
|
54
58
|
|
|
55
59
|
WINDOW_FUNCTION_ANALYSIS_EXCEPTION_SQL_ERROR_CODE = {1005, 2303}
|
|
@@ -68,6 +72,9 @@ init_multi_args_exception_pattern = (
|
|
|
68
72
|
terminate_multi_args_exception_pattern = (
|
|
69
73
|
r"terminate\(\) missing \d+ required positional argument"
|
|
70
74
|
)
|
|
75
|
+
snowpark_connect_exception_pattern = re.compile(
|
|
76
|
+
r"\[snowpark-connect-exception(?::(\w+))?\]\s*(.+?)'\s*is not recognized"
|
|
77
|
+
)
|
|
71
78
|
|
|
72
79
|
|
|
73
80
|
def contains_udtf_select(sql_string):
|
|
@@ -100,6 +107,19 @@ def _get_converted_known_sql_or_custom_exception(
|
|
|
100
107
|
return SparkRuntimeException(
|
|
101
108
|
message="Unexpected value for start in function slice: SQL array indices start at 1."
|
|
102
109
|
)
|
|
110
|
+
match = snowpark_connect_exception_pattern.search(
|
|
111
|
+
ex.message if hasattr(ex, "message") else str(ex)
|
|
112
|
+
)
|
|
113
|
+
if match:
|
|
114
|
+
class_name = match.group(1)
|
|
115
|
+
message = match.group(2)
|
|
116
|
+
exception_class = (
|
|
117
|
+
globals().get(class_name, SparkConnectGrpcException)
|
|
118
|
+
if class_name
|
|
119
|
+
else SparkConnectGrpcException
|
|
120
|
+
)
|
|
121
|
+
return exception_class(message=message)
|
|
122
|
+
|
|
103
123
|
if "select with no columns" in msg and contains_udtf_select(query):
|
|
104
124
|
# We try our best to detect if the SQL string contains a UDTF call and the output schema is empty.
|
|
105
125
|
return PythonException(message=f"[UDTF_RETURN_SCHEMA_MISMATCH] {ex.message}")
|
|
@@ -131,6 +151,11 @@ def _get_converted_known_sql_or_custom_exception(
|
|
|
131
151
|
message=f"[UDTF_EXEC_ERROR] User defined table function encountered an error in the terminate method: {ex.message}"
|
|
132
152
|
)
|
|
133
153
|
|
|
154
|
+
if "failed to split string, provided pattern:" in msg:
|
|
155
|
+
return IllegalArgumentException(
|
|
156
|
+
message=f"Failed to split string using provided pattern. {ex.message}"
|
|
157
|
+
)
|
|
158
|
+
|
|
134
159
|
if "100357" in msg and "wrong tuple size for returned value" in msg:
|
|
135
160
|
return PythonException(
|
|
136
161
|
message=f"[UDTF_RETURN_SCHEMA_MISMATCH] The number of columns in the result does not match the specified schema. {ex.message}"
|
|
@@ -13,10 +13,7 @@ from snowflake.snowpark_connect.config import global_config
|
|
|
13
13
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
14
14
|
from snowflake.snowpark_connect.type_mapping import proto_to_snowpark_type
|
|
15
15
|
from snowflake.snowpark_connect.typed_column import TypedColumn
|
|
16
|
-
from snowflake.snowpark_connect.utils.session import (
|
|
17
|
-
get_or_create_snowpark_session,
|
|
18
|
-
get_python_udxf_import_files,
|
|
19
|
-
)
|
|
16
|
+
from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
|
|
20
17
|
from snowflake.snowpark_connect.utils.udf_helper import (
|
|
21
18
|
SnowparkUDF,
|
|
22
19
|
gen_input_types,
|
|
@@ -28,6 +25,9 @@ from snowflake.snowpark_connect.utils.udf_helper import (
|
|
|
28
25
|
from snowflake.snowpark_connect.utils.udf_utils import (
|
|
29
26
|
ProcessCommonInlineUserDefinedFunction,
|
|
30
27
|
)
|
|
28
|
+
from snowflake.snowpark_connect.utils.udxf_import_utils import (
|
|
29
|
+
get_python_udxf_import_files,
|
|
30
|
+
)
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def process_udf_return_type(
|