snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +150 -25
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +48 -4
- snowflake/snowpark_connect/expression/map_extension.py +25 -5
- snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
- snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
- snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
- snowflake/snowpark_connect/relation/map_extension.py +28 -8
- snowflake/snowpark_connect/relation/map_join.py +21 -10
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +91 -24
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +131 -34
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +72 -19
- snowflake/snowpark_connect/type_mapping.py +54 -17
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +5 -1
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0
@@ -56,7 +56,11 @@ from snowflake.snowpark_connect.analyze_plan.map_tree_string import map_tree_str
 from snowflake.snowpark_connect.config import route_config_proto
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.control_server import ControlServicer
-from snowflake.snowpark_connect.error.
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import (
+    attach_custom_error_code,
+    build_grpc_error_response,
+)
 from snowflake.snowpark_connect.execute_plan.map_execution_command import (
     map_execution_command,
 )

@@ -96,7 +100,7 @@ from snowflake.snowpark_connect.utils.interrupt import (
     interrupt_queries_with_tag,
     interrupt_query,
 )
-from snowflake.snowpark_connect.utils.profiling import profile_method
+from snowflake.snowpark_connect.utils.profiling import PROFILING_ENABLED, profile_method
 from snowflake.snowpark_connect.utils.session import (
     configure_snowpark_session,
     get_or_create_snowpark_session,
@@ -157,6 +161,7 @@ def _handle_exception(context, e: Exception):

     if tcm.TCM_MODE:
         # TODO: SNOW-2009834 gracefully return error back in TCM
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e

     from grpc_status import rpc_status
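Nearly every hunk that follows repeats the same refactor: build the exception object, tag it with attach_custom_error_code, then raise it. A minimal standalone sketch of that pattern, assuming only the two helpers imported above (the validating function and its message are hypothetical, not part of the package):

from snowflake.snowpark_connect.error.error_codes import ErrorCodes
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


def require_positive(value: int) -> int:
    # Hypothetical validator illustrating the raise pattern used throughout this release.
    if value <= 0:
        exception = ValueError(f"Expected a positive value, got {value}")
        # Tag the exception so the server can surface a structured error code to clients.
        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
        raise exception
    return value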
@@ -374,9 +379,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                        ),
                    )
                case _:
-                    raise SnowparkConnectNotImplementedError(
+                    exception = SnowparkConnectNotImplementedError(
                        f"ANALYZE PLAN NOT IMPLEMENTED:\n{request}"
                    )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.UNSUPPORTED_OPERATION
+                    )
+                    raise exception
        except Exception as e:
            _handle_exception(context, e)
        finally:

@@ -527,9 +536,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
                    ),
                )
            case _:
-                raise ValueError(
+                exception = ValueError(
                    f"Unexpected payload type in AddArtifacts: {request.WhichOneof('payload')}"
                )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

        for name, data in cache_data.items():
            _try_handle_local_relation(name, bytes(data))

@@ -635,9 +648,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
            case proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID:
                interrupted_ids = interrupt_query(request.operation_id)
            case _:
-                raise SnowparkConnectNotImplementedError(
+                exception = SnowparkConnectNotImplementedError(
                    f"INTERRUPT NOT IMPLEMENTED:\n{request}"
                )
+                attach_custom_error_code(
+                    exception, ErrorCodes.UNSUPPORTED_OPERATION
+                )
+                raise exception

        return proto_base.InterruptResponse(
            session_id=request.session_id,
@@ -655,9 +672,11 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
        continue. If there is a ResultComplete, the client should use ReleaseExecute with
        """
        logger.info("ReattachExecute")
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
            "Spark client has detached, please resubmit request. In a future version, the server will be support the reattach."
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception

    def ReleaseExecute(self, request: proto_base.ReleaseExecuteRequest, context):
        """Release an reattachable execution, or parts thereof.

@@ -760,8 +779,11 @@ def _serve(

    ChannelBuilder.MAX_MESSAGE_LENGTH = grpc_max_msg_size

+    # cProfile doesn't work correctly with multiple threads
+    max_workers = 1 if PROFILING_ENABLED else 10
+
    server = grpc.server(
-        futures.ThreadPoolExecutor(max_workers=
+        futures.ThreadPoolExecutor(max_workers=max_workers), options=server_options
    )
    control_servicer = ControlServicer(session)
    proto_base_grpc.add_SparkConnectServiceServicer_to_server(
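The max_workers change above exists because cProfile only accounts for the thread it is started in, so profiling runs are forced onto a single gRPC worker. A self-contained sketch of the same idea, assuming the flag is read from an environment variable (the variable name below is made up; the package's real PROFILING_ENABLED is imported from snowflake.snowpark_connect.utils.profiling as shown earlier in this diff):

import os
from concurrent import futures

import grpc

# Assumed flag name for this sketch only.
PROFILING_ENABLED = os.environ.get("ENABLE_PROFILING", "") == "1"


def build_server() -> grpc.Server:
    # cProfile cannot attribute work done on other worker threads, so profile single-threaded.
    max_workers = 1 if PROFILING_ENABLED else 10
    return grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers))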
@@ -791,6 +813,7 @@ def _serve(
            )
        else:
            logger.error("Error starting up Snowpark Connect server", exc_info=True)
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
            raise e
    finally:
        # flush the telemetry queue if possible

@@ -808,7 +831,9 @@ def _set_remote_url(remote_url: str):
    elif parsed_url.scheme == "unix":
        _server_url = remote_url.split("/;")[0]
    else:
-        raise RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
+        exception = RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+        raise exception


def _set_server_tcp_port(server_port: int):

@@ -822,7 +847,9 @@ def _check_port_is_free(port: int) -> None:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1)
        if s.connect_ex(("127.0.0.1", port)) == 0:
-            raise RuntimeError(f"TCP port {port} is already in use")
+            exception = RuntimeError(f"TCP port {port} is already in use")
+            attach_custom_error_code(exception, ErrorCodes.TCP_PORT_ALREADY_IN_USE)
+            raise exception


def _set_server_unix_domain_socket(path: str):

@@ -834,14 +861,18 @@ def _set_server_unix_domain_socket(path: str):
def get_server_url() -> str:
    global _server_url
    if not _server_url:
-        raise RuntimeError("Server URL not set")
+        exception = RuntimeError("Server URL not set")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
    return _server_url


def get_client_url() -> str:
    global _client_url
    if not _client_url:
-        raise RuntimeError("Client URL not set")
+        exception = RuntimeError("Client URL not set")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
    return _client_url

@@ -871,12 +902,14 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
        if url is None:
            url = get_client_url()
        if url[:6] != "unix:/" or len(url) < 7:
-            raise PySparkValueError(
+            exception = PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": "The URL must start with 'unix://'. Please update the URL to follow the correct format, e.g., 'unix://unix_domain_socket_path'.",
                },
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+            raise exception

        # Rewrite the URL to use http as the scheme so that we can leverage
        # Python's built-in parser to parse URL parameters

@@ -919,7 +952,7 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
        for p in parts:
            kv = p.split("=")
            if len(kv) != 2:
-                raise PySparkValueError(
+                exception = PySparkValueError(
                    error_class="INVALID_CONNECT_URL",
                    message_parameters={
                        "detail": f"Parameter '{p}' should be provided as a "

@@ -927,6 +960,10 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
                        f"the parameter to follow the correct format, e.g., 'key=value'.",
                    },
                )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_SPARK_CONNECT_URL
+                )
+                raise exception
            self.params[kv[0]] = urllib.parse.unquote(kv[1])

        netloc = self.url.netloc.split(":")

@@ -942,7 +979,7 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
            self.host = netloc[0]
            self.port = int(netloc[1])
        else:
-            raise PySparkValueError(
+            exception = PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"Target destination '{self.url.netloc}' should match the "

@@ -950,6 +987,8 @@ class UnixDomainSocketChannelBuilder(ChannelBuilder):
                    f"the correct format, e.g., 'hostname:port'.",
                },
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_SPARK_CONNECT_URL)
+            raise exception

        # We override this to enable compatibility with Spark 4.0
        host = None
@@ -988,9 +1027,11 @@ def start_jvm():
        if tcm.TCM_MODE:
            # No-op if JVM is already started in TCM mode
            return
-        raise RuntimeError(
+        exception = RuntimeError(
            "JVM must not be running when starting the Spark Connect server"
        )
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception

    pyspark_jars = (
        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"

@@ -1065,14 +1106,18 @@ def start_session(
    _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = max_grpc_message_size

    if os.environ.get("SPARK_ENV_LOADED"):
-        raise RuntimeError(
+        exception = RuntimeError(
            "Snowpark Connect cannot be run inside of a Spark environment"
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_OPERATION)
+        raise exception
    if connection_parameters is not None:
        if snowpark_session is not None:
-            raise ValueError(
+            exception = ValueError(
                "Only specify one of snowpark_session and connection_parameters"
            )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_INPUT)
+            raise exception
        snowpark_session = snowpark.Session.builder.configs(
            connection_parameters
        ).create()
@@ -1084,9 +1129,11 @@ def start_session(
        return

    if len(list(filter(None, [remote_url, tcp_port, unix_domain_socket]))) > 1:
-        raise RuntimeError(
+        exception = RuntimeError(
            "Can only set at most one of remote_url, tcp_port, and unix_domain_socket"
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_STARTUP_INPUT)
+        raise exception

    url_from_env = os.environ.get("SPARK_REMOTE", None)
    if remote_url:

@@ -1124,7 +1171,11 @@ def start_session(
        server_thread.start()
        _server_running.wait()
        if _server_error:
-            raise RuntimeError("Snowpark Connect session failed to start")
+            exception = RuntimeError("Snowpark Connect session failed to start")
+            attach_custom_error_code(
+                exception, ErrorCodes.STARTUP_CONNECTION_FAILED
+            )
+            raise exception
        return server_thread
    else:
        # Launch in the foreground.

@@ -1132,6 +1183,7 @@
    except Exception as e:
        _reset_server_run_state()
        logger.error(e, exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e


@@ -1165,6 +1217,7 @@ def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSessi
    except Exception as e:
        _reset_server_run_state()
        logger.error(e, exc_info=True)
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e

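The start_session hunks above treat the three transports as mutually exclusive ("Can only set at most one of remote_url, tcp_port, and unix_domain_socket"). A hedged usage sketch; the import location, socket path, and omission of connection arguments are assumptions, not something this diff confirms:

# Assumed import path; start_session is defined in the server module changed above.
from snowflake.snowpark_connect.server import start_session

# Pick at most one transport: remote_url, tcp_port, or unix_domain_socket.
# Snowflake connection arguments (snowpark_session or connection_parameters) are omitted here.
start_session(unix_domain_socket="/tmp/snowpark_connect.sock")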
@@ -29,12 +29,17 @@ from snowflake.snowpark_connect.constants import (
 from snowflake.snowpark_connect.date_time_format_mapping import (
     convert_spark_format_to_snowflake,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
 from snowflake.snowpark_connect.expression.map_sql_expression import (
     _INTERVAL_DAYTIME_PATTERN_RE,
     _INTERVAL_YEARMONTH_PATTERN_RE,
 )
-from snowflake.snowpark_connect.utils.context import
+from snowflake.snowpark_connect.utils.context import (
+    get_is_evaluating_sql,
+    get_jpype_jclass_lock,
+)
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,

@@ -61,12 +66,14 @@ SNOWPARK_TYPE_NAME_TO_PYSPARK_TYPE_NAME = {

 @cache
 def _get_struct_type_class():
-    return jpype.JClass("org.apache.spark.sql.types.StructType")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.types.StructType")


 @cache
 def get_python_sql_utils_class():
-    return jpype.JClass("org.apache.spark.sql.api.python.PythonSQLUtils")
+    with get_jpype_jclass_lock():
+        return jpype.JClass("org.apache.spark.sql.api.python.PythonSQLUtils")


 def _parse_ddl_with_spark_scala(ddl_string: str) -> pyspark.sql.types.DataType:
@@ -291,9 +298,11 @@ def snowpark_to_proto_type(
                )
            }
        case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported snowpark data type: {data_type}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def cast_to_match_snowpark_type(

@@ -333,7 +342,9 @@ def cast_to_match_snowpark_type(
            with suppress(TypeError):
                date = datetime.strptime(content, format)
                return date
-            raise ValueError(f"Date casting error for {str(content)}")
+            exception = ValueError(f"Date casting error for {str(content)}")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+            raise exception
        case snowpark.types.ShortType:
            return int(content)
        case snowpark.types.StringType:

@@ -363,9 +374,11 @@ def cast_to_match_snowpark_type(
        case snowpark.types.DayTimeIntervalType:
            return str(content)
        case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported snowpark data type in casting: {data_type}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def snowpark_to_iceberg_type(data_type: snowpark.types.DataType) -> str:

@@ -398,9 +411,11 @@ def snowpark_to_iceberg_type(data_type: snowpark.types.DataType) -> str:
        case snowpark.types.TimestampType:
            return "timestamp"
        case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported snowpark data type for iceber: {data_type}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def proto_to_snowpark_type(
@@ -487,9 +502,11 @@ def map_snowpark_types_to_pyarrow_types(
                    )
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"Unsupported arrow type {pa_type} for snowpark ArrayType."
                )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
        case snowpark.types.BinaryType:
            return pa.binary()
        case snowpark.types.BooleanType:

@@ -530,9 +547,11 @@ def map_snowpark_types_to_pyarrow_types(
                    ),
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"Unsupported arrow type {pa_type} for snowpark MapType."
                )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
        case snowpark.types.NullType:
            return pa.string()
        case snowpark.types.ShortType:

@@ -557,9 +576,11 @@ def map_snowpark_types_to_pyarrow_types(
                    ]
                )
            else:
-                raise AnalysisException(
+                exception = AnalysisException(
                    f"Unsupported arrow type {pa_type} for snowpark StructType."
                )
+                attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                raise exception
        case snowpark.types.TimestampType:
            unit = pa_type.unit
            tz = pa_type.tz

@@ -576,9 +597,11 @@ def map_snowpark_types_to_pyarrow_types(
            # Return string type so formatted intervals are preserved in display
            return pa.string()
        case _:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported snowpark data type: {snowpark_type}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataType:
@@ -648,9 +671,11 @@ def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataTy
    elif pa.types.is_null(pa_type):
        return snowpark.types.NullType()
    else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
            f"Unsupported PyArrow data type: {pa_type}"
        )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception


def map_pyspark_types_to_snowpark_types(

@@ -736,9 +761,11 @@ def map_pyspark_types_to_snowpark_types(
        return snowpark.types.DayTimeIntervalType(
            type_to_map.startField, type_to_map.endField
        )
-    raise SnowparkConnectNotImplementedError(
+    exception = SnowparkConnectNotImplementedError(
        f"Unsupported spark data type: {type_to_map}"
    )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


def map_snowpark_to_pyspark_types(

@@ -811,7 +838,11 @@ def map_snowpark_to_pyspark_types(
        return pyspark.sql.types.DayTimeIntervalType(
            type_to_map.start_field, type_to_map.end_field
        )
-    raise SnowparkConnectNotImplementedError(f"Unsupported data type: {type_to_map}")
+    exception = SnowparkConnectNotImplementedError(
+        f"Unsupported data type: {type_to_map}"
+    )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


def map_simple_types(simple_type: str) -> snowpark.types.DataType:

@@ -866,9 +897,11 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
        precision = int(simple_type.split("(")[1].split(",")[0])
        scale = int(simple_type.split(",")[1].split(")")[0])
        return snowpark.types.DecimalType(precision, scale)
-    raise SnowparkConnectNotImplementedError(
+    exception = SnowparkConnectNotImplementedError(
        f"Unsupported simple type: {simple_type}"
    )
+    attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+    raise exception


def map_json_schema_to_snowpark(
@@ -1009,9 +1042,11 @@ def map_spark_timestamp_format_expression(
            lit_value, _ = get_literal_field_and_name(arguments.literal)
            return convert_spark_format_to_snowflake(lit_value, timestamp_input_type)
        case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported expression type {other} in timestamp format argument"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception


def map_spark_number_format_expression(

@@ -1030,9 +1065,11 @@ def map_spark_number_format_expression(
        case "literal":
            lit_value, _ = get_literal_field_and_name(arguments.literal)
        case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                f"Unsupported expression type {other} in number format argument"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception

    return _map_spark_to_snowflake_number_format(lit_value)

@@ -2,10 +2,12 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #

+import os
 import re
+import threading
 from contextlib import contextmanager
 from contextvars import ContextVar
-from typing import Mapping, Optional
+from typing import Iterator, Mapping, Optional

 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto


@@ -25,6 +27,7 @@ _is_evaluating_sql = ContextVar[bool]("_is_evaluating_sql", default=False)
 _is_evaluating_join_condition = ContextVar(
     "_is_evaluating_join_condition", default=("default", False, [], [])
 )
+_is_processing_order_by = ContextVar[bool]("_is_processing_order_by", default=False)

 _sql_aggregate_function_count = ContextVar[int](
     "_contains_aggregate_function", default=0
@@ -56,6 +59,23 @@ _is_in_pivot = ContextVar[bool]("_is_in_pivot", default=False)
 _is_in_udtf_context = ContextVar[bool]("_is_in_udtf_context", default=False)
 _accessing_temp_object = ContextVar[bool]("_accessing_temp_object", default=False)

+# Thread-safe lock for JPype JClass creation to prevent access violations
+_jpype_jclass_lock = threading.Lock()
+
+
+@contextmanager
+def get_jpype_jclass_lock() -> Iterator[None]:
+    """
+    Context manager that acquires the JPype JClass lock on Windows platforms.
+    On non-Windows (os.name != 'nt'), it yields without acquiring the lock.
+    """
+    if os.name == "nt":
+        with _jpype_jclass_lock:
+            yield
+    else:
+        yield
+
+
 # Lateral Column Alias helpers
 # We keep a thread-local mapping from alias name -> TypedColumn that is
 # populated incrementally while the projection list is being processed.
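The lock added above is what the type_mapping hunks earlier in this diff wrap around JClass lookups. A minimal usage sketch mirroring that call site (the import path is inferred from the file list; jpype.JClass needs a started JVM with the Spark jars on its classpath):

import jpype

from snowflake.snowpark_connect.utils.context import get_jpype_jclass_lock


def load_struct_type_class():
    # On Windows the helper serializes JClass creation; elsewhere it is a no-op context manager.
    with get_jpype_jclass_lock():
        return jpype.JClass("org.apache.spark.sql.types.StructType")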
@@ -207,6 +227,27 @@ def push_evaluating_sql_scope():
         _is_evaluating_sql.set(prev)


+def get_is_processing_order_by() -> bool:
+    """
+    Gets the value of _is_processing_order_by for the current context, defaults to False.
+    """
+    return _is_processing_order_by.get()
+
+
+@contextmanager
+def push_processing_order_by_scope():
+    """
+    Context manager that sets a flag indicating if ORDER BY expressions are being evaluated.
+    This enables optimizations like reusing already-computed UDF columns.
+    """
+    prev = _is_processing_order_by.get()
+    try:
+        _is_processing_order_by.set(True)
+        yield
+    finally:
+        _is_processing_order_by.set(prev)
+
+
 def get_is_evaluating_join_condition() -> tuple[str, bool, list, list]:
     """
     Gets the value of _is_evaluating_join_condition for the current context, defaults to False.
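A quick usage sketch for the two helpers added above; the flag is only set inside the context manager and restored afterwards (import path inferred from the file list):

from snowflake.snowpark_connect.utils.context import (
    get_is_processing_order_by,
    push_processing_order_by_scope,
)

assert get_is_processing_order_by() is False
with push_processing_order_by_scope():
    # Inside the scope, expression mapping can reuse already-computed UDF columns.
    assert get_is_processing_order_by() is True
assert get_is_processing_order_by() is False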
@@ -12,6 +12,8 @@ from typing import Any
 from snowflake import snowpark
 from snowflake.connector.cursor import ResultMetadataV2
 from snowflake.snowpark._internal.server_connection import ServerConnection
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry

@@ -148,6 +150,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
            telemetry.report_query(result, **kwargs)
        except Exception as e:
            telemetry.report_query(e, **kwargs)
+            attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
            raise e
        return result


@@ -8,6 +8,8 @@ Environment variable utilities for Snowpark Connect.

 import os

+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger

@@ -37,9 +39,11 @@ def get_int_from_env(env_var: str, default: int) -> int:
    """
    # Validate that default is actually an integer
    if not isinstance(default, int):
-        raise TypeError(
+        exception = TypeError(
            f"Default value must be an integer, got {type(default).__name__}: {default}"
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception

    value = os.getenv(env_var)
    if value is None:
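A small usage sketch for get_int_from_env as constrained by the hunk above: a non-integer default now raises a TypeError tagged with ErrorCodes.INVALID_INPUT, and an unset variable presumably falls back to the default (that fallback and the variable name below are assumptions; only the type check is shown in the diff):

from snowflake.snowpark_connect.utils.env_utils import get_int_from_env

# Hypothetical variable name; expected to return 3 when the variable is unset.
max_retries = get_int_from_env("SNOWPARK_CONNECT_MAX_RETRIES", 3)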
@@ -13,6 +13,8 @@ from snowflake.snowpark_connect.config import (
     auto_uppercase_column_identifiers,
     auto_uppercase_non_column_identifiers,
 )
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code

 QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")
 UNQUOTED_SPARK_IDENTIFIER = re.compile(r"^\w+$")

@@ -25,7 +27,9 @@ def unquote_spark_identifier_if_quoted(spark_name: str) -> str:
    if QUOTED_SPARK_IDENTIFIER.match(spark_name):
        return spark_name[1:-1].replace("``", "`")

-    raise AnalysisException(f"Invalid name: {spark_name}")
+    exception = AnalysisException(f"Invalid name: {spark_name}")
+    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+    raise exception


def spark_to_sf_single_id_with_unquoting(
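For context on the quoted branch shown above, a sketch derived directly from QUOTED_SPARK_IDENTIFIER and the replace call (the import path is inferred from the file list):

from snowflake.snowpark_connect.utils.identifiers import unquote_spark_identifier_if_quoted

# Backtick-quoted Spark identifiers are unwrapped and doubled backticks collapsed;
# names matching neither pattern now raise an AnalysisException tagged INTERNAL_ERROR.
assert unquote_spark_identifier_if_quoted("`my``col`") == "my`col"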
@@ -191,7 +195,9 @@ class FQN:

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, FQN):
-            raise AnalysisException(f"{other} is not a valid FQN")
+            exception = AnalysisException(f"{other} is not a valid FQN")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception
        return self.identifier == other.identifier

    @classmethod

@@ -209,7 +215,9 @@ class FQN:
        result = re.fullmatch(qualifier_pattern, identifier)

        if result is None:
-            raise AnalysisException(f"{identifier} is not a valid identifier")
+            exception = AnalysisException(f"{identifier} is not a valid identifier")
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception

        unqualified_name = result.group("name")
        if result.group("second_qualifier") is not None: