snowpark-connect 0.31.0-py3-none-any.whl → 0.33.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +143 -105
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +4 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
- snowflake/snowpark_connect/expression/map_expression.py +5 -4
- snowflake/snowpark_connect/expression/map_extension.py +12 -6
- snowflake/snowpark_connect/expression/map_sql_expression.py +50 -7
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +62 -25
- snowflake/snowpark_connect/expression/map_unresolved_function.py +924 -127
- snowflake/snowpark_connect/expression/map_unresolved_star.py +9 -7
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +1281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +202 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +4 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +6 -5
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -3
- snowflake/snowpark_connect/relation/map_extension.py +10 -9
- snowflake/snowpark_connect/relation/map_join.py +219 -144
- snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
- snowflake/snowpark_connect/relation/map_sql.py +134 -16
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +87 -2
- snowflake/snowpark_connect/relation/read/map_read_table.py +6 -3
- snowflake/snowpark_connect/relation/utils.py +46 -0
- snowflake/snowpark_connect/relation/write/map_write.py +215 -289
- snowflake/snowpark_connect/resources_initializer.py +25 -13
- snowflake/snowpark_connect/server.py +10 -26
- snowflake/snowpark_connect/type_mapping.py +38 -3
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +27 -4
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/METADATA +7 -2
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/RECORD +46 -105
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/top_level.txt +0 -0
--- a/snowflake/snowpark_connect/resources_initializer.py
+++ b/snowflake/snowpark_connect/resources_initializer.py
@@ -1,7 +1,6 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import pathlib
 import threading
 import time

@@ -51,11 +50,9 @@ def initialize_resources() -> None:
     """Upload Spark jar files required for creating Scala UDFs."""
     stage = session.get_session_stage()
     resource_path = stage + RESOURCE_PATH
-    import
+    import snowpark_connect_deps_1
+    import snowpark_connect_deps_2

-    pyspark_jars = (
-        pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
-    )
     jar_files = [
         f"spark-sql_2.12-{SPARK_VERSION}.jar",
         f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
@@ -64,14 +61,29 @@ def initialize_resources() -> None:
         "json4s-ast_2.12-3.7.0-M11.jar",
     ]

-    for
-
-
-
-
-
-
-
+    for jar_name in jar_files:
+        # Try to find the JAR in package 1 first, then package 2
+        jar_path = None
+        try:
+            jar_path = snowpark_connect_deps_1.get_jar_path(jar_name)
+        except FileNotFoundError:
+            try:
+                jar_path = snowpark_connect_deps_2.get_jar_path(jar_name)
+            except FileNotFoundError:
+                raise FileNotFoundError(
+                    f"JAR {jar_name} not found in either package"
+                )
+
+        try:
+            session.file.put(
+                str(jar_path),
+                resource_path,
+                auto_compress=False,
+                overwrite=False,
+                source_compression="NONE",
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to upload JAR {jar_name}: {e}")

     start_time = time.time()

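The upload path above no longer walks a bundled includes/jars directory with pathlib; the JARs now come from two companion wheels, snowpark_connect_deps_1 and snowpark_connect_deps_2, which expose get_jar_path() and list_jars() (the corresponding install requirements appear in the METADATA diff below). A minimal sketch of what such a dependency package could look like internally, assuming the JARs ship as package data in a jars/ directory, is:

# Hypothetical sketch of a JAR dependency wheel such as snowpark_connect_deps_1.
# Only get_jar_path() and list_jars() are known from this diff; the layout
# (a "jars" directory next to this module) is an assumption for illustration.
from pathlib import Path

_JAR_DIR = Path(__file__).parent / "jars"


def get_jar_path(jar_name: str) -> Path:
    """Return the absolute path of a bundled JAR, or raise FileNotFoundError."""
    path = _JAR_DIR / jar_name
    if not path.is_file():
        raise FileNotFoundError(f"{jar_name} is not bundled in this package")
    return path


def list_jars() -> list[str]:
    """Return the paths of every bundled JAR."""
    return [str(p) for p in sorted(_JAR_DIR.glob("*.jar"))]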
--- a/snowflake/snowpark_connect/server.py
+++ b/snowflake/snowpark_connect/server.py
@@ -24,12 +24,10 @@
 import atexit
 import logging
 import os
-import pathlib
 import socket
 import tempfile
 import threading
 import urllib.parse
-import zipfile
 from concurrent import futures
 from typing import Any, Callable, Dict, List, Optional, Tuple

@@ -48,7 +46,6 @@ from pyspark.errors import PySparkValueError
 from pyspark.sql.connect.client.core import ChannelBuilder
 from pyspark.sql.connect.session import SparkSession

-import snowflake.snowpark_connect
 import snowflake.snowpark_connect.proto.control_pb2_grpc as control_grpc
 import snowflake.snowpark_connect.tcm as tcm
 from snowflake import snowpark
@@ -158,9 +155,8 @@ def _handle_exception(context, e: Exception):
     logger.error("Error: %s - %s", type(e).__name__, str(e))

     telemetry.report_request_failure(e)
-
     if tcm.TCM_MODE:
-        #
+        # spark decoder will catch the error and return it to GS gracefully
         attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e

@@ -1033,28 +1029,16 @@ def start_jvm():
         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
         raise exception

-
-
-
-
-    if "dataframe_processor.zip" in str(pyspark_jars):
-        # importlib.resource doesn't work when local stage package is used in TCM
-        zip_path = pathlib.Path(
-            snowflake.snowpark_connect.__file__
-        ).parent.parent.parent
-        temp_dir = tempfile.gettempdir()
-
-        extract_folder = "snowflake/snowpark_connect/includes/jars/"  # Folder to extract (must end with '/')
+    # Import both JAR dependency packages
+    import snowpark_connect_deps_1
+    import snowpark_connect_deps_2

-
-
-
-
-
-
-
-    for path in pyspark_jars.glob("**/*.jar"):
-        jpype.addClassPath(path)
+    # Load all the jar files from both packages
+    jar_path_list = (
+        snowpark_connect_deps_1.list_jars() + snowpark_connect_deps_2.list_jars()
+    )
+    for jar_path in jar_path_list:
+        jpype.addClassPath(jar_path)

     # TODO: Should remove convertStrings, but it breaks the JDBC code.
     jvm_settings: list[str] = list(
--- a/snowflake/snowpark_connect/type_mapping.py
+++ b/snowflake/snowpark_connect/type_mapping.py
@@ -373,6 +373,8 @@ def cast_to_match_snowpark_type(
             return str(content)
         case snowpark.types.DayTimeIntervalType:
             return str(content)
+        case snowpark.types.MapType:
+            return content
         case _:
             exception = SnowparkConnectNotImplementedError(
                 f"Unsupported snowpark data type in casting: {data_type}"
@@ -582,11 +584,14 @@ def map_snowpark_types_to_pyarrow_types(
             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
             raise exception
         case snowpark.types.TimestampType:
-
-
+            # Check if pa_type has unit attribute (it should be a timestamp type)
+            unit = pa_type.unit if hasattr(pa_type, "unit") else "us"
+            tz = pa_type.tz if hasattr(pa_type, "tz") else None
+
+            # Spark truncates nanosecond precision to microseconds
             if unit == "ns":
-                # Spark truncates nanosecond precision to microseconds
                 unit = "us"
+
             return pa.timestamp(unit, tz=tz)
         case snowpark.types.VariantType:
             return pa.string()
@@ -670,6 +675,9 @@ def map_pyarrow_to_snowpark_types(pa_type: pa.DataType) -> snowpark.types.DataTy
         return snowpark.types.TimestampType()
     elif pa.types.is_null(pa_type):
         return snowpark.types.NullType()
+    elif pa.types.is_duration(pa_type):
+        # Map PyArrow duration[us] to DayTimeIntervalType
+        return snowpark.types.DayTimeIntervalType()
     else:
         exception = SnowparkConnectNotImplementedError(
             f"Unsupported PyArrow data type: {pa_type}"
@@ -892,6 +900,33 @@ def map_simple_types(simple_type: str) -> snowpark.types.DataType:
             return snowpark.types.YearMonthIntervalType()
         case type_name if _INTERVAL_DAYTIME_PATTERN_RE.match(type_name):
             return snowpark.types.DayTimeIntervalType()
+        # Year-Month interval cases
+        case "interval year":
+            return snowpark.types.YearMonthIntervalType(0)
+        case "interval month":
+            return snowpark.types.YearMonthIntervalType(1)
+        case "interval year to month":
+            return snowpark.types.YearMonthIntervalType(0, 1)
+        case "interval day":
+            return snowpark.types.DayTimeIntervalType(0)
+        case "interval hour":
+            return snowpark.types.DayTimeIntervalType(1)
+        case "interval minute":
+            return snowpark.types.DayTimeIntervalType(2)
+        case "interval second":
+            return snowpark.types.DayTimeIntervalType(3)
+        case "interval day to hour":
+            return snowpark.types.DayTimeIntervalType(0, 1)
+        case "interval day to minute":
+            return snowpark.types.DayTimeIntervalType(0, 2)
+        case "interval day to second":
+            return snowpark.types.DayTimeIntervalType(0, 3)
+        case "interval hour to minute":
+            return snowpark.types.DayTimeIntervalType(1, 2)
+        case "interval hour to second":
+            return snowpark.types.DayTimeIntervalType(1, 3)
+        case "interval minute to second":
+            return snowpark.types.DayTimeIntervalType(2, 3)
         case _:
             if simple_type.startswith("decimal"):
                 precision = int(simple_type.split("(")[1].split(",")[0])
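The new match arms give every qualified Spark interval string an explicit Snowpark interval type with start/end fields, and Arrow duration values now map to a day-time interval instead of raising. Assuming the helpers are importable from snowflake.snowpark_connect.type_mapping (the module shown in the file list above), the expected mappings look like:

# Illustrative expectations for the new type-mapping branches; the import path is
# assumed from the file list, the return values are taken from the diff itself.
import pyarrow as pa

from snowflake.snowpark_connect.type_mapping import (
    map_pyarrow_to_snowpark_types,
    map_simple_types,
)

map_simple_types("interval year to month")   # YearMonthIntervalType(0, 1)
map_simple_types("interval day to second")   # DayTimeIntervalType(0, 3)
map_simple_types("interval minute")          # DayTimeIntervalType(2)

# duration[us] Arrow types previously fell through to SnowparkConnectNotImplementedError
map_pyarrow_to_snowpark_types(pa.duration("us"))  # DayTimeIntervalType()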
--- a/snowflake/snowpark_connect/typed_column.py
+++ b/snowflake/snowpark_connect/typed_column.py
@@ -8,6 +8,7 @@ from functools import cached_property
 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
 from snowflake.snowpark.column import Column
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

 _EMPTY_COLUMN = Column("")

@@ -44,11 +45,11 @@ class TypedColumn:
     def alias(self, alias_name: str):
         return TypedColumn(self.col.alias(alias_name), self._type_resolver)

-    def set_qualifiers(self, qualifiers:
+    def set_qualifiers(self, qualifiers: set[ColumnQualifier]) -> None:
         self.qualifiers = qualifiers

-    def get_qualifiers(self) ->
-        return getattr(self, "qualifiers",
+    def get_qualifiers(self) -> set[ColumnQualifier]:
+        return getattr(self, "qualifiers", set())

     def set_catalog_database_info(self, catalog_database_info: dict[str, str]) -> None:
         self._catalog_database_info = catalog_database_info
@@ -63,12 +64,13 @@ class TypedColumn:
     def get_database(self) -> str | None:
         return self._catalog_database_info.get("database")

-    def set_multi_col_qualifiers(self, qualifiers: list[
+    def set_multi_col_qualifiers(self, qualifiers: list[set[ColumnQualifier]]) -> None:
         self.multi_col_qualifiers = qualifiers

-    def get_multi_col_qualifiers(self, num_columns) -> list[
+    def get_multi_col_qualifiers(self, num_columns) -> list[set[ColumnQualifier]]:
         if not hasattr(self, "multi_col_qualifiers"):
-
+
+            return [set() for i in range(num_columns)]
         assert (
             len(self.multi_col_qualifiers) == num_columns
         ), f"Expected {num_columns} multi-column qualifiers, got {len(self.multi_col_qualifiers)}"
--- /dev/null
+++ b/snowflake/snowpark_connect/utils/sequence.py
@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import threading
+from collections import defaultdict
+
+from snowflake.snowpark_connect.utils.context import get_session_id
+
+# per session number sequences to generate unique snowpark columns
+_session_sequences = defaultdict(int)
+
+_lock = threading.Lock()
+
+
+def next_unique_num():
+    session_id = get_session_id()
+    with _lock:
+        next_num = _session_sequences[session_id]
+        _session_sequences[session_id] = next_num + 1
+        return next_num
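next_unique_num() gives each Spark Connect session its own monotonically increasing counter behind a single process-wide lock, so concurrent requests in one session never hand out the same number. A hypothetical caller inside an active session context might use it to mint collision-free internal column names; the alias format below is illustrative only, not taken from the package:

# Hypothetical usage sketch; requires an active session context so that
# get_session_id() (used inside next_unique_num) can resolve the current session.
from snowflake.snowpark_connect.utils.sequence import next_unique_num


def make_internal_alias(base: str) -> str:
    # e.g. "AMOUNT__sc_0", "AMOUNT__sc_1", ... unique within the current session
    return f"{base}__sc_{next_unique_num()}"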
--- a/snowflake/snowpark_connect/utils/session.py
+++ b/snowflake/snowpark_connect/utils/session.py
@@ -23,6 +23,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache

+SKIP_SESSION_CONFIGURATION = False
+
+
+def skip_session_configuration(skip: bool):
+    global SKIP_SESSION_CONFIGURATION
+    SKIP_SESSION_CONFIGURATION = skip
+

 # Suppress experimental warnings from snowflake.snowpark logger
 def _filter_experimental_warnings(record):
@@ -57,6 +64,8 @@ def configure_snowpark_session(session: snowpark.Session):
         global_config,
     )

+    global SKIP_SESSION_CONFIGURATION
+
     logger.info(f"Configuring session {session}")

     telemetry.initialize(session)
@@ -85,6 +94,12 @@ def configure_snowpark_session(session: snowpark.Session):
     session.connection.arrow_number_to_decimal_setter = True
     session.custom_package_usage_config["enabled"] = True

+    # Scoped temp objects may not be accessible in stored procedure and cause "object does not exist" error. So disable
+    # _use_scoped_temp_objects here and use temp table instead.
+    # Note that we also set PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS to false below to enforce the current Snowpark
+    # session does not use scoped temp objects too.
+    session._use_scoped_temp_objects = False
+
     # Configure CTE optimization based on session configuration
     cte_optimization_enabled = get_cte_optimization_enabled()
     session.cte_optimization_enabled = cte_optimization_enabled
@@ -119,14 +134,22 @@ def configure_snowpark_session(session: snowpark.Session):
         "TIMEZONE": f"'{global_config.spark_sql_session_timeZone}'",
         "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
         "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
-
+        # this is required for creating udfs from sproc and avoid "object does not exist" error
+        "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",
         "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
         "QUERY_TAG": f"'{query_tag}'",
     }

-    session.
-
-
+    # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
+    if not SKIP_SESSION_CONFIGURATION:
+        session.sql(
+            f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
+        ).collect()
+    else:
+        session_param_names = ", ".join(session_params.keys())
+        logger.info(
+            f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
+        )

     # Instrument the snowpark session to use a cache for describe queries.
     instrument_session_for_describe_cache(session)
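Because owner's-rights stored procedures (for example inside Native Apps, per SNOW-2245971) cannot run ALTER SESSION, a caller can now opt out of the automatic configuration and take responsibility for the listed parameters itself. A hedged sketch of that flow; only skip_session_configuration() and the parameter names come from this diff, everything else is assumed:

# Sketch: opting out of automatic ALTER SESSION configuration in a restricted
# environment. How the Snowpark session is created and obtained is assumed.
from snowflake.snowpark_connect.utils.session import skip_session_configuration

skip_session_configuration(True)

# The caller must then make sure equivalents of these parameters are already in
# effect for the session (names taken from the session_params dict above):
#   TIMEZONE, QUOTED_IDENTIFIERS_IGNORE_CASE,
#   PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION,
#   PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS (false),
#   ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE, QUERY_TAG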
--- a/snowpark_connect-0.31.0.dist-info/METADATA
+++ b/snowpark_connect-0.33.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.31.0
+Version: 0.33.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -9,11 +9,13 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 License-File: LICENSE-binary
 License-File: NOTICE-binary
+Requires-Dist: snowpark-connect-deps-1==3.56.2
+Requires-Dist: snowpark-connect-deps-2==3.56.2
 Requires-Dist: certifi>=2025.1.31
 Requires-Dist: cloudpickle
 Requires-Dist: fsspec[http]
 Requires-Dist: jpype1
-Requires-Dist: protobuf<
+Requires-Dist: protobuf<6.32.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
 Requires-Dist: snowflake-snowpark-python[pandas]<1.41.0,==1.40.0
@@ -29,11 +31,14 @@ Requires-Dist: grpcio-status<1.63,>=1.56.0
 Requires-Dist: googleapis-common-protos>=1.56.4
 Requires-Dist: numpy<2,>=1.15
 Requires-Dist: gcsfs>=2025.9.0
+Provides-Extra: jdk
+Requires-Dist: jdk4py==17.0.9.2; extra == "jdk"
 Dynamic: author
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: license
 Dynamic: license-file
+Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary