snowpark-connect 0.27.0-py3-none-any.whl → 0.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of snowpark-connect might be problematic.

Files changed (35)
  1. snowflake/snowpark_connect/column_name_handler.py +3 -93
  2. snowflake/snowpark_connect/config.py +99 -1
  3. snowflake/snowpark_connect/dataframe_container.py +0 -6
  4. snowflake/snowpark_connect/expression/map_expression.py +22 -7
  5. snowflake/snowpark_connect/expression/map_sql_expression.py +22 -18
  6. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +4 -26
  7. snowflake/snowpark_connect/expression/map_unresolved_function.py +12 -3
  8. snowflake/snowpark_connect/expression/map_unresolved_star.py +2 -3
  9. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  10. snowflake/snowpark_connect/relation/map_extension.py +14 -10
  11. snowflake/snowpark_connect/relation/map_join.py +62 -258
  12. snowflake/snowpark_connect/relation/map_relation.py +5 -1
  13. snowflake/snowpark_connect/relation/map_sql.py +353 -16
  14. snowflake/snowpark_connect/relation/write/map_write.py +171 -110
  15. snowflake/snowpark_connect/resources_initializer.py +20 -5
  16. snowflake/snowpark_connect/server.py +16 -17
  17. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  18. snowflake/snowpark_connect/utils/describe_query_cache.py +57 -51
  19. snowflake/snowpark_connect/utils/identifiers.py +120 -0
  20. snowflake/snowpark_connect/utils/io_utils.py +21 -1
  21. snowflake/snowpark_connect/utils/scala_udf_utils.py +34 -43
  22. snowflake/snowpark_connect/utils/session.py +16 -26
  23. snowflake/snowpark_connect/utils/telemetry.py +53 -0
  24. snowflake/snowpark_connect/version.py +1 -1
  25. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/METADATA +2 -2
  26. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/RECORD +34 -35
  27. snowflake/snowpark_connect/hidden_column.py +0 -39
  28. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-connect +0 -0
  29. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-session +0 -0
  30. {snowpark_connect-0.27.0.data → snowpark_connect-0.28.0.data}/scripts/snowpark-submit +0 -0
  31. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/WHEEL +0 -0
  32. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE-binary +0 -0
  33. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/LICENSE.txt +0 -0
  34. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/licenses/NOTICE-binary +0 -0
  35. {snowpark_connect-0.27.0.dist-info → snowpark_connect-0.28.0.dist-info}/top_level.txt +0 -0
@@ -6,20 +6,24 @@ import hashlib
 import inspect
 import random
 import re
-import threading
 import time
 from typing import Any
 
 from snowflake import snowpark
 from snowflake.connector.cursor import ResultMetadataV2
 from snowflake.snowpark._internal.server_connection import ServerConnection
+from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import telemetry
 
 DESCRIBE_CACHE_TTL_SECONDS = 15
 USE_DESCRIBE_QUERY_CACHE = True
 
-DDL_DETECTION_PATTERN = re.compile(r"^\s*(CREATE|ALTER|DROP|RENAME)\b", re.IGNORECASE)
+DDL_DETECTION_PATTERN = re.compile(r"\s*(CREATE|ALTER|DROP)\b", re.IGNORECASE)
+PLAIN_CREATE_PATTERN = re.compile(
+    r"\s*CREATE\s+((LOCAL|GLOBAL)\s+)?(TRANSIENT\s+)?TABLE\b", re.IGNORECASE
+)
+
 # Pattern for simple constant queries like: SELECT 3 :: INT AS "3-80000030-0" FROM ( SELECT $1 AS "__DUMMY" FROM VALUES (NULL :: STRING))
 # Using exact spacing pattern from generated SQL for deterministic matching
 # Column ID format: {original_name}-{8_digit_hex_plan_id}-{column_index}
@@ -32,8 +36,7 @@ SIMPLE_CONSTANT_PATTERN = re.compile(
 
 class DescribeQueryCache:
     def __init__(self) -> None:
-        self._cache = {}
-        self._lock = threading.Lock()
+        self._cache = SynchronizedDict()
 
     @staticmethod
     def _hash_query(sql_query: str) -> str:
@@ -48,49 +51,49 @@ class DescribeQueryCache:
         return sql_query
 
     def get(self, sql_query: str) -> list[ResultMetadataV2] | None:
+        telemetry.report_describe_query_cache_lookup()
+
         cache_key = self._get_cache_key(sql_query)
         key = self._hash_query(cache_key)
         current_time = time.monotonic()
 
-        # TODO: maybe too much locking, we could use read-write lock also. Or a thread safe dictionary.
-        with self._lock:
-            if key in self._cache:
-                result, timestamp = self._cache[key]
-                if current_time < timestamp + DESCRIBE_CACHE_TTL_SECONDS:
-                    logger.debug(
-                        f"Returning query result from cache for query: {sql_query[:20]}"
-                    )
-
-                    # If this is a constant query, we need to transform the result metadata
-                    # to match the actual query's column name
-                    if (
-                        cache_key != sql_query
-                    ):  # Only transform if we normalized the key
-                        match = SIMPLE_CONSTANT_PATTERN.match(sql_query)
-                        if match:
-                            number, column_id = match.groups()
-                            expected_column_name = column_id
-
-                            # Transform the cached result to match this query's column name
-                            # There should only be one column in these constant queries
-                            metadata = result[0]
-                            new_metadata = ResultMetadataV2(
-                                name=expected_column_name,
-                                type_code=metadata.type_code,
-                                display_size=metadata.display_size,
-                                internal_size=metadata.internal_size,
-                                precision=metadata.precision,
-                                scale=metadata.scale,
-                                is_nullable=metadata.is_nullable,
-                            )
-                            return [new_metadata]
-
-                    return result
-                else:
-                    logger.debug(
-                        f"Had a cached entry, but it expired for query: {sql_query[:20]}"
-                    )
-                    del self._cache[key]
+        if key in self._cache:
+            result, timestamp = self._cache[key]
+            if current_time < timestamp + DESCRIBE_CACHE_TTL_SECONDS:
+                logger.debug(
+                    f"Returning query result from cache for query: {sql_query[:20]}"
+                )
+                self._cache[key] = (result, current_time)
+
+                # If this is a constant query, we need to transform the result metadata
+                # to match the actual query's column name
+                if cache_key != sql_query:  # Only transform if we normalized the key
+                    match = SIMPLE_CONSTANT_PATTERN.match(sql_query)
+                    if match:
+                        number, column_id = match.groups()
+                        expected_column_name = column_id
+
+                        # Transform the cached result to match this query's column name
+                        # There should only be one column in these constant queries
+                        metadata = result[0]
+                        new_metadata = ResultMetadataV2(
+                            name=expected_column_name,
+                            type_code=metadata.type_code,
+                            display_size=metadata.display_size,
+                            internal_size=metadata.internal_size,
+                            precision=metadata.precision,
+                            scale=metadata.scale,
+                            is_nullable=metadata.is_nullable,
+                        )
+
+                        telemetry.report_describe_query_cache_hit()
+                        return [new_metadata]
+
+                telemetry.report_describe_query_cache_hit()
+                return result
+            else:
+                telemetry.report_describe_query_cache_expired()
+                del self._cache[key]
         return None
 
     def put(self, sql_query: str, result: list[ResultMetadataV2] | None) -> None:
@@ -102,12 +105,18 @@ class DescribeQueryCache:
 
         logger.debug(f"Putting query into cache: {sql_query[:50]}...")
 
-        with self._lock:
-            self._cache[key] = (result, time.monotonic())
+        self._cache[key] = (result, time.monotonic())
 
     def clear(self) -> None:
-        with self._lock:
-            self._cache.clear()
+        self._cache.clear()
+
+    def update_cache_for_query(self, query: str) -> None:
+        # Clear cache for DDL operations that modify existing objects (exclude CREATE TABLE)
+        if DDL_DETECTION_PATTERN.search(query) and not PLAIN_CREATE_PATTERN.search(
+            query
+        ):
+            self.clear()
+            telemetry.report_describe_query_cache_clear(query[:100])
 
 
 def instrument_session_for_describe_cache(session: snowpark.Session):
@@ -126,10 +135,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
         if isinstance(cache_instance, DescribeQueryCache):
             cache = cache_instance
 
-            # TODO: This is very broad right now. We should be able to reduce the scope of clearing.
-            if DDL_DETECTION_PATTERN.search(query):
-                logger.debug(f"DDL detected, clearing describe query cache: '{query}'")
-                cache.clear()
+            cache.update_cache_for_query(query)
 
     def wrap_execute(wrapped_fn):
         def fn(query: str, **kwargs):
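
Taken together, the describe-query-cache hunks above swap the hand-rolled lock for a SynchronizedDict, refresh an entry's timestamp on every hit, and make invalidation selective: statements matching the DDL pattern still clear the cache, but a plain CREATE TABLE (which cannot change metadata already cached for other objects) no longer does. A minimal sketch of that invalidation rule, reusing the two regexes from the diff; the helper name below is illustrative, not the package's API:

    import re

    DDL_DETECTION_PATTERN = re.compile(r"\s*(CREATE|ALTER|DROP)\b", re.IGNORECASE)
    PLAIN_CREATE_PATTERN = re.compile(
        r"\s*CREATE\s+((LOCAL|GLOBAL)\s+)?(TRANSIENT\s+)?TABLE\b", re.IGNORECASE
    )

    def should_clear_describe_cache(query: str) -> bool:
        # Clear on DDL that can alter existing objects, but keep cached
        # describe results when the statement is a plain CREATE TABLE.
        return bool(DDL_DETECTION_PATTERN.search(query)) and not PLAIN_CREATE_PATTERN.search(query)

    assert should_clear_describe_cache("ALTER TABLE t ADD COLUMN c INT")
    assert should_clear_describe_cache("CREATE OR REPLACE TABLE t (c INT)")
    assert not should_clear_describe_cache("CREATE TABLE t (c INT)")
    assert not should_clear_describe_cache("SELECT * FROM t")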
@@ -2,6 +2,7 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 import re
+from typing import Any, TypeVar
 
 from pyspark.errors import AnalysisException
 
@@ -117,3 +118,122 @@ def split_fully_qualified_spark_name(qualified_name: str | None) -> list[str]:
     parts.append("".join(token_chars))
 
     return parts
+
+
+# See https://docs.snowflake.com/en/sql-reference/identifiers-syntax for identifier syntax
+UNQUOTED_IDENTIFIER_REGEX = r"([a-zA-Z_])([a-zA-Z0-9_$]{0,254})"
+QUOTED_IDENTIFIER_REGEX = r'"((""|[^"]){0,255})"'
+VALID_IDENTIFIER_REGEX = f"(?:{UNQUOTED_IDENTIFIER_REGEX}|{QUOTED_IDENTIFIER_REGEX})"
+
+
+Self = TypeVar("Self", bound="FQN")
+
+
+class FQN:
+    """Represents an object identifier, supporting fully qualified names.
+
+    The instance supports builder pattern that allows updating the identifier with database and
+    schema from different sources.
+
+    Examples
+    ________
+    >>> fqn = FQN.from_string("my_schema.object").using_connection(conn)
+
+    >>> fqn = FQN.from_string("my_name").set_database("db").set_schema("foo")
+    """
+
+    def __init__(
+        self,
+        database: str | None,
+        schema: str | None,
+        name: str,
+        signature: str | None = None,
+    ) -> None:
+        self._database = database
+        self._schema = schema
+        self._name = name
+        self.signature = signature
+
+    @property
+    def database(self) -> str | None:
+        return self._database
+
+    @property
+    def schema(self) -> str | None:
+        return self._schema
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def prefix(self) -> str:
+        if self.database:
+            return f"{self.database}.{self.schema if self.schema else 'PUBLIC'}"
+        if self.schema:
+            return f"{self.schema}"
+        return ""
+
+    @property
+    def identifier(self) -> str:
+        if self.prefix:
+            return f"{self.prefix}.{self.name}"
+        return self.name
+
+    def __str__(self) -> str:
+        return self.identifier
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, FQN):
+            raise AnalysisException(f"{other} is not a valid FQN")
+        return self.identifier == other.identifier
+
+    @classmethod
+    def from_string(cls, identifier: str) -> Self:
+        """Take in an object name in the form [[database.]schema.]name and return a new :class:`FQN` instance.
+
+        Raises:
+            InvalidIdentifierError: If the object identifier does not meet identifier requirements.
+        """
+        qualifier_pattern = (
+            rf"(?:(?P<first_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
+            rf"(?:(?P<second_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
+            rf"(?P<name>{VALID_IDENTIFIER_REGEX})(?P<signature>\(.*\))?"
+        )
+        result = re.fullmatch(qualifier_pattern, identifier)
+
+        if result is None:
+            raise AnalysisException(f"{identifier} is not a valid identifier")
+
+        unqualified_name = result.group("name")
+        if result.group("second_qualifier") is not None:
+            database = result.group("first_qualifier")
+            schema = result.group("second_qualifier")
+        else:
+            database = None
+            schema = result.group("first_qualifier")
+
+        signature = None
+        if result.group("signature"):
+            signature = result.group("signature")
+        return cls(
+            name=unqualified_name, schema=schema, database=database, signature=signature
+        )
+
+    def set_database(self, database: str | None) -> Self:
+        if database:
+            self._database = database
+        return self
+
+    def set_schema(self, schema: str | None) -> Self:
+        if schema:
+            self._schema = schema
+        return self
+
+    def set_name(self, name: str) -> Self:
+        self._name = name
+        return self
+
+    def to_dict(self) -> dict[str, str | None]:
+        """Return the dictionary representation of the instance."""
+        return {"name": self.name, "schema": self.schema, "database": self.database}
@@ -1,10 +1,11 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-
+import contextlib
 import functools
 
 from snowflake.snowpark import Session
+from snowflake.snowpark_connect.utils.identifiers import FQN
 
 
 @functools.cache
@@ -33,3 +34,22 @@ def file_format(
     ).collect()
 
     return file_format_name
+
+
+def get_table_type(
+    snowpark_table_name: str,
+    snowpark_session: Session,
+) -> str:
+    fqn = FQN.from_string(snowpark_table_name)
+    with contextlib.suppress(Exception):
+        if fqn.database is not None:
+            return snowpark_session.catalog.getTable(
+                table_name=fqn.name, schema=fqn.schema, database=fqn.database
+            ).table_type
+        elif fqn.schema is not None:
+            return snowpark_session.catalog.getTable(
+                table_name=fqn.name, schema=fqn.schema
+            ).table_type
+        else:
+            return snowpark_session.catalog.getTable(table_name=fqn.name).table_type
+    return "TABLE"
@@ -171,12 +171,19 @@ class ScalaUDFDef:
         is_map_return = udf_func_return_type.startswith("Map")
         wrapper_return_type = "String" if is_map_return else udf_func_return_type
 
+        # For handling Seq type correctly, ensure that the wrapper function always uses Array as its input and
+        # return types (when required) and the wrapped function uses Seq.
+        udf_func_return_type = udf_func_return_type.replace("Array", "Seq")
+        is_seq_return = udf_func_return_type.startswith("Seq")
+
         # Need to call the map to JSON string converter when a map is returned by the user's function.
-        invoke_udf_func = (
-            f"write(func({invocation_args}))"
-            if is_map_return
-            else f"func({invocation_args})"
-        )
+        if is_map_return:
+            invoke_udf_func = f"write(func({invocation_args}))"
+        elif is_seq_return:
+            # TODO: SNOW-2339385 Handle Array[T] return types correctly. Currently, only Seq[T] is supported.
+            invoke_udf_func = f"func({invocation_args}).toArray"
+        else:
+            invoke_udf_func = f"func({invocation_args})"
 
         # The lines of code below are required only when a Map is returned by the UDF. This is needed to serialize the
         # map output to a JSON string.
@@ -184,9 +191,9 @@ class ScalaUDFDef:
             ""
             if not is_map_return
             else """
-import org.json4s._
-import org.json4s.native.Serialization._
-import org.json4s.native.Serialization
+import shaded_json4s._
+import shaded_json4s.native.Serialization._
+import shaded_json4s.native.Serialization
 """
         )
         map_return_formatter = (
@@ -199,22 +206,12 @@ import org.json4s.native.Serialization
 
 
         return f"""import org.apache.spark.sql.connect.common.UdfPacket
 {map_return_imports}
-import java.io.{{ByteArrayInputStream, ObjectInputStream}}
-import java.nio.file.{{Files, Paths}}
+import com.snowflake.sas.scala.Utils
 
 object __RecreatedSparkUdf {{
 {map_return_formatter}
-  private lazy val func: ({udf_func_input_types}) => {udf_func_return_type} = {{
-    val importDirectory = System.getProperty("com.snowflake.import_directory")
-    val fPath = importDirectory + "{self.name}.bin"
-    val bytes = Files.readAllBytes(Paths.get(fPath))
-    val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
-    try {{
-      ois.readObject().asInstanceOf[UdfPacket].function.asInstanceOf[({udf_func_input_types}) => {udf_func_return_type}]
-    }} finally {{
-      ois.close()
-    }}
-  }}
+  private lazy val func: ({udf_func_input_types}) => {udf_func_return_type} =
+    Utils.deserializeFunc("{self.name}.bin").asInstanceOf[({udf_func_input_types}) => {udf_func_return_type}]
 
   def __wrapperFunc({wrapper_arg_and_input_types_str}): {wrapper_return_type} = {{
     {invoke_udf_func}
@@ -299,29 +296,15 @@ def build_scala_udf_imports(session, payload, udf_name, is_map_return) -> List[s
         # Remove the stage path since it is not properly formatted.
         user_jars.append(row[0][row[0].find("/") :])
 
-    # Jars used when the return type is a Map.
-    map_jars = (
-        []
-        if not is_map_return
-        else [
-            f"{stage_resource_path}/json4s-core_2.12-3.7.0-M11.jar",
-            f"{stage_resource_path}/json4s-native_2.12-3.7.0-M11.jar",
-            f"{stage_resource_path}/paranamer-2.8.3.jar",
-        ]
-    )
-
     # Format the user jars to be used in the IMPORTS clause of the stored procedure.
-    return (
-        [
-            closure_binary_file,
-            f"{stage_resource_path}/spark-connect-client-jvm_2.12-3.5.6.jar",
-            f"{stage_resource_path}/spark-common-utils_2.12-3.5.6.jar",
-            f"{stage_resource_path}/spark-sql_2.12-3.5.6.jar",
-            f"{stage_resource_path}/json4s-ast_2.12-3.7.0-M11.jar",
-        ]
-        + map_jars
-        + [f"{stage + jar}" for jar in user_jars]
-    )
+    return [
+        closure_binary_file,
+        f"{stage_resource_path}/spark-connect-client-jvm_2.12-3.5.6.jar",
+        f"{stage_resource_path}/spark-common-utils_2.12-3.5.6.jar",
+        f"{stage_resource_path}/spark-sql_2.12-3.5.6.jar",
+        f"{stage_resource_path}/json4s-ast_2.12-3.7.0-M11.jar",
+        f"{stage_resource_path}/sas-scala-udf_2.12-0.1.0.jar",
+    ] + [f"{stage + jar}" for jar in user_jars]
 
 
 def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf:
@@ -343,6 +326,14 @@ def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf
     Returns:
         A ScalaUdf object representing the created or cached Scala UDF.
     """
+    from snowflake.snowpark_connect.resources_initializer import (
+        wait_for_resource_initialization,
+    )
+
+    # Make sure that the resource initializer thread is completed before creating Scala UDFs since we depend on the jars
+    # uploaded by it.
+    wait_for_resource_initialization()
+
     from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
 
     function_name = pciudf._function_name
@@ -8,7 +8,7 @@ from collections.abc import Sequence
 from typing import Any
 
 from snowflake import snowpark
-from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
+from snowflake.snowpark.exceptions import SnowparkClientException
 from snowflake.snowpark.session import _get_active_session
 from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
 from snowflake.snowpark_connect.utils.describe_query_cache import (
@@ -50,7 +50,10 @@ def _get_current_snowpark_session() -> snowpark.Session | None:
 
 def configure_snowpark_session(session: snowpark.Session):
     """Configure a snowpark session with required parameters and settings."""
-    from snowflake.snowpark_connect.config import global_config
+    from snowflake.snowpark_connect.config import (
+        get_cte_optimization_enabled,
+        global_config,
+    )
 
     logger.info(f"Configuring session {session}")
 
@@ -77,6 +80,14 @@ def configure_snowpark_session(session: snowpark.Session):
     session.connection.arrow_number_to_decimal_setter = True
     session.custom_package_usage_config["enabled"] = True
 
+    # Configure CTE optimization based on session configuration
+    cte_optimization_enabled = get_cte_optimization_enabled()
+    session.cte_optimization_enabled = cte_optimization_enabled
+    logger.info(f"CTE optimization enabled: {cte_optimization_enabled}")
+
+    # Default query tag to be used unless overridden by user using AppName or spark.addTag()
+    query_tag = "SNOWPARK_CONNECT_QUERY"
+
     default_fallback_timezone = "UTC"
     if global_config.spark_sql_session_timeZone is None:
         try:
@@ -104,35 +115,14 @@ def configure_snowpark_session(session: snowpark.Session):
         "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
         "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
         "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false",  # this is required for creating udfs from sproc
+        "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
+        "QUERY_TAG": f"'{query_tag}'",
     }
 
     session.sql(
         f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
     ).collect()
 
-    # Rolling ahead in preparation of GS release 9.22 (ETA 8/5/2025). Once 9.22 is past rollback risk, merge this
-    # parameter with other in the session_params dictionary above
-    try:
-        session.sql(
-            "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE=true"
-        ).collect()
-    except SnowparkSQLException:
-        logger.debug(
-            "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE is not defined"
-        )
-    try:
-        session.sql(
-            "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT=true"
-        ).collect()
-    except SnowparkSQLException:
-        logger.debug("ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT is not defined")
-    try:
-        session.sql(
-            "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE=true"
-        ).collect()
-    except SnowparkSQLException:
-        logger.debug("ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE is not defined")
-
     # Instrument the snowpark session to use a cache for describe queries.
     instrument_session_for_describe_cache(session)
 
@@ -204,5 +194,5 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
     snowpark_session = get_or_create_snowpark_session()
     spark_tags_str = ",".join(sorted(spark_tags)) if spark_tags else None
 
-    if spark_tags_str != snowpark_session.query_tag:
+    if spark_tags_str and spark_tags_str != snowpark_session.query_tag:
         snowpark_session.query_tag = spark_tags_str
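
The session hunks above turn on Snowpark CTE optimization from configuration (get_cte_optimization_enabled) and install a default QUERY_TAG, and the matching key appears in RECORDED_CONFIG_KEYS below. A hedged sketch of how a Spark Connect client might supply that key, assuming it is read from the Spark session config like the other snowpark.connect.* options; the remote URL is a placeholder:

    from pyspark.sql import SparkSession

    # Illustrative only: per the diff, this key drives
    # session.cte_optimization_enabled on the underlying Snowpark session.
    spark = (
        SparkSession.builder.remote("sc://localhost:15002")
        .config("snowpark.connect.cte.optimization_enabled", "true")
        .getOrCreate()
    )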
@@ -88,6 +88,7 @@ RECORDED_CONFIG_KEYS = {
     "spark.sql.session.localRelationCacheThreshold",
     "spark.sql.mapKeyDedupPolicy",
     "snowpark.connect.sql.passthrough",
+    "snowpark.connect.cte.optimization_enabled",
     "snowpark.connect.iceberg.external_volume",
     "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
@@ -426,6 +427,58 @@ class Telemetry:
 
         summary["internal_queries"] += 1
 
+    @safe
+    def report_describe_query_cache_lookup(self):
+        """Report a describe query cache lookup."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_lookups" not in summary:
+            summary["describe_cache_lookups"] = 0
+
+        summary["describe_cache_lookups"] += 1
+
+    @safe
+    def report_describe_query_cache_hit(self):
+        """Report a describe query cache hit."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_hits" not in summary:
+            summary["describe_cache_hits"] = 0
+
+        summary["describe_cache_hits"] += 1
+
+    @safe
+    def report_describe_query_cache_expired(self):
+        """Report a describe query cache hit."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_expired" not in summary:
+            summary["describe_cache_expired"] = 0
+
+        summary["describe_cache_expired"] += 1
+
+    @safe
+    def report_describe_query_cache_clear(self, query_prefix: str):
+        """Report a describe query cache clear."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_clears" not in summary:
+            summary["describe_cache_clears"] = []
+
+        summary["describe_cache_clears"].append(query_prefix)
+
     @safe
     def report_udf_usage(self, udf_name: str):
         if self._not_in_request():
@@ -2,4 +2,4 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-VERSION = (0,27,0)
+VERSION = (0,28,0)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.27.0
+Version: 0.28.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0
@@ -16,7 +16,7 @@ Requires-Dist: jpype1
 Requires-Dist: protobuf<5.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.39.0,==1.38.0
+Requires-Dist: snowflake-snowpark-python[pandas]<1.40.0,==1.39.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
 Requires-Dist: aiobotocore~=2.23.0