snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/column_qualifier.py (new file)

@@ -0,0 +1,43 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+)
+
+
+@dataclass(frozen=True)
+class ColumnQualifier:
+    parts: tuple[str, ...]
+
+    def __post_init__(self) -> None:
+        if not all(isinstance(x, str) for x in self.parts):
+            raise TypeError("ColumnQualifier.parts must be strings")
+
+    @property
+    def is_empty(self) -> bool:
+        return len(self.parts) == 0
+
+    def all_qualified_names(self, name: str) -> list[str]:
+        qualifier_parts = self.parts
+        qualifier_prefixes = [
+            ".".join(quote_name_without_upper_casing(x) for x in qualifier_parts[i:])
+            for i in range(len(qualifier_parts))
+        ]
+        return [f"{prefix}.{name}" for prefix in qualifier_prefixes]
+
+    def to_upper(self):
+        return ColumnQualifier(tuple(part.upper() for part in self.parts))
+
+    def matches(self, target: ColumnQualifier) -> bool:
+        if self.is_empty or target.is_empty:
+            return False
+        # If the column has fewer qualifiers than the target, it cannot match
+        if len(self.parts) < len(target.parts):
+            return False
+        return self.parts[-len(target.parts) :] == target.parts
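The new ColumnQualifier resolves a column against progressively shorter qualifier suffixes. A minimal usage sketch, assuming the module path listed in the file table above; the catalog/schema/table names are made up:

    from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

    qualifier = ColumnQualifier(("catalog", "db", "tbl"))   # hypothetical parts
    target = ColumnQualifier(("db", "tbl"))

    # matches() is suffix-based: a column qualified as catalog.db.tbl satisfies
    # a lookup for db.tbl, but a shorter or empty qualifier never does.
    assert qualifier.matches(target)
    assert not ColumnQualifier(("tbl",)).matches(target)
    assert not ColumnQualifier(()).matches(target)

    # all_qualified_names() joins every quoted qualifier suffix to the column name,
    # roughly ['"catalog"."db"."tbl".c1', '"db"."tbl".c1', '"tbl".c1'].
    print(qualifier.all_qualified_names("c1"))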
snowflake/snowpark_connect/config.py

@@ -8,7 +8,7 @@ import re
 import sys
 from collections import defaultdict
 from copy import copy, deepcopy
-from typing import Any
+from typing import Any, Dict, Optional
 
 import jpype
 import pyspark.sql.connect.proto.base_pb2 as proto_base

@@ -17,11 +17,18 @@ from tzlocal import get_localzone_name
 from snowflake import snowpark
 from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
+    unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
 from snowflake.snowpark.types import TimestampTimeZone, TimestampType
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.type_support import set_integral_types_conversion
 from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
-from snowflake.snowpark_connect.utils.context import get_session_id
+from snowflake.snowpark_connect.utils.context import (
+    get_jpype_jclass_lock,
+    get_spark_session_id,
+)
 from snowflake.snowpark_connect.utils.external_udxf_cache import (
     clear_external_udxf_cache,
 )

@@ -139,9 +146,30 @@ class GlobalConfig:
         "spark.sql.parser.quotedRegexColumnNames": "false",
         # custom configs
         "snowpark.connect.version": ".".join(map(str, sas_version)),
+        "snowpark.connect.temporary.views.create_in_snowflake": "false",
         # Control whether repartition(n) on a DataFrame forces splitting into n files during writes
         # This matches spark behavior more closely, but introduces overhead.
         "snowflake.repartition.for.writes": "false",
+        "snowpark.connect.structured_types.fix": "true",
+        # Local relation optimization: Use List[Row] for small data, PyArrow for large data
+        # Enabled in production by default to improve performance for createDataFrame on small local relations.
+        # Disabled in tests by default unless explicitly enabled to stabilize flaky tests that are not applying row ordering.
+        # SNOW-2719980: Remove this flag after test fragility issues are resolved
+        "snowpark.connect.localRelation.optimizeSmallData": "true",
+        "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",  # TODO: no-op
+        # USE_VECTORIZED_SCANNER will become the default in a future BCR; Snowflake recommends setting it to TRUE for new workloads.
+        # This significantly reduces latency for loading Parquet files by downloading only relevant columnar sections into memory.
+        "snowpark.connect.parquet.useVectorizedScanner": "true",
+        # USE_LOGICAL_TYPE enables proper handling of Parquet logical types (TIMESTAMP, DATE, DECIMAL).
+        # Without useLogicalType set to "true", Parquet TIMESTAMP (INT64 physical) is incorrectly read as NUMBER(38,0).
+        "snowpark.connect.parquet.useLogicalType": "false",
+        "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue": "false",
+        "spark.sql.parquet.outputTimestampType": "TIMESTAMP_MILLIS",
+        "snowpark.connect.handleIntegralOverflow": "false",
+        "snowpark.connect.scala.version": "2.12",
+        # Control whether to convert decimal to integral types and vice versa: DecimalType(p,0) <-> ByteType/ShortType/IntegerType/LongType
+        # Values: "client_default" (behavior based on client type), "enabled", "disabled"
+        "snowpark.connect.integralTypesEmulation": "client_default",
     }
 
     boolean_config_list = [
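These defaults are plain strings that a connected client can override at runtime. A hedged sketch of doing so from PySpark over Spark Connect; the connection URL is illustrative, and the server-side effect of each key is only what the comments in the hunk above describe:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()  # URL is illustrative

    # Boolean-style keys are stored as the strings "true"/"false".
    spark.conf.set("snowpark.connect.parquet.useLogicalType", "true")
    spark.conf.set("snowpark.connect.handleIntegralOverflow", "true")

    # Only "client_default", "enabled", or "disabled" pass the allowed-values
    # check added to CONFIG_ALLOWED_VALUES later in this diff.
    spark.conf.set("snowpark.connect.integralTypesEmulation", "enabled")
    print(spark.conf.get("snowpark.connect.integralTypesEmulation"))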
@@ -150,11 +178,16 @@ class GlobalConfig:
         "spark.sql.repl.eagerEval.enabled",
         "spark.sql.crossJoin.enabled",
         "spark.sql.caseSensitive",
+        "snowpark.connect.localRelation.optimizeSmallData",
+        "snowpark.connect.parquet.useVectorizedScanner",
+        "snowpark.connect.parquet.useLogicalType",
         "spark.sql.ansi.enabled",
         "spark.sql.legacy.allowHashOnMapType",
         "spark.Catalog.databaseFilterInformationSchema",
         "spark.sql.parser.quotedRegexColumnNames",
         "snowflake.repartition.for.writes",
+        "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue",
+        "snowpark.connect.handleIntegralOverflow",
     ]
 
     int_config_list = [

@@ -171,8 +204,15 @@ class GlobalConfig:
         "spark.app.name": lambda session, name: setattr(
             session, "query_tag", f"Spark-Connect-App-Name={name}"
         ),
+        # TODO SNOW-2896871: Remove with version 1.10.0
         "snowpark.connect.udf.imports": lambda session, imports: parse_imports(
-            session, imports
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.python.imports": lambda session, imports: parse_imports(
+            session, imports, "python"
+        ),
+        "snowpark.connect.udf.java.imports": lambda session, imports: parse_imports(
+            session, imports, "java"
         ),
     }
 

@@ -257,21 +297,34 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "spark.sql.execution.pythonUDTF.arrow.enabled",
     "spark.sql.tvf.allowMultipleTableArguments.enabled",
     "snowpark.connect.sql.passthrough",
+    "snowpark.connect.cte.optimization_enabled",
     "snowpark.connect.iceberg.external_volume",
     "snowpark.connect.sql.identifiers.auto-uppercase",
+    "snowpark.connect.sql.partition.external_table_location",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
-    "enable_snowflake_extension_behavior",
+    "snowpark.connect.temporary.views.create_in_snowflake",
+    "snowpark.connect.enable_snowflake_extension_behavior",
+    "spark.hadoop.fs.s3a.server-side-encryption.key",
+    "spark.hadoop.fs.s3a.assumed.role.arn",
+    "snowpark.connect.describe_cache_ttl_seconds",
+    "mapreduce.fileoutputcommitter.marksuccessfuljobs",
+    "spark.sql.parquet.enable.summary-metadata",
+    "parquet.enable.summary-metadata",
 }
-AZURE_SAS_KEY = re.compile(
+AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
 )
+AZURE_SAS_KEY = re.compile(
+    r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+)
 
 
 def valid_session_config_key(key: str):
     return (
         key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
         or AZURE_SAS_KEY.match(key)  # Azure session keys
+        or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
     )
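The whitelist plus the two Azure patterns define which session keys valid_session_config_key accepts. A standalone sketch of the matching behavior; the regexes are copied verbatim from the hunk above and the account/container names are invented:

    import re

    AZURE_ACCOUNT_KEY = re.compile(
        r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
    )
    AZURE_SAS_KEY = re.compile(
        r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
    )

    # Per-container Blob endpoint keys match the renamed AZURE_ACCOUNT_KEY pattern.
    assert AZURE_ACCOUNT_KEY.match("fs.azure.sas.mycontainer.myaccount.blob.core.windows.net")

    # Fixed-token ADLS Gen2 keys match the AZURE_SAS_KEY pattern added in this version.
    assert AZURE_SAS_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")

    # Anything else must appear in SESSION_CONFIG_KEY_WHITELIST,
    # e.g. "snowpark.connect.describe_cache_ttl_seconds".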

@@ -279,17 +332,23 @@ class SessionConfig:
     """This class contains the session configuration for the Spark Server."""
 
     default_session_config = {
-        "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
         "snowpark.connect.sql.passthrough": "false",
+        "snowpark.connect.cte.optimization_enabled": "false",
         "snowpark.connect.udtf.compatibility_mode": "false",
         "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
         "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
-        "enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.enable_snowflake_extension_behavior": "false",
+        "snowpark.connect.describe_cache_ttl_seconds": "300",
+        "snowpark.connect.sql.partition.external_table_location": None,
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs": "false",
+        "spark.sql.parquet.enable.summary-metadata": "false",
+        "parquet.enable.summary-metadata": "false",
     }
 
     def __init__(self) -> None:
         self.config = deepcopy(self.default_session_config)
+        self.table_metadata: Dict[str, Dict[str, Any]] = {}
 
     def __getitem__(self, item: str) -> str:
         return self.get(item)

@@ -319,6 +378,11 @@ CONFIG_ALLOWED_VALUES: dict[str, tuple] = {
         "all",
         "none",
     ),
+    "snowpark.connect.integralTypesEmulation": (
+        "client_default",
+        "enabled",
+        "disabled",
+    ),
 }
 
 # Set some default configuration that are necessary for the driver.

@@ -344,9 +408,11 @@ def route_config_proto(
             if not pair.HasField("value"):
                 from pyspark.errors import IllegalArgumentException
 
-                raise IllegalArgumentException(
+                exception = IllegalArgumentException(
                     f"Cannot set config '{pair.key}' to None"
                 )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+                raise exception
 
             set_config_param(
                 config.session_id, pair.key, pair.value, snowpark_session

@@ -429,7 +495,11 @@ def route_config_proto(
                 pair.value = str(global_config.is_modifiable(key)).lower()
             return res
         case _:
-            raise SnowparkConnectNotImplementedError(f"Unexpected request {config}")
+            exception = SnowparkConnectNotImplementedError(
+                f"Unexpected request {config}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
 
 
 def set_config_param(

@@ -469,19 +539,27 @@ def _verify_static_config_not_modified(key: str) -> None:
     # https://github.com/apache/spark/blob/v3.5.3/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala#L161
     # Spark does not allow to modify static configurations at runtime.
     if global_config.is_static_config(key) and global_config.is_set(key):
-        raise ValueError(f"Cannot modify the value of a static config: {key}")
+        exception = ValueError(f"Cannot modify the value of a static config: {key}")
+        attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+        raise exception
 
 
 def _verify_is_valid_config_value(key: str, value: Any) -> None:
     if key in CONFIG_ALLOWED_VALUES and value not in CONFIG_ALLOWED_VALUES[key]:
-        raise ValueError(
+        exception = ValueError(
             f"Invalid value '{value}' for key '{key}'. Allowed values: {', '.join(CONFIG_ALLOWED_VALUES[key])}."
         )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+        raise exception
 
 
 def _verify_is_not_readonly_config(key):
     if key in global_config.readonly_config_list:
-        raise ValueError(f"Config with key {key} is read-only and cannot be modified.")
+        exception = ValueError(
+            f"Config with key {key} is read-only and cannot be modified."
+        )
+        attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+        raise exception
 
 
 def set_jvm_timezone(timezone_id: str):

@@ -498,10 +576,13 @@ def set_jvm_timezone(timezone_id: str):
         RuntimeError: If JVM is not started
     """
     if not jpype.isJVMStarted():
-        raise RuntimeError("JVM must be started before setting timezone")
+        exception = RuntimeError("JVM must be started before setting timezone")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     try:
-        TimeZone = jpype.JClass("java.util.TimeZone")
+        with get_jpype_jclass_lock():
+            TimeZone = jpype.JClass("java.util.TimeZone")
         new_timezone = TimeZone.getTimeZone(timezone_id)
         TimeZone.setDefault(new_timezone)
 

@@ -513,7 +594,9 @@
 def reset_jvm_timezone_to_system_default():
     """Reset JVM timezone to the system's default timezone"""
     if not jpype.isJVMStarted():
-        raise RuntimeError("JVM must be started first")
+        exception = RuntimeError("JVM must be started first")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
     try:
         TimeZone = jpype.JClass("java.util.TimeZone")

@@ -522,9 +605,13 @@
             f"Reset JVM timezone to system default: {TimeZone.getDefault().getID()}"
         )
     except jpype.JException as e:
-        raise RuntimeError(f"Java exception while resetting timezone: {e}")
+        exception = RuntimeError(f"Java exception while resetting timezone: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
     except Exception as e:
-        raise RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+        exception = RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
 
 
 def set_snowflake_parameters(

@@ -569,38 +656,137 @@
             snowpark_session.use_database(db)
         case (prev, curr) if prev != curr:
             snowpark_session.use_schema(prev)
+        case "snowpark.connect.cte.optimization_enabled":
+            # Set CTE optimization on the snowpark session
+            cte_enabled = str_to_bool(value)
+            snowpark_session.cte_optimization_enabled = cte_enabled
+            logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
+        case "snowpark.connect.structured_types.fix":
+            # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+            snowpark.context._enable_fix_2360274 = str_to_bool(value)
+            logger.info(f"Updated snowpark session structured types fix: {value}")
+        case "spark.sql.parquet.outputTimestampType":
+            if value == "TIMESTAMP_MICROS":
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = false"
+                ).collect()
+            else:
+                # Default: TIMESTAMP_MILLIS (or any other value)
+                snowpark_session.sql(
+                    "ALTER SESSION SET UNLOAD_PARQUET_TIME_TIMESTAMP_MILLIS = true"
+                ).collect()
+            logger.info(f"Updated parquet timestamp output type to: {value}")
+        case "snowpark.connect.scala.version":
+            # force java udf helper recreation
+            set_java_udf_creator_initialized_state(False)
+        case "snowpark.connect.integralTypesEmulation":
+            # "client_default" - don't change, let set_spark_version handle it
+            # "enabled" / "disabled" - explicitly set
+            if value.lower() == "enabled":
+                set_integral_types_conversion(True)
+            elif value.lower() == "disabled":
+                set_integral_types_conversion(False)
         case _:
             pass
 
 
 def get_boolean_session_config_param(name: str) -> bool:
-    session_config = sessions_config[get_session_id()]
+    session_config = sessions_config[get_spark_session_id()]
     return str_to_bool(session_config[name])
 
 
+def get_string_session_config_param(name: str) -> str:
+    session_config = sessions_config[get_spark_session_id()]
+    return str(session_config[name])
+
+
+def get_cte_optimization_enabled() -> bool:
+    """Get the CTE optimization configuration setting."""
+    return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")
+
+
+def get_success_file_generation_enabled() -> bool:
+    """Get the _SUCCESS file generation configuration setting."""
+    return get_boolean_session_config_param(
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs"
+    )
+
+
+def get_parquet_metadata_generation_enabled() -> bool:
+    """
+    Get the Parquet metadata file generation configuration setting.
+    """
+    return get_boolean_session_config_param(
+        "spark.sql.parquet.enable.summary-metadata"
+    ) or get_boolean_session_config_param("parquet.enable.summary-metadata")
+
+
+def get_describe_cache_ttl_seconds() -> int:
+    """Get the describe query cache TTL from session config, with a default fallback."""
+    session_config: SessionConfig = sessions_config[get_spark_session_id()]
+    default_ttl: str = SessionConfig.default_session_config[
+        "snowpark.connect.describe_cache_ttl_seconds"
+    ]
+    try:
+        ttl_str = session_config.get(
+            "snowpark.connect.describe_cache_ttl_seconds", default_ttl
+        )
+        return int(ttl_str)
+    except ValueError:  # fallback to default ttl
+        return int(default_ttl)
+
+
+def should_create_temporary_view_in_snowflake() -> bool:
+    return str_to_bool(
+        global_config["snowpark.connect.temporary.views.create_in_snowflake"]
+    )
+
+
 def auto_uppercase_column_identifiers() -> bool:
-    session_config = sessions_config[get_session_id()]
-    return session_config[
+    session_config = sessions_config[get_spark_session_id()]
+    auto_upper_case_config = session_config[
         "snowpark.connect.sql.identifiers.auto-uppercase"
-    ].lower() in ("all", "only_columns")
+    ]
+    if auto_upper_case_config:
+        return auto_upper_case_config.lower() in ("all", "only_columns")
+
+    return not global_config.spark_sql_caseSensitive
 
 
 def auto_uppercase_non_column_identifiers() -> bool:
-    session_config = sessions_config[get_session_id()]
-    return session_config[
+    session_config = sessions_config[get_spark_session_id()]
+    auto_upper_case_config = session_config[
         "snowpark.connect.sql.identifiers.auto-uppercase"
-    ].lower() in ("all", "all_except_columns")
+    ]
+    if auto_upper_case_config:
+        return auto_upper_case_config.lower() in ("all", "all_except_columns")
+
+    return not global_config.spark_sql_caseSensitive
 
 
-def parse_imports(session: snowpark.Session, imports: str | None) -> None:
+def external_table_location() -> Optional[str]:
+    session_config = sessions_config[get_spark_session_id()]
+    return session_config.get(
+        "snowpark.connect.sql.partition.external_table_location", None
+    )
+
+
+def parse_imports(
+    session: snowpark.Session, imports: str | None, language: str
+) -> None:
     if not imports:
         return
 
     # UDF needs to be recreated to include new imports
     clear_external_udxf_cache(session)
+    if language == "java":
+
+        set_java_udf_creator_initialized_state(False)
 
     for udf_import in imports.strip("[] ").split(","):
-        session.add_import(udf_import)
+        udf_import = udf_import.strip()
+        if udf_import:
+            session.add_import(udf_import)
 
 
 def get_timestamp_type():
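The reworked parse_imports now tolerates stray whitespace and trailing commas in the bracketed import list. A standalone sketch of just the string handling (the stage paths are made up; the real code passes each surviving entry to session.add_import):

    imports = "[@stage/udf_dep.py, @stage/helpers.zip, ]"   # hypothetical config value

    entries = []
    for udf_import in imports.strip("[] ").split(","):
        udf_import = udf_import.strip()
        if udf_import:                 # empty entries are skipped instead of being registered
            entries.append(udf_import)

    print(entries)                     # ['@stage/udf_dep.py', '@stage/helpers.zip']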
@@ -613,3 +799,100 @@ def get_timestamp_type():
     # shouldn't happen since `spark.sql.timestampType` is always defined, and `spark.conf.unset` sets it to default (TIMESTAMP_LTZ)
     timestamp_type = TimestampType(TimestampTimeZone.LTZ)
     return timestamp_type
+
+
+def record_table_metadata(
+    table_identifier: str,
+    table_type: str,
+    data_source: str,
+    supports_column_rename: bool = True,
+) -> None:
+    """
+    Record metadata about a table for Spark compatibility checks.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+        table_type: "v1" or "v2"
+        data_source: Source format (parquet, csv, iceberg, etc.)
+        supports_column_rename: Whether the table supports RENAME COLUMN
+    """
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+
+    # Normalize table identifier for consistent lookup
+    # Use the full catalog.database.table identifier to avoid conflicts
+    normalized_identifier = table_identifier.upper().strip('"')
+
+    session_config.table_metadata[normalized_identifier] = {
+        "table_type": table_type,
+        "data_source": data_source,
+        "supports_column_rename": supports_column_rename,
+    }
+
+
+def get_table_metadata(table_identifier: str) -> Dict[str, Any] | None:
+    """
+    Get stored metadata for a table.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+
+    Returns:
+        Table metadata dict or None if not found
+    """
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+
+    normalized_identifier = unquote_if_quoted(table_identifier).upper()
+
+    return session_config.table_metadata.get(normalized_identifier)
+
+
+def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
+    """
+    Check if a table supports a given operation based on metadata and config.
+
+    Args:
+        table_identifier: Full table identifier (catalog.database.table)
+        operation: Operation to check (e.g., "rename_column")
+
+    Returns:
+        True if operation is supported, False if should be blocked
+    """
+    table_metadata = get_table_metadata(table_identifier)
+
+    if not table_metadata:
+        return True
+
+    session_id = get_spark_session_id()
+    session_config = sessions_config[session_id]
+    enable_extensions = str_to_bool(
+        session_config.get(
+            "snowpark.connect.enable_snowflake_extension_behavior", "false"
+        )
+    )
+
+    if enable_extensions:
+        return True
+
+    if operation == "rename_column":
+        return table_metadata.get("supports_column_rename", True)
+
+    return True
+
+
+def get_scala_version() -> str:
+    return global_config.get("snowpark.connect.scala.version")
+
+
+_java_udf_creator_initialized = False
+
+
+def is_java_udf_creator_initialized() -> bool:
+    global _java_udf_creator_initialized
+    return _java_udf_creator_initialized
+
+
+def set_java_udf_creator_initialized_state(value: bool) -> None:
+    global _java_udf_creator_initialized
+    _java_udf_creator_initialized = value
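The table-metadata helpers are intended to be used in two phases: record when a table is created through a read/write path, then check before mapping an operation that Spark would reject for that table type. A hedged sketch of that flow, assuming the helpers live in snowflake.snowpark_connect.config and an active session so get_spark_session_id() resolves; the table name and data source are hypothetical:

    from snowflake.snowpark_connect.config import (
        check_table_supports_operation,
        record_table_metadata,
    )

    # At table-creation time: a v1 parquet-backed table is recorded as not
    # supporting RENAME COLUMN.
    record_table_metadata(
        table_identifier="spark_catalog.db.events",
        table_type="v1",
        data_source="parquet",
        supports_column_rename=False,
    )

    # Later, before mapping ALTER TABLE ... RENAME COLUMN, the server can ask:
    if not check_table_supports_operation("spark_catalog.db.events", "rename_column"):
        raise ValueError("RENAME COLUMN is not supported for this table")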
snowflake/snowpark_connect/constants.py

@@ -16,3 +16,5 @@ MAP_IN_ARROW_EVAL_TYPE = 207 # eval_type for mapInArrow operations
 COLUMN_METADATA_COLLISION_KEY = "{expr_id}_{key}"
 
 DUPLICATE_KEY_FOUND_ERROR_TEMPLATE = "Duplicate key found: {key}. You can set spark.sql.mapKeyDedupPolicy to LAST_WIN to deduplicate map keys with last wins policy."
+
+SPARK_VERSION = "3.5.3"