snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
@@ -4,13 +4,17 @@
4
4
 
5
5
  import logging
6
6
  import os
7
+ import threading
7
8
  from collections.abc import Sequence
8
9
  from typing import Any
9
10
 
10
11
  from snowflake import snowpark
11
- from snowflake.snowpark.exceptions import SnowparkClientException, SnowparkSQLException
12
+ from snowflake.connector.description import PLATFORM
13
+ from snowflake.snowpark.exceptions import SnowparkClientException
12
14
  from snowflake.snowpark.session import _get_active_session
13
15
  from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
16
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
17
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
14
18
  from snowflake.snowpark_connect.utils.describe_query_cache import (
15
19
  instrument_session_for_describe_cache,
16
20
  )
@@ -21,6 +25,13 @@ from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
21
25
  from snowflake.snowpark_connect.utils.telemetry import telemetry
22
26
  from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache
23
27
 
28
+ SKIP_SESSION_CONFIGURATION = False
29
+
30
+
31
+ def skip_session_configuration(skip: bool):
32
+ global SKIP_SESSION_CONFIGURATION
33
+ SKIP_SESSION_CONFIGURATION = skip
34
+
24
35
 
25
36
  # Suppress experimental warnings from snowflake.snowpark logger
26
37
  def _filter_experimental_warnings(record):
@@ -50,7 +61,12 @@ def _get_current_snowpark_session() -> snowpark.Session | None:
50
61
 
51
62
  def configure_snowpark_session(session: snowpark.Session):
52
63
  """Configure a snowpark session with required parameters and settings."""
53
- from snowflake.snowpark_connect.config import global_config
64
+ from snowflake.snowpark_connect.config import (
65
+ get_cte_optimization_enabled,
66
+ global_config,
67
+ )
68
+
69
+ global SKIP_SESSION_CONFIGURATION
54
70
 
55
71
  logger.info(f"Configuring session {session}")
56
72
 
@@ -63,11 +79,28 @@ def configure_snowpark_session(session: snowpark.Session):
63
79
  # custom udf imports
64
80
  session._python_files = set()
65
81
  session._import_files = set()
82
+ session._artifact_jars = set()
83
+
84
+ # custom artifact attributes
85
+ # track current chunk
86
+ # key: session_id, value: dict of (name, num_chunks, current_chunk_index)
87
+ session._current_chunk: dict[str, dict] = {}
88
+ # Use thread-safe access when modifying current chunk dictionary
89
+ session._current_chunk_lock = threading.RLock()
90
+
91
+ # track filenames to be uploaded to stage
92
+ # key: session_id, value: dict of (name, filename)
93
+ session._filenames: dict[str, dict[str, str]] = {}
94
+ # Use thread-safe access when modifying filenames dictionary
95
+ session._filenames_lock = threading.RLock()
66
96
 
67
97
  # built-in udf cache
68
98
  init_builtin_udf_cache(session)
69
99
  init_external_udxf_cache(session)
70
100
 
101
+ # file format cache
102
+ session._file_formats = set()
103
+
71
104
  # Set experimental parameters (warnings globally suppressed)
72
105
  session.ast_enabled = False
73
106
  session.eliminate_numeric_sql_value_cast_enabled = False
@@ -77,6 +110,18 @@ def configure_snowpark_session(session: snowpark.Session):
77
110
  session.connection.arrow_number_to_decimal_setter = True
78
111
  session.custom_package_usage_config["enabled"] = True
79
112
 
113
+ # Scoped temp objects may not be accessible in stored procedure and cause "object does not exist" error. So disable
114
+ # _use_scoped_temp_objects here and use temp table instead.
115
+ session._use_scoped_temp_objects = False
116
+
117
+ # Configure CTE optimization based on session configuration
118
+ cte_optimization_enabled = get_cte_optimization_enabled()
119
+ session.cte_optimization_enabled = cte_optimization_enabled
120
+ logger.info(f"CTE optimization enabled: {cte_optimization_enabled}")
121
+
122
+ # Default query tag to be used unless overridden by user using AppName or spark.addTag()
123
+ query_tag = "SNOWPARK_CONNECT_QUERY"
124
+
80
125
  default_fallback_timezone = "UTC"
81
126
  if global_config.spark_sql_session_timeZone is None:
82
127
  try:
@@ -103,35 +148,20 @@ def configure_snowpark_session(session: snowpark.Session):
103
148
  "TIMEZONE": f"'{global_config.spark_sql_session_timeZone}'",
104
149
  "QUOTED_IDENTIFIERS_IGNORE_CASE": "false",
105
150
  "PYTHON_SNOWPARK_ENABLE_THREAD_SAFE_SESSION": "true",
106
- "PYTHON_SNOWPARK_USE_SCOPED_TEMP_OBJECTS": "false", # this is required for creating udfs from sproc
151
+ "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE": "true",
152
+ "QUERY_TAG": f"'{query_tag}'",
107
153
  }
108
154
 
109
- session.sql(
110
- f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
111
- ).collect()
112
-
113
- # Rolling ahead in preparation of GS release 9.22 (ETA 8/5/2025). Once 9.22 is past rollback risk, merge this
114
- # parameter with other in the session_params dictionary above
115
- try:
155
+ # SNOW-2245971: Stored procedures inside Native Apps run as Execute As Owner and hence cannot set session params.
156
+ if not SKIP_SESSION_CONFIGURATION:
116
157
  session.sql(
117
- "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE=true"
158
+ f"ALTER SESSION SET {', '.join([f'{k} = {v}' for k, v in session_params.items()])}"
118
159
  ).collect()
119
- except SnowparkSQLException:
120
- logger.debug(
121
- "ENABLE_STRUCTURED_TYPES_IN_SNOWPARK_CONNECT_RESPONSE is not defined"
160
+ else:
161
+ session_param_names = ", ".join(session_params.keys())
162
+ logger.info(
163
+ f"Skipping Snowpark Connect session configuration as requested. Please make sure following session parameters are set correctly: {session_param_names}"
122
164
  )
123
- try:
124
- session.sql(
125
- "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT=true"
126
- ).collect()
127
- except SnowparkSQLException:
128
- logger.debug("ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT is not defined")
129
- try:
130
- session.sql(
131
- "ALTER SESSION SET ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE=true"
132
- ).collect()
133
- except SnowparkSQLException:
134
- logger.debug("ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE is not defined")
135
165
 
136
166
  # Instrument the snowpark session to use a cache for describe queries.
137
167
  instrument_session_for_describe_cache(session)
@@ -145,6 +175,10 @@ def _is_running_in_SPCS():
145
175
  )
146
176
 
147
177
 
178
+ def _is_running_in_stored_procedure_or_notebook():
179
+ return PLATFORM == "XP"
180
+
181
+
148
182
  def _get_session_configs_from_ENV() -> dict[str, Any]:
149
183
  session_configs = {
150
184
  "account": os.getenv("SNOWFLAKE_ACCOUNT"),
@@ -198,11 +232,13 @@ def set_query_tags(spark_tags: Sequence[str]) -> None:
198
232
  """Sets Snowpark session query_tag value to the tag from the Spark request."""
199
233
 
200
234
  if any("," in tag for tag in spark_tags):
201
- raise ValueError("Tags cannot contain ','.")
235
+ exception = ValueError("Tags cannot contain ','.")
236
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
237
+ raise exception
202
238
 
203
239
  # TODO: Tags might not be set correctly in parallel workloads or multi-threaded code.
204
240
  snowpark_session = get_or_create_snowpark_session()
205
241
  spark_tags_str = ",".join(sorted(spark_tags)) if spark_tags else None
206
242
 
207
- if spark_tags_str != snowpark_session.query_tag:
243
+ if spark_tags_str and spark_tags_str != snowpark_session.query_tag:
208
244
  snowpark_session.query_tag = spark_tags_str
@@ -6,17 +6,59 @@ import logging
6
6
 
7
7
  from pyspark import StorageLevel
8
8
 
9
- logger = logging.getLogger("snowflake_connect_server")
10
- logger.setLevel(logging.WARN)
11
9
 
12
- console_handler = logging.StreamHandler()
13
- console_handler.setLevel(logging.INFO)
14
- formatter = logging.Formatter(
15
- "%(asctime)s - %(name)s - %(levelname)s - [Thread %(thread)d] - %(message)s"
10
+ def ensure_logger_has_handler(
11
+ logger_name: str, log_level: int = logging.INFO, force_level: bool = False
12
+ ):
13
+ """
14
+ Ensure a logger has a StreamHandler, add one if missing.
15
+ Checks both the specific logger and root logger for existing handlers.
16
+
17
+ Args:
18
+ logger_name: Name of the logger to configure
19
+ log_level: Log level to set on both logger and handler
20
+ force_level: If True, always set the log level. If False, only set if logger level is NOTSET
21
+
22
+ Returns:
23
+ The configured logger
24
+ """
25
+ target_logger = logging.getLogger(logger_name)
26
+
27
+ # Only set level if forced or if logger hasn't been configured yet
28
+ if force_level or target_logger.level == logging.NOTSET:
29
+ target_logger.setLevel(log_level)
30
+ else:
31
+ log_level = target_logger.level
32
+
33
+ # Check if the logger already has a StreamHandler
34
+ has_stream_handler = any(
35
+ isinstance(h, logging.StreamHandler) for h in target_logger.handlers
36
+ )
37
+
38
+ # Check if root logger has handlers (from basicConfig or manual setup)
39
+ root_logger = logging.getLogger()
40
+ has_root_handlers = len(root_logger.handlers) > 0
41
+
42
+ # Only add handler if:
43
+ # 1. Logger doesn't have its own StreamHandler AND
44
+ # 2. Root logger doesn't have handlers (to avoid duplication)
45
+ if not has_stream_handler and not has_root_handlers:
46
+ handler = logging.StreamHandler()
47
+ handler.setLevel(log_level)
48
+ formatter = logging.Formatter(
49
+ "%(asctime)s - %(name)s - %(levelname)s - [Thread %(thread)d] - %(message)s"
50
+ )
51
+ handler.setFormatter(formatter)
52
+ target_logger.addHandler(handler)
53
+
54
+ return target_logger
55
+
56
+
57
+ # Initialize the main logger using the helper function
58
+ # force_level=False means it will respect any existing log level configuration
59
+ logger = ensure_logger_has_handler(
60
+ "snowflake_connect_server", logging.INFO, force_level=False
16
61
  )
17
- console_handler.setFormatter(formatter)
18
- # Display the logs to the console
19
- logger.addHandler(console_handler)
20
62
 
21
63
 
22
64
  def run_once_decorator(func):
@@ -0,0 +1,290 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ """
6
+ SPCS Logger - Adapted from ExecPlatform/src/coprocessor/python/telemetry/py/logger.py
7
+ Outputs flat JSON format compatible with SPCS OpenTelemetry collector with proper trace context.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import sys
13
+ import traceback
14
+ from typing import Any, Mapping, Sequence
15
+
16
+
17
+ class SPCSLoggerConfig:
18
+ """Configuration for SPCS logger."""
19
+
20
+ MESSAGE_SIZE_LIMIT_BYTES = 524288 # 512KB
21
+ ELLIPSIS = "..."
22
+
23
+ # Set to True if initialized
24
+ is_initialized = False
25
+
26
+
27
+ def _encode_value_simple(value: Any) -> Any:
28
+ """
29
+ Encode a value to simple JSON format (not OpenTelemetry nested format).
30
+ SPCS expects flat JSON values, not the {stringValue: ...} format.
31
+ """
32
+ if isinstance(value, (bool, str, int, float)):
33
+ return value
34
+ if isinstance(value, Sequence) and not isinstance(value, str):
35
+ return [_encode_value_simple(v) for v in value]
36
+ if isinstance(value, Mapping):
37
+ return {str(k): _encode_value_simple(v) for k, v in value.items()}
38
+ # Stringify anything else
39
+ return str(value)
40
+
41
+
42
+ # Skip Python's built-in LogRecord attributes
43
+ _RESERVED_ATTRS = frozenset(
44
+ (
45
+ "asctime",
46
+ "args",
47
+ "created",
48
+ "exc_info",
49
+ "exc_text",
50
+ "filename",
51
+ "funcName",
52
+ "message",
53
+ "levelname",
54
+ "levelno",
55
+ "lineno",
56
+ "module",
57
+ "msecs",
58
+ "msg",
59
+ "name",
60
+ "pathname",
61
+ "process",
62
+ "processName",
63
+ "relativeCreated",
64
+ "stack_info",
65
+ "thread",
66
+ "threadName",
67
+ "taskName",
68
+ )
69
+ )
70
+
71
+
72
+ def _extract_attributes(record: logging.LogRecord) -> dict:
73
+ """Extract log record attributes to flat dict format for SPCS."""
74
+ attributes = {}
75
+
76
+ # Extract custom attributes from extra={}
77
+ for k, v in vars(record).items():
78
+ if k not in _RESERVED_ATTRS:
79
+ attributes[k] = _encode_value_simple(v)
80
+
81
+ # Add standard code location attributes
82
+ attributes["code.lineno"] = record.lineno
83
+ attributes["code.function"] = record.funcName
84
+ attributes["code.filepath"] = record.pathname
85
+
86
+ # Add exception info if present
87
+ if record.exc_info is not None:
88
+ exctype, value, tb = record.exc_info
89
+ if exctype is not None:
90
+ attributes["exception.type"] = exctype.__name__
91
+ if value is not None and value.args:
92
+ attributes["exception.message"] = str(value.args[0])
93
+ if tb is not None:
94
+ attributes["exception.stacktrace"] = "".join(
95
+ traceback.format_exception(*record.exc_info)
96
+ )
97
+
98
+ return attributes
99
+
100
+
101
+ def get_snowflake_log_level_name(py_level_name: str) -> str:
102
+ """
103
+ Convert Python log level to Snowflake log level.
104
+ This matches the original UDF logger implementation.
105
+ """
106
+ level = py_level_name.upper()
107
+ if level == "WARNING":
108
+ return "WARN"
109
+ elif level == "CRITICAL":
110
+ return "FATAL"
111
+ elif level == "NOTSET":
112
+ return "TRACE"
113
+ else:
114
+ return level
115
+
116
+
117
+ def get_severity_number(snowflake_level: str) -> int:
118
+ """
119
+ Get OTLP severity number (integer) for a Snowflake log level.
120
+
121
+ OTLP Spec: https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
122
+ This returns INTEGER values (not strings like the buggy UDF code).
123
+ """
124
+ if snowflake_level == "TRACE":
125
+ return 1 # SEVERITY_NUMBER_TRACE
126
+ elif snowflake_level == "DEBUG":
127
+ return 5 # SEVERITY_NUMBER_DEBUG
128
+ elif snowflake_level == "INFO":
129
+ return 9 # SEVERITY_NUMBER_INFO
130
+ elif snowflake_level == "WARN":
131
+ return 13 # SEVERITY_NUMBER_WARN
132
+ elif snowflake_level == "ERROR":
133
+ return 17 # SEVERITY_NUMBER_ERROR
134
+ elif snowflake_level == "FATAL":
135
+ return 21 # SEVERITY_NUMBER_FATAL
136
+ else:
137
+ return 0 # SEVERITY_NUMBER_UNSPECIFIED
138
+
139
+
140
+ def _encode_spcs_log_record(record: logging.LogRecord) -> dict:
141
+ """
142
+ Encode a log record to the FLAT JSON format expected by SPCS.
143
+
144
+ SPCS OpenTelemetry collector expects:
145
+ {
146
+ "body": "message",
147
+ "severity_text": "INFO",
148
+ "severity_number": 9, # INTEGER, not string!
149
+ "attributes": {...},
150
+ "scope": {"name": "logger_name"}
151
+ }
152
+ """
153
+ # Format the message
154
+ message = str(record.msg)
155
+ if record.args:
156
+ try:
157
+ message = message % record.args
158
+ except (TypeError, ValueError):
159
+ message = str(record.msg)
160
+
161
+ # Truncate message if it exceeds size limit
162
+ message_bytes = message.encode("utf-8", errors="replace")
163
+ if sys.getsizeof(message_bytes) > SPCSLoggerConfig.MESSAGE_SIZE_LIMIT_BYTES:
164
+ truncate_length = SPCSLoggerConfig.MESSAGE_SIZE_LIMIT_BYTES - len(
165
+ SPCSLoggerConfig.ELLIPSIS.encode()
166
+ )
167
+ # Ensure we don't cut in the middle of a UTF-8 multibyte sequence
168
+ while truncate_length > 0 and (message_bytes[truncate_length] & 0xC0) == 0x80:
169
+ truncate_length -= 1
170
+ message_bytes = message_bytes[0:truncate_length]
171
+ message = (
172
+ message_bytes.decode("utf-8", errors="replace") + SPCSLoggerConfig.ELLIPSIS
173
+ )
174
+
175
+ # Map to Snowflake log level
176
+ snowflake_level = get_snowflake_log_level_name(record.levelname)
177
+
178
+ # Construct the FLAT log record (NOT nested OpenTelemetry structure)
179
+ log_record = {
180
+ "body": message,
181
+ "severity_text": snowflake_level,
182
+ "severity_number": get_severity_number(snowflake_level), # INTEGER!
183
+ "attributes": _extract_attributes(record),
184
+ "scope": {"name": record.name},
185
+ }
186
+
187
+ return log_record
188
+
189
+
190
+ # =============================================================================
191
+ # SPCS-SPECIFIC HANDLER
192
+ # =============================================================================
193
+
194
+
195
+ class SPCSStreamHandler(logging.StreamHandler):
196
+ """
197
+ Custom handler for SPCS that writes flat JSON format to stdout.
198
+
199
+ The SPCS OpenTelemetry collector will:
200
+ 1. Capture stdout
201
+ 2. Parse JSON if line matches ^{.*}$
202
+ 3. Extract body, severity_text, severity_number, attributes, scope, trace_id, span_id fields
203
+ 4. Map trace_id/span_id to LogRecord protobuf fields
204
+ 5. Backend creates TRACE column from protobuf trace_id/span_id
205
+ 6. Route to Event Table
206
+ """
207
+
208
+ def __init__(self, stream=None) -> None:
209
+ """
210
+ Initialize the handler.
211
+
212
+ Args:
213
+ stream: Output stream (default: sys.stdout)
214
+ """
215
+ super().__init__(stream or sys.stdout)
216
+
217
+ def emit(self, record: logging.LogRecord):
218
+ """
219
+ Emit a log record as single-line JSON to stdout.
220
+ """
221
+ try:
222
+ # Encode to SPCS-compatible flat JSON format
223
+ log_record = _encode_spcs_log_record(record)
224
+
225
+ # Convert to compact JSON string (single line, no spaces)
226
+ log_json = json.dumps(log_record, separators=(",", ":"))
227
+
228
+ # Write to stdout (SPCS captures this)
229
+ self.stream.write(log_json + "\n")
230
+ self.flush()
231
+
232
+ except Exception:
233
+ self.handleError(record)
234
+
235
+
236
+ # =============================================================================
237
+ # INITIALIZATION FUNCTIONS
238
+ # =============================================================================
239
+
240
+
241
+ def setup_spcs_logger(
242
+ log_level: int = logging.INFO,
243
+ logger_name: str = None,
244
+ enable_console_output: bool = False,
245
+ ) -> logging.Logger:
246
+ """
247
+ Set up the root logger for SPCS with flat JSON formatting.
248
+
249
+ Args:
250
+ log_level: Python logging level (e.g., logging.INFO)
251
+ logger_name: Optional logger name (None for root logger)
252
+ enable_console_output: If True, also adds a human-readable console handler to stderr
253
+
254
+ Returns:
255
+ Configured logger instance
256
+
257
+ Example:
258
+ >>> logger = setup_spcs_logger(logging.INFO, enable_console_output=True)
259
+ >>> logger.info("Hello from SPCS", extra={"user_id": 123, "action": "login"})
260
+
261
+ # Output to stdout (captured by SPCS):
262
+ {"body":"Hello from SPCS","severity_text":"INFO","severity_number":9,"attributes":{"user_id":123,"action":"login","code.lineno":42,"code.function":"main","code.filepath":"/app/main.py"},"scope":{"name":"root"}}
263
+
264
+ # Output to stderr (if enable_console_output=True):
265
+ 2024-01-15 10:30:45,123 - root - INFO - Hello from SPCS
266
+ """
267
+ # Mark as initialized
268
+ SPCSLoggerConfig.is_initialized = True
269
+
270
+ # Get logger (root or named)
271
+ logger = logging.getLogger(logger_name)
272
+ logger.setLevel(log_level)
273
+ logger.handlers.clear()
274
+
275
+ # Add SPCS flat JSON handler (writes JSON to stdout)
276
+ spcs_handler = SPCSStreamHandler(sys.stdout)
277
+ spcs_handler.setLevel(log_level)
278
+ logger.addHandler(spcs_handler)
279
+
280
+ # Optionally add human-readable console handler (to stderr to avoid mixing with JSON logs)
281
+ if enable_console_output:
282
+ console_handler = logging.StreamHandler(sys.stderr)
283
+ console_handler.setLevel(log_level)
284
+ formatter = logging.Formatter(
285
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
286
+ )
287
+ console_handler.setFormatter(formatter)
288
+ logger.addHandler(console_handler)
289
+
290
+ return logger