snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
@@ -11,10 +11,11 @@ from abc import ABC, abstractmethod
11
11
  from collections import defaultdict
12
12
  from collections.abc import Iterable
13
13
  from contextvars import ContextVar
14
+ from dataclasses import dataclass
14
15
  from enum import Enum, unique
15
- from typing import Dict
16
16
 
17
17
  import google.protobuf.message
18
+ import pyspark.sql.connect.proto.base_pb2 as proto_base
18
19
 
19
20
  from snowflake.connector.cursor import SnowflakeCursor
20
21
  from snowflake.connector.telemetry import (
@@ -26,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
26
27
  from snowflake.snowpark import Session
27
28
  from snowflake.snowpark._internal.utils import get_os_name, get_python_version
28
29
  from snowflake.snowpark.version import VERSION as snowpark_version
30
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
29
31
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
30
32
  from snowflake.snowpark_connect.version import VERSION as sas_version
31
33
 
@@ -56,22 +58,16 @@ class TelemetryType(Enum):
56
58
 
57
59
  class EventType(Enum):
58
60
  SERVER_STARTED = "scos_server_started"
61
+ WARNING = "scos_warning"
59
62
 
60
63
 
61
64
  # global labels
62
- SOURCE = "SparkConnectForSnowpark"
65
+ DEFAULT_SOURCE = "SparkConnectForSnowpark"
63
66
  SCOS_VERSION = ".".join([str(d) for d in sas_version if d is not None])
64
67
  SNOWPARK_VERSION = ".".join([str(d) for d in snowpark_version if d is not None])
65
68
  PYTHON_VERSION = get_python_version()
66
69
  OS = get_os_name()
67
70
 
68
- STATIC_TELEMETRY_DATA = {
69
- TelemetryField.KEY_SOURCE.value: SOURCE,
70
- TelemetryField.KEY_VERSION.value: SCOS_VERSION,
71
- TelemetryField.KEY_SNOWPARK_VERSION.value: SNOWPARK_VERSION,
72
- TelemetryField.KEY_PYTHON_VERSION.value: PYTHON_VERSION,
73
- TelemetryField.KEY_OS.value: OS,
74
- }
75
71
 
76
72
  # list of config keys for which we record values, other config values are not recorded
77
73
  RECORDED_CONFIG_KEYS = {
@@ -88,6 +84,7 @@ RECORDED_CONFIG_KEYS = {
88
84
  "spark.sql.session.localRelationCacheThreshold",
89
85
  "spark.sql.mapKeyDedupPolicy",
90
86
  "snowpark.connect.sql.passthrough",
87
+ "snowpark.connect.cte.optimization_enabled",
91
88
  "snowpark.connect.iceberg.external_volume",
92
89
  "snowpark.connect.sql.identifiers.auto-uppercase",
93
90
  "snowpark.connect.udtf.compatibility_mode",
@@ -104,11 +101,13 @@ REDACTED_PLAN_SUFFIXES = [
104
101
  ]
105
102
 
106
103
 
107
- def _basic_telemetry_data() -> Dict:
108
- return {
109
- **STATIC_TELEMETRY_DATA,
110
- TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
111
- }
104
+ @dataclass
105
+ class TelemetryMessage:
106
+ """Container for telemetry messages in the processing queue."""
107
+
108
+ message: dict
109
+ timestamp: int
110
+ is_warning: bool
112
111
 
113
112
 
114
113
  def safe(func):
@@ -121,9 +120,11 @@ def safe(func):
121
120
  def wrap(*args, **kwargs):
122
121
  try:
123
122
  func(*args, **kwargs)
124
- except Exception:
125
- # We don't really care if telemetry fails, just want to be safe for the user
126
- logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
123
+ except Exception as e:
124
+ # report failed operation to telemetry
125
+ telemetry.send_warning_msg(
126
+ f"Telemetry operation {func} failed due to exception", e
127
+ )
127
128
 
128
129
  return wrap
129
130
 
@@ -238,6 +239,7 @@ class Telemetry:
238
239
  self._is_enabled = is_enabled
239
240
  self._is_initialized = False
240
241
  self._lock = threading.Lock()
242
+ self._source = DEFAULT_SOURCE
241
243
 
242
244
  # Async processing setup
243
245
  self._message_queue = queue.Queue(maxsize=10000)
@@ -246,11 +248,32 @@ class Telemetry:
246
248
  def __del__(self):
247
249
  self.shutdown()
248
250
 
249
- def initialize(self, session: Session):
251
+ def _get_static_telemetry_data(self) -> dict:
252
+ """Get static telemetry data with current configuration."""
253
+ return {
254
+ TelemetryField.KEY_SOURCE.value: self._source,
255
+ TelemetryField.KEY_VERSION.value: SCOS_VERSION,
256
+ TelemetryField.KEY_SNOWPARK_VERSION.value: SNOWPARK_VERSION,
257
+ TelemetryField.KEY_PYTHON_VERSION.value: PYTHON_VERSION,
258
+ TelemetryField.KEY_OS.value: OS,
259
+ }
260
+
261
+ def _basic_telemetry_data(self) -> dict:
262
+ return {
263
+ **self._get_static_telemetry_data(),
264
+ TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
265
+ }
266
+
267
+ def initialize(self, session: Session, source: str = None):
250
268
  """
251
269
  Must be called after the session is created to initialize telemetry.
252
270
  Gets the telemetry client from the session's connection and uses it
253
271
  to report telemetry data.
272
+
273
+ Args:
274
+ session: Snowpark Session to use for telemetry
275
+ source: Optional source identifier for telemetry (e.g., "SparkConnectThinClient").
276
+ Defaults to "SparkConnectForSnowpark".
254
277
  """
255
278
  if not self._is_enabled:
256
279
  return
@@ -261,12 +284,15 @@ class Telemetry:
261
284
  return
262
285
  self._is_initialized = True
263
286
 
264
- telemetry = getattr(session._conn._conn, "_telemetry", None)
265
- if telemetry is None:
287
+ if source is not None:
288
+ self._source = source
289
+
290
+ telemetry_client = getattr(session._conn._conn, "_telemetry", None)
291
+ if telemetry_client is None:
266
292
  # no telemetry client available, so we export with queries
267
293
  self._sink = QueryTelemetrySink(session)
268
294
  else:
269
- self._sink = ClientTelemetrySink(telemetry)
295
+ self._sink = ClientTelemetrySink(telemetry_client)
270
296
 
271
297
  self._start_worker_thread()
272
298
  logger.info(f"Telemetry initialized with {type(self._sink)}")
@@ -289,10 +315,7 @@ class Telemetry:
289
315
 
290
316
  self._request_summary.set(summary)
291
317
 
292
- if hasattr(request, "plan"):
293
- summary["query_plan"] = _protobuf_to_json_with_redaction(
294
- request.plan, REDACTED_PLAN_SUFFIXES
295
- )
318
+ _set_query_plan(request, summary)
296
319
 
297
320
  def _not_in_request(self):
298
321
  # we don't want to add things to the summary if it's not initialized
@@ -335,6 +358,11 @@ class Telemetry:
335
358
  summary["error_message"] = str(e)
336
359
  summary["error_type"] = type(e).__name__
337
360
 
361
+ if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
362
+ summary["error_code"] = ErrorCodes.INTERNAL_ERROR
363
+ else:
364
+ summary["error_code"] = e.custom_error_code
365
+
338
366
  error_location = _error_location(e)
339
367
  if error_location:
340
368
  summary["error_location"] = error_location
@@ -426,6 +454,63 @@ class Telemetry:
426
454
 
427
455
  summary["internal_queries"] += 1
428
456
 
457
+ @safe
458
+ def report_describe_query_cache_lookup(self):
459
+ """Report a describe query cache lookup."""
460
+ if self._not_in_request():
461
+ return
462
+
463
+ summary = self._request_summary.get()
464
+
465
+ if "describe_cache_lookups" not in summary:
466
+ summary["describe_cache_lookups"] = 0
467
+
468
+ summary["describe_cache_lookups"] += 1
469
+
470
+ @safe
471
+ def report_describe_query_cache_hit(self):
472
+ """Report a describe query cache hit."""
473
+ if self._not_in_request():
474
+ return
475
+
476
+ summary = self._request_summary.get()
477
+
478
+ if "describe_cache_hits" not in summary:
479
+ summary["describe_cache_hits"] = 0
480
+
481
+ summary["describe_cache_hits"] += 1
482
+
483
+ @safe
484
+ def report_describe_query_cache_expired(self, expired_by: float):
485
+ """Report a describe query cache hit."""
486
+ if self._not_in_request():
487
+ return
488
+
489
+ summary = self._request_summary.get()
490
+
491
+ if "describe_cache_expired" not in summary:
492
+ summary["describe_cache_expired"] = 0
493
+
494
+ summary["describe_cache_expired"] += 1
495
+
496
+ if "describe_cache_expired_by" not in summary:
497
+ summary["describe_cache_expired_by"] = []
498
+
499
+ summary["describe_cache_expired_by"].append(expired_by)
500
+
501
+ @safe
502
+ def report_describe_query_cache_clear(self):
503
+ """Report a describe query cache clear."""
504
+ if self._not_in_request():
505
+ return
506
+
507
+ summary = self._request_summary.get()
508
+
509
+ if "describe_cache_cleared" not in summary:
510
+ summary["describe_cache_cleared"] = 0
511
+
512
+ summary["describe_cache_cleared"] += 1
513
+
429
514
  @safe
430
515
  def report_udf_usage(self, udf_name: str):
431
516
  if self._not_in_request():
@@ -460,7 +545,7 @@ class Telemetry:
460
545
  @safe
461
546
  def send_server_started_telemetry(self):
462
547
  message = {
463
- **_basic_telemetry_data(),
548
+ **self._basic_telemetry_data(),
464
549
  TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
465
550
  TelemetryType.EVENT_TYPE.value: EventType.SERVER_STARTED.value,
466
551
  TelemetryField.KEY_DATA.value: {
@@ -472,27 +557,69 @@ class Telemetry:
472
557
  @safe
473
558
  def send_request_summary_telemetry(self):
474
559
  if self._not_in_request():
475
- logger.warning(
476
- "Truing to send request summary telemetry without initializing it"
560
+ self.send_warning_msg(
561
+ "Trying to send request summary telemetry without initializing it"
477
562
  )
478
563
  return
479
564
 
480
565
  summary = self._request_summary.get()
481
566
  message = {
482
- **_basic_telemetry_data(),
567
+ **self._basic_telemetry_data(),
483
568
  TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_REQUEST_SUMMARY.value,
484
569
  TelemetryField.KEY_DATA.value: summary,
485
570
  }
486
571
  self._send(message)
487
572
 
488
- def _send(self, msg: Dict) -> None:
573
+ def send_warning_msg(self, msg: str, e: Exception = None) -> None:
574
+ # using this within @safe decorator may result in recursive loop
575
+ try:
576
+ message = self._build_warning_message(msg, e)
577
+ if not message:
578
+ return
579
+
580
+ self._send(message, is_warning=True)
581
+ except Exception:
582
+ # if there's an exception here, there's nothing we can really do about it
583
+ pass
584
+
585
+ def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
586
+ try:
587
+ data = {"warning_message": warning_msg}
588
+ if e is not None:
589
+ data["exception"] = repr(e)
590
+
591
+ # add session and operation id if available
592
+ spark_session_id = self._request_summary.get().get("spark_session_id", None)
593
+ if spark_session_id is not None:
594
+ data["spark_session_id"] = spark_session_id
595
+
596
+ spark_operation_id = self._request_summary.get().get(
597
+ "spark_operation_id", None
598
+ )
599
+ if spark_operation_id is not None:
600
+ data["spark_operation_id"] = spark_operation_id
601
+
602
+ message = {
603
+ **self._basic_telemetry_data(),
604
+ TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
605
+ TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
606
+ TelemetryField.KEY_DATA.value: data,
607
+ }
608
+ return message
609
+ except Exception:
610
+ return {}
611
+
612
+ def _send(self, msg: dict, is_warning: bool = False) -> None:
489
613
  """Queue a telemetry message for asynchronous processing."""
490
614
  if not self._is_enabled:
491
615
  return
492
616
 
493
617
  timestamp = get_time_millis()
494
618
  try:
495
- self._message_queue.put_nowait((msg, timestamp))
619
+ telemetry_msg = TelemetryMessage(
620
+ message=msg, timestamp=timestamp, is_warning=is_warning
621
+ )
622
+ self._message_queue.put_nowait(telemetry_msg)
496
623
  except queue.Full:
497
624
  # If queue is full, drop the message to avoid blocking
498
625
  logger.warning("Telemetry queue is full, dropping message")
@@ -510,13 +637,16 @@ class Telemetry:
510
637
  while True:
511
638
  try:
512
639
  # block to allow the GIL to switch threads
513
- message, timestamp = self._message_queue.get()
514
- if timestamp is None and message is None:
515
- # shutdown
640
+ telemetry_msg = self._message_queue.get()
641
+ if telemetry_msg is None:
642
+ # shutdown signal
516
643
  break
517
- self._sink.add_telemetry_data(message, timestamp)
518
- except Exception:
519
- logger.warning("Failed to add telemetry message to sink", exc_info=True)
644
+ self._sink.add_telemetry_data(
645
+ telemetry_msg.message, telemetry_msg.timestamp
646
+ )
647
+ except Exception as e:
648
+ if not telemetry_msg.is_warning:
649
+ self.send_warning_msg("Failed to add telemetry message to sink", e)
520
650
  finally:
521
651
  self._message_queue.task_done()
522
652
 
@@ -529,7 +659,7 @@ class Telemetry:
529
659
  return
530
660
 
531
661
  try:
532
- self._message_queue.put_nowait((None, None))
662
+ self._message_queue.put_nowait(None)
533
663
  # Wait for worker thread to finish
534
664
  self._worker_thread.join(timeout=3.0)
535
665
  except Exception:
@@ -538,7 +668,7 @@ class Telemetry:
538
668
  )
539
669
 
540
670
 
541
- def _error_location(e: Exception) -> Dict | None:
671
+ def _error_location(e: Exception) -> dict | None:
542
672
  """
543
673
  Inspect the exception traceback and extract the file name, line number, and function name
544
674
  from the last frame (the one that raised the exception).
@@ -619,7 +749,7 @@ def _protobuf_to_json_with_redaction(
619
749
  """Recursively convert protobuf message to dict"""
620
750
 
621
751
  if not isinstance(msg, google.protobuf.message.Message):
622
- logger.warning("Expected a protobuf message, got: %s", type(msg))
752
+ telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
623
753
  return {}
624
754
 
625
755
  result = {}
@@ -644,6 +774,28 @@ def _protobuf_to_json_with_redaction(
644
774
  )
645
775
 
646
776
 
777
+ def _set_query_plan(request: google.protobuf.message.Message, summary: dict) -> None:
778
+ if isinstance(request, proto_base.ExecutePlanRequest):
779
+ # ExecutePlanRequest has plan at top level
780
+ if hasattr(request, "plan"):
781
+ summary["query_plan"] = (
782
+ _protobuf_to_json_with_redaction(request.plan, REDACTED_PLAN_SUFFIXES),
783
+ )
784
+
785
+ elif isinstance(request, proto_base.AnalyzePlanRequest):
786
+ # AnalyzePlanRequest has plan under oneof analyze
787
+ analyze_type = request.WhichOneof("analyze")
788
+ if not analyze_type:
789
+ return
790
+
791
+ summary["analyze_type"] = analyze_type
792
+ analyze_field = getattr(request, analyze_type)
793
+ if hasattr(analyze_field, "plan"):
794
+ summary["query_plan"] = _protobuf_to_json_with_redaction(
795
+ analyze_field.plan, REDACTED_PLAN_SUFFIXES
796
+ )
797
+
798
+
647
799
  # global telemetry client
648
800
  telemetry = Telemetry(is_enabled="SNOWPARK_CONNECT_DISABLE_TELEMETRY" not in os.environ)
649
801
 
@@ -0,0 +1,67 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ from typing import Optional, Tuple
6
+
7
+ from pyspark.errors import AnalysisException
8
+
9
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
10
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
11
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
12
+ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
13
+ from snowflake.snowpark_connect.utils.context import get_spark_session_id
14
+
15
+ _temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
16
+
17
+
18
+ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
19
+ normalized_name = _normalize(name)
20
+ current_session_id = get_spark_session_id()
21
+ for key in list(_temp_views.keys()):
22
+ if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
23
+ if replace:
24
+ _temp_views.remove(key)
25
+ break
26
+ else:
27
+ exception = AnalysisException(
28
+ f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
29
+ )
30
+ attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
31
+ raise exception
32
+
33
+ _temp_views[(name, current_session_id)] = df
34
+
35
+
36
+ def unregister_temp_view(name: str) -> bool:
37
+ normalized_name = _normalize(name)
38
+
39
+ for key in _temp_views.keys():
40
+ normalized_key = _normalize(key[0])
41
+ if normalized_name == normalized_key and key[1] == get_spark_session_id():
42
+ pop_result = _temp_views.remove(key)
43
+ return pop_result is not None
44
+ return False
45
+
46
+
47
+ def get_temp_view(name: str) -> Optional[DataFrameContainer]:
48
+ normalized_name = _normalize(name)
49
+ for key in _temp_views.keys():
50
+ normalized_key = _normalize(key[0])
51
+ if normalized_name == normalized_key and key[1] == get_spark_session_id():
52
+ return _temp_views.get(key)
53
+ return None
54
+
55
+
56
+ def get_temp_view_normalized_names() -> list[str]:
57
+ return [
58
+ _normalize(key[0])
59
+ for key in _temp_views.keys()
60
+ if key[1] == get_spark_session_id()
61
+ ]
62
+
63
+
64
+ def _normalize(name: str) -> str:
65
+ from snowflake.snowpark_connect.config import global_config
66
+
67
+ return name if global_config.spark_sql_caseSensitive else name.lower()