snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/telemetry.py

@@ -11,10 +11,11 @@ from abc import ABC, abstractmethod
  from collections import defaultdict
  from collections.abc import Iterable
  from contextvars import ContextVar
+ from dataclasses import dataclass
  from enum import Enum, unique
- from typing import Dict

  import google.protobuf.message
+ import pyspark.sql.connect.proto.base_pb2 as proto_base

  from snowflake.connector.cursor import SnowflakeCursor
  from snowflake.connector.telemetry import (
@@ -26,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
  from snowflake.snowpark import Session
  from snowflake.snowpark._internal.utils import get_os_name, get_python_version
  from snowflake.snowpark.version import VERSION as snowpark_version
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
  from snowflake.snowpark_connect.version import VERSION as sas_version

@@ -56,6 +58,7 @@ class TelemetryType(Enum):

  class EventType(Enum):
      SERVER_STARTED = "scos_server_started"
+     WARNING = "scos_warning"


  # global labels
@@ -88,6 +91,7 @@ RECORDED_CONFIG_KEYS = {
      "spark.sql.session.localRelationCacheThreshold",
      "spark.sql.mapKeyDedupPolicy",
      "snowpark.connect.sql.passthrough",
+     "snowpark.connect.cte.optimization_enabled",
      "snowpark.connect.iceberg.external_volume",
      "snowpark.connect.sql.identifiers.auto-uppercase",
      "snowpark.connect.udtf.compatibility_mode",
@@ -104,7 +108,16 @@ REDACTED_PLAN_SUFFIXES = [
  ]


- def _basic_telemetry_data() -> Dict:
+ @dataclass
+ class TelemetryMessage:
+     """Container for telemetry messages in the processing queue."""
+
+     message: dict
+     timestamp: int
+     is_warning: bool
+
+
+ def _basic_telemetry_data() -> dict:
      return {
          **STATIC_TELEMETRY_DATA,
          TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
@@ -121,9 +134,11 @@ def safe(func):
      def wrap(*args, **kwargs):
          try:
              func(*args, **kwargs)
-         except Exception:
-             # We don't really care if telemetry fails, just want to be safe for the user
-             logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
+         except Exception as e:
+             # report failed operation to telemetry
+             telemetry.send_warning_msg(
+                 f"Telemetry operation {func} failed due to exception", e
+             )

      return wrap

@@ -289,10 +304,7 @@ class Telemetry:

          self._request_summary.set(summary)

-         if hasattr(request, "plan"):
-             summary["query_plan"] = _protobuf_to_json_with_redaction(
-                 request.plan, REDACTED_PLAN_SUFFIXES
-             )
+         _set_query_plan(request, summary)

      def _not_in_request(self):
          # we don't want to add things to the summary if it's not initialized
@@ -335,6 +347,11 @@ class Telemetry:
          summary["error_message"] = str(e)
          summary["error_type"] = type(e).__name__

+         if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+             summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+         else:
+             summary["error_code"] = e.custom_error_code
+
          error_location = _error_location(e)
          if error_location:
              summary["error_location"] = error_location
@@ -426,6 +443,63 @@ class Telemetry:

          summary["internal_queries"] += 1

+     @safe
+     def report_describe_query_cache_lookup(self):
+         """Report a describe query cache lookup."""
+         if self._not_in_request():
+             return
+
+         summary = self._request_summary.get()
+
+         if "describe_cache_lookups" not in summary:
+             summary["describe_cache_lookups"] = 0
+
+         summary["describe_cache_lookups"] += 1
+
+     @safe
+     def report_describe_query_cache_hit(self):
+         """Report a describe query cache hit."""
+         if self._not_in_request():
+             return
+
+         summary = self._request_summary.get()
+
+         if "describe_cache_hits" not in summary:
+             summary["describe_cache_hits"] = 0
+
+         summary["describe_cache_hits"] += 1
+
+     @safe
+     def report_describe_query_cache_expired(self, expired_by: float):
+         """Report a describe query cache hit."""
+         if self._not_in_request():
+             return
+
+         summary = self._request_summary.get()
+
+         if "describe_cache_expired" not in summary:
+             summary["describe_cache_expired"] = 0
+
+         summary["describe_cache_expired"] += 1
+
+         if "describe_cache_expired_by" not in summary:
+             summary["describe_cache_expired_by"] = []
+
+         summary["describe_cache_expired_by"].append(expired_by)
+
+     @safe
+     def report_describe_query_cache_clear(self):
+         """Report a describe query cache clear."""
+         if self._not_in_request():
+             return
+
+         summary = self._request_summary.get()
+
+         if "describe_cache_cleared" not in summary:
+             summary["describe_cache_cleared"] = 0
+
+         summary["describe_cache_cleared"] += 1
+
      @safe
      def report_udf_usage(self, udf_name: str):
          if self._not_in_request():
@@ -472,8 +546,8 @@ class Telemetry:
      @safe
      def send_request_summary_telemetry(self):
          if self._not_in_request():
-             logger.warning(
-                 "Truing to send request summary telemetry without initializing it"
+             self.send_warning_msg(
+                 "Trying to send request summary telemetry without initializing it"
              )
              return

@@ -485,14 +559,56 @@ class Telemetry:
          }
          self._send(message)

-     def _send(self, msg: Dict) -> None:
+     def send_warning_msg(self, msg: str, e: Exception = None) -> None:
+         # using this within @safe decorator may result in recursive loop
+         try:
+             message = self._build_warning_message(msg, e)
+             if not message:
+                 return
+
+             self._send(message, is_warning=True)
+         except Exception:
+             # if there's an exception here, there's nothing we can really do about it
+             pass
+
+     def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
+         try:
+             data = {"warning_message": warning_msg}
+             if e is not None:
+                 data["exception"] = repr(e)
+
+             # add session and operation id if available
+             spark_session_id = self._request_summary.get().get("spark_session_id", None)
+             if spark_session_id is not None:
+                 data["spark_session_id"] = spark_session_id
+
+             spark_operation_id = self._request_summary.get().get(
+                 "spark_operation_id", None
+             )
+             if spark_operation_id is not None:
+                 data["spark_operation_id"] = spark_operation_id
+
+             message = {
+                 **_basic_telemetry_data(),
+                 TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
+                 TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
+                 TelemetryField.KEY_DATA.value: data,
+             }
+             return message
+         except Exception:
+             return {}
+
+     def _send(self, msg: dict, is_warning: bool = False) -> None:
          """Queue a telemetry message for asynchronous processing."""
          if not self._is_enabled:
              return

          timestamp = get_time_millis()
          try:
-             self._message_queue.put_nowait((msg, timestamp))
+             telemetry_msg = TelemetryMessage(
+                 message=msg, timestamp=timestamp, is_warning=is_warning
+             )
+             self._message_queue.put_nowait(telemetry_msg)
          except queue.Full:
              # If queue is full, drop the message to avoid blocking
              logger.warning("Telemetry queue is full, dropping message")
@@ -510,13 +626,16 @@
          while True:
              try:
                  # block to allow the GIL to switch threads
-                 message, timestamp = self._message_queue.get()
-                 if timestamp is None and message is None:
-                     # shutdown
+                 telemetry_msg = self._message_queue.get()
+                 if telemetry_msg is None:
+                     # shutdown signal
                      break
-                 self._sink.add_telemetry_data(message, timestamp)
-             except Exception:
-                 logger.warning("Failed to add telemetry message to sink", exc_info=True)
+                 self._sink.add_telemetry_data(
+                     telemetry_msg.message, telemetry_msg.timestamp
+                 )
+             except Exception as e:
+                 if not telemetry_msg.is_warning:
+                     self.send_warning_msg("Failed to add telemetry message to sink", e)
              finally:
                  self._message_queue.task_done()

@@ -529,7 +648,7 @@
              return

          try:
-             self._message_queue.put_nowait((None, None))
+             self._message_queue.put_nowait(None)
              # Wait for worker thread to finish
              self._worker_thread.join(timeout=3.0)
          except Exception:
@@ -538,7 +657,7 @@
              )


- def _error_location(e: Exception) -> Dict | None:
+ def _error_location(e: Exception) -> dict | None:
      """
      Inspect the exception traceback and extract the file name, line number, and function name
      from the last frame (the one that raised the exception).
@@ -619,7 +738,7 @@ def _protobuf_to_json_with_redaction(
      """Recursively convert protobuf message to dict"""

      if not isinstance(msg, google.protobuf.message.Message):
-         logger.warning("Expected a protobuf message, got: %s", type(msg))
+         telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
          return {}

      result = {}
@@ -644,6 +763,28 @@
          )


+ def _set_query_plan(request: google.protobuf.message.Message, summary: dict) -> None:
+     if isinstance(request, proto_base.ExecutePlanRequest):
+         # ExecutePlanRequest has plan at top level
+         if hasattr(request, "plan"):
+             summary["query_plan"] = (
+                 _protobuf_to_json_with_redaction(request.plan, REDACTED_PLAN_SUFFIXES),
+             )
+
+     elif isinstance(request, proto_base.AnalyzePlanRequest):
+         # AnalyzePlanRequest has plan under oneof analyze
+         analyze_type = request.WhichOneof("analyze")
+         if not analyze_type:
+             return
+
+         summary["analyze_type"] = analyze_type
+         analyze_field = getattr(request, analyze_type)
+         if hasattr(analyze_field, "plan"):
+             summary["query_plan"] = _protobuf_to_json_with_redaction(
+                 analyze_field.plan, REDACTED_PLAN_SUFFIXES
+             )
+
+
  # global telemetry client
  telemetry = Telemetry(is_enabled="SNOWPARK_CONNECT_DISABLE_TELEMETRY" not in os.environ)
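Note: the telemetry changes above replace the old (msg, timestamp) tuples with a TelemetryMessage dataclass, enqueue messages with put_nowait on a bounded queue (dropping them when the queue is full), drain them from a background worker thread, and shut the worker down by enqueuing a None sentinel. The standalone sketch below illustrates only that producer/consumer pattern; the MiniTelemetry name and the print() stand-in for the telemetry sink are illustrative assumptions, not part of the package.

from dataclasses import dataclass
import queue
import threading
import time


@dataclass
class Message:
    payload: dict
    timestamp: int
    is_warning: bool = False


class MiniTelemetry:
    def __init__(self, maxsize: int = 100) -> None:
        self._queue: queue.Queue = queue.Queue(maxsize=maxsize)
        self._worker = threading.Thread(target=self._drain, daemon=True)
        self._worker.start()

    def send(self, payload: dict, is_warning: bool = False) -> None:
        msg = Message(payload=payload, timestamp=int(time.time() * 1000), is_warning=is_warning)
        try:
            # never block the caller: drop the message if the queue is full
            self._queue.put_nowait(msg)
        except queue.Full:
            pass

    def _drain(self) -> None:
        while True:
            msg = self._queue.get()
            try:
                if msg is None:
                    # shutdown sentinel, mirrors put_nowait(None) in close()
                    break
                print("flushing", msg.payload)  # stand-in for the real telemetry sink
            finally:
                self._queue.task_done()

    def close(self) -> None:
        self._queue.put_nowait(None)
        self._worker.join(timeout=3.0)


if __name__ == "__main__":
    t = MiniTelemetry()
    t.send({"event": "scos_server_started"})
    t.close()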
snowflake/snowpark_connect/utils/temporary_view_cache.py

@@ -0,0 +1,67 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+
+ from typing import Optional, Tuple
+
+ from pyspark.errors import AnalysisException
+
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+ from snowflake.snowpark_connect.utils.context import get_spark_session_id
+
+ _temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+     normalized_name = _normalize(name)
+     current_session_id = get_spark_session_id()
+     for key in list(_temp_views.keys()):
+         if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+             if replace:
+                 _temp_views.remove(key)
+                 break
+             else:
+                 exception = AnalysisException(
+                     f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                 )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                 raise exception
+
+     _temp_views[(name, current_session_id)] = df
+
+
+ def unregister_temp_view(name: str) -> bool:
+     normalized_name = _normalize(name)
+
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             pop_result = _temp_views.remove(key)
+             return pop_result is not None
+     return False
+
+
+ def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+     normalized_name = _normalize(name)
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             return _temp_views.get(key)
+     return None
+
+
+ def get_temp_view_normalized_names() -> list[str]:
+     return [
+         _normalize(key[0])
+         for key in _temp_views.keys()
+         if key[1] == get_spark_session_id()
+     ]
+
+
+ def _normalize(name: str) -> str:
+     from snowflake.snowpark_connect.config import global_config
+
+     return name if global_config.spark_sql_caseSensitive else name.lower()
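Note: temporary_view_cache.py keys each cached view by a (view name, Spark session id) pair and compares names through _normalize, so lookups are case-insensitive unless spark.sql.caseSensitive is enabled. A minimal sketch of that keying scheme follows, using a plain dict and hard-coded session ids as stand-ins for SynchronizedDict and get_spark_session_id (both assumptions for illustration only).

from typing import Optional

_views: dict[tuple[str, str], object] = {}
_CASE_SENSITIVE = False  # stand-in for spark.sql.caseSensitive


def _normalize(name: str) -> str:
    return name if _CASE_SENSITIVE else name.lower()


def register(name: str, df: object, session_id: str, replace: bool = False) -> None:
    # a view name may only be reused within a session when replace is requested
    for key in list(_views):
        if _normalize(key[0]) == _normalize(name) and key[1] == session_id:
            if not replace:
                raise ValueError(f"temporary view `{name}` already exists")
            del _views[key]
            break
    _views[(name, session_id)] = df


def lookup(name: str, session_id: str) -> Optional[object]:
    for key in _views:
        if _normalize(key[0]) == _normalize(name) and key[1] == session_id:
            return _views[key]
    return None


# a view registered as "Sales" resolves when looked up as "sales",
# but only within the same Spark session
register("Sales", object(), session_id="session-1")
assert lookup("sales", "session-1") is not None
assert lookup("sales", "session-2") is None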
snowflake/snowpark_connect/utils/temporary_view_helper.py

@@ -0,0 +1,334 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+ import re
+ import uuid
+ from collections import Counter
+ from typing import Optional, Tuple
+
+ from pyspark.errors import AnalysisException
+ from pyspark.errors.exceptions.base import TempTableAlreadyExistsException
+
+ from snowflake.snowpark import DataFrame, Session
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
+ from snowflake.snowpark.exceptions import SnowparkSQLException
+ from snowflake.snowpark.types import StructField, StructType
+ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap, ColumnNames
+ from snowflake.snowpark_connect.config import (
+     global_config,
+     sessions_config,
+     should_create_temporary_view_in_snowflake,
+ )
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+ from snowflake.snowpark_connect.utils.context import get_spark_session_id
+ from snowflake.snowpark_connect.utils.identifiers import (
+     spark_to_sf_single_id,
+     spark_to_sf_single_id_with_unquoting,
+ )
+
+ _INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
+
+ _CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+
+ _temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+     normalized_name = _normalize(name)
+     current_session_id = get_spark_session_id()
+     for key in list(_temp_views.keys()):
+         if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+             if replace:
+                 _temp_views.remove(key)
+                 break
+             else:
+                 raise TempTableAlreadyExistsException(
+                     f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                 )
+
+     _temp_views[(name, current_session_id)] = df
+
+
+ def unregister_temp_view(name: str) -> bool:
+     normalized_name = _normalize(name)
+
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             pop_result = _temp_views.remove(key)
+             return pop_result is not None
+     return False
+
+
+ def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+     normalized_name = _normalize(name)
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             return _temp_views.get(key)
+     return None
+
+
+ def get_temp_view_normalized_names() -> list[str]:
+     return [
+         _normalize(key[0])
+         for key in _temp_views.keys()
+         if key[1] == get_spark_session_id()
+     ]
+
+
+ def _normalize(name: str) -> str:
+     return name if global_config.spark_sql_caseSensitive else name.lower()
+
+
+ def assert_snowflake_view_does_not_exist_in_cache(name: str, replace: bool):
+     temp_view = get_temp_view(name)
+     if temp_view is not None and not replace:
+         raise AnalysisException(
+             f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+         )
+
+
+ def assert_cached_view_does_not_exist_in_snowflake(
+     snowflake_view_name: list[str], replace: bool
+ ):
+     if len(snowflake_view_name) == 1:
+         name = unquote_if_quoted(snowflake_view_name[0])
+         sql_statement = f"SHOW VIEWS LIKE '{name}'"
+     else:
+         name = unquote_if_quoted(snowflake_view_name[1])
+         sql_statement = f"SHOW VIEWS LIKE '{name}' IN SCHEMA {snowflake_view_name[0]}"
+     if (
+         not replace
+         and len(Session.get_active_session().sql(sql_statement).collect()) > 0
+     ):
+         raise AnalysisException(
+             f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+         )
+
+
+ def create_temporary_view_from_dataframe(
+     input_df_container: DataFrameContainer,
+     request_view_name: str,
+     is_global: bool,
+     replace: bool,
+ ) -> None:
+     input_df = input_df_container.dataframe
+
+     if is_global:
+         view_name = [global_config.spark_sql_globalTempDatabase, request_view_name]
+     else:
+         view_name = [request_view_name]
+     case_sensitive_view_name = ".".join(
+         [spark_to_sf_single_id_with_unquoting(part) for part in view_name]
+     )
+     snowflake_view_name = [
+         spark_to_sf_single_id_with_unquoting(part, True) for part in view_name
+     ]
+
+     if should_create_temporary_view_in_snowflake():
+         _create_snowflake_temporary_view(
+             input_df_container, snowflake_view_name, case_sensitive_view_name, replace
+         )
+     else:
+         store_temporary_view_as_dataframe(
+             input_df,
+             input_df_container.column_map,
+             input_df_container.column_map.get_spark_columns(),
+             input_df_container.column_map.get_snowpark_columns(),
+             case_sensitive_view_name,
+             snowflake_view_name,
+             replace,
+         )
+
+
+ def _create_snowflake_temporary_view(
+     input_df_container: DataFrameContainer,
+     snowflake_view_name: list[str],
+     stored_view_name: str,
+     replace: bool,
+ ):
+     column_map = input_df_container.column_map
+     input_df = input_df_container.dataframe
+
+     session_config = sessions_config[get_spark_session_id()]
+     duplicate_column_names_handling_mode = session_config[
+         "snowpark.connect.views.duplicate_column_names_handling_mode"
+     ]
+
+     # rename columns to match spark names
+     if duplicate_column_names_handling_mode == "rename":
+         # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
+         rename_map = _create_column_rename_map(column_map.columns, True)
+         input_df = input_df.rename(rename_map)
+     elif duplicate_column_names_handling_mode == "drop":
+         # Drop duplicate column names by removing all but the first occurrence.
+         duplicated_columns, remaining_columns = _find_duplicated_columns(
+             column_map.columns
+         )
+         rename_map = _create_column_rename_map(remaining_columns, False)
+         if len(duplicated_columns) > 0:
+             input_df = input_df.drop(*duplicated_columns)
+         input_df = input_df.rename(rename_map)
+     else:
+         # rename columns without deduplication
+         rename_map = _create_column_rename_map(column_map.columns, False)
+         input_df = input_df.rename(rename_map)
+
+     try:
+         create_snowflake_temporary_view(
+             input_df, snowflake_view_name, stored_view_name, replace
+         )
+     except SnowparkSQLException as exc:
+         if _is_error_caused_by_view_referencing_itself(exc) and replace:
+             # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
+             _create_chained_view(input_df, snowflake_view_name)
+         else:
+             raise
+
+
+ def _create_column_rename_map(
+     columns: list[ColumnNames], rename_duplicated: bool
+ ) -> dict:
+     if rename_duplicated is False:
+         # if we are not renaming duplicated columns, we can just return the original names
+         return {
+             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+             for col in columns
+         }
+
+     column_counts = Counter()
+     not_renamed_cols = []
+     renamed_cols = []
+
+     for col in columns:
+         new_column_name = col.spark_name
+         normalized_name = new_column_name.lower()
+         column_counts[normalized_name] += 1
+
+         if column_counts[normalized_name] > 1:
+             new_column_name = (
+                 f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
+             )
+             renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+         else:
+             not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+
+     if len(renamed_cols) == 0:
+         return {
+             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+             for col in not_renamed_cols
+         }
+
+     # we need to make sure that we don't have duplicated names after renaming
+     # columns that were not renamed in this iteration should have priority over renamed duplicates
+     return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
+
+
+ def _find_duplicated_columns(
+     columns: list[ColumnNames],
+ ) -> (list[str], list[ColumnNames]):
+     duplicates = []
+     remaining_columns = []
+     seen = set()
+     for col in columns:
+         if col.spark_name in seen:
+             duplicates.append(col.snowpark_name)
+         else:
+             seen.add(col.spark_name)
+             remaining_columns.append(col)
+     return duplicates, remaining_columns
+
+
+ def _generate_random_builtin_view_name() -> str:
+     return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
+
+
+ def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
+     return "view definition refers to view being defined" in str(exc).lower()
+
+
+ def _create_chained_view(input_df: DataFrame, view_name: list[str]) -> None:
+     """
+     In order to create a view, which references itself, Spark would here take the previous
+     definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
+     as a workaround, we create a chain of internal views instead. This function:
+     1. Renames previous definition of A to some internal name (instead of deleting).
+     2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
+     """
+
+     session = Session.get_active_session()
+
+     view_name = ".".join(view_name)
+
+     tmp_name = _generate_random_builtin_view_name()
+     old_name_replacement = _generate_random_builtin_view_name()
+
+     input_df.create_or_replace_temp_view(tmp_name)
+
+     session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
+
+     ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
+
+     ddl = ddl.replace(view_name, old_name_replacement)
+
+     # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
+     ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
+
+     session.sql(ddl).collect()
+
+     session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+
+
+ def store_temporary_view_as_dataframe(
+     input_df: DataFrame,
+     parent_column_map: ColumnNameMap,
+     spark_columns: list[str],
+     snowpark_columns: list[str],
+     view_name: str,
+     snowflake_view_name: list[str],
+     replace: bool,
+ ):
+     assert_cached_view_does_not_exist_in_snowflake(snowflake_view_name, replace)
+     schema = StructType(
+         [StructField(field.name, field.datatype) for field in input_df.schema.fields]
+     )
+     input_df_container = DataFrameContainer.create_with_column_mapping(
+         dataframe=input_df,
+         spark_column_names=spark_columns,
+         snowpark_column_names=snowpark_columns,
+         parent_column_name_map=parent_column_map,
+         cached_schema_getter=lambda: schema,
+     )
+
+     if replace:
+         try:
+             Session.get_active_session().sql(
+                 "DROP VIEW IF EXISTS " + ".".join(snowflake_view_name)
+             ).collect()
+         except SnowparkSQLException as e:
+             # Spark allows for both table and temporary view to exist with the same name.
+             # Snowflake throws exception if we try to drop the view with doesn't exist but a table with the same name exists.
+             if (
+                 "SQL compilation error: Object found is of type 'TABLE', not specified type 'VIEW'"
+                 not in str(e)
+             ):
+                 raise
+
+     register_temp_view(view_name, input_df_container, replace)
+
+
+ def create_snowflake_temporary_view(
+     input_df: DataFrame,
+     snowflake_view_name: list[str],
+     stored_view_name: str,
+     replace: bool,
+     comment: Optional[str] = None,
+ ) -> None:
+     assert_snowflake_view_does_not_exist_in_cache(stored_view_name, replace)
+     if replace:
+         unregister_temp_view(stored_view_name)
+         input_df.create_or_replace_temp_view(snowflake_view_name, comment=comment)
+     else:
+         input_df.create_temp_view(snowflake_view_name, comment=comment)
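Note: in temporary_view_helper.py, the "rename" handling mode deduplicates repeated Spark column names by appending _DEDUP_1, _DEDUP_2, ... to every occurrence after the first (matched case-insensitively) and then re-runs the pass so that a synthetic name cannot collide with a name that was kept unchanged. A simplified standalone sketch of that scheme, operating on plain strings rather than ColumnNames and skipping the Snowflake identifier quoting step:

from collections import Counter


def dedup_column_names(names: list[str]) -> list[str]:
    counts = Counter()
    kept, renamed = [], []
    for name in names:
        counts[name.lower()] += 1
        if counts[name.lower()] > 1:
            # every occurrence after the first gets a _DEDUP_<n> suffix
            renamed.append(f"{name}_DEDUP_{counts[name.lower()] - 1}")
        else:
            kept.append(name)
    if not renamed:
        return kept
    # names kept unchanged take priority over freshly renamed ones, so run another
    # pass over kept + renamed in case a _DEDUP_ name collides with an existing column
    return dedup_column_names(kept + renamed)


# the second and third occurrences of id/ID receive _DEDUP_ suffixes; "name" is untouched
print(dedup_column_names(["id", "name", "ID", "id"]))
# ['id', 'name', 'ID_DEDUP_1', 'id_DEDUP_2']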