snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.
Files changed (84)
  1. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
  2. snowflake/snowpark_connect/column_name_handler.py +6 -65
  3. snowflake/snowpark_connect/config.py +47 -17
  4. snowflake/snowpark_connect/dataframe_container.py +242 -0
  5. snowflake/snowpark_connect/error/error_utils.py +25 -0
  6. snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
  7. snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
  8. snowflake/snowpark_connect/expression/map_extension.py +2 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +4 -4
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
  13. snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
  14. snowflake/snowpark_connect/expression/typer.py +6 -6
  15. snowflake/snowpark_connect/proto/control_pb2.py +17 -16
  16. snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
  17. snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
  18. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
  19. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
  20. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
  21. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
  22. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
  23. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
  24. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
  25. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
  26. snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
  27. snowflake/snowpark_connect/relation/map_catalog.py +2 -2
  28. snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
  29. snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
  30. snowflake/snowpark_connect/relation/map_extension.py +81 -56
  31. snowflake/snowpark_connect/relation/map_join.py +72 -63
  32. snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
  33. snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
  34. snowflake/snowpark_connect/relation/map_relation.py +22 -16
  35. snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
  36. snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
  37. snowflake/snowpark_connect/relation/map_show_string.py +42 -5
  38. snowflake/snowpark_connect/relation/map_sql.py +141 -237
  39. snowflake/snowpark_connect/relation/map_stats.py +88 -39
  40. snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
  41. snowflake/snowpark_connect/relation/map_udtf.py +10 -13
  42. snowflake/snowpark_connect/relation/read/map_read.py +8 -3
  43. snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
  44. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
  45. snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
  46. snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
  47. snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
  48. snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
  49. snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
  50. snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
  51. snowflake/snowpark_connect/relation/utils.py +11 -5
  52. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
  53. snowflake/snowpark_connect/relation/write/map_write.py +259 -56
  54. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
  55. snowflake/snowpark_connect/server.py +43 -4
  56. snowflake/snowpark_connect/type_mapping.py +6 -23
  57. snowflake/snowpark_connect/utils/cache.py +27 -22
  58. snowflake/snowpark_connect/utils/context.py +33 -17
  59. snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
  60. snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
  61. snowflake/snowpark_connect/utils/session.py +41 -38
  62. snowflake/snowpark_connect/utils/telemetry.py +214 -63
  63. snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
  64. snowflake/snowpark_connect/version.py +1 -1
  65. snowflake/snowpark_decoder/__init__.py +0 -0
  66. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
  67. snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
  68. snowflake/snowpark_decoder/dp_session.py +111 -0
  69. snowflake/snowpark_decoder/spark_decoder.py +76 -0
  70. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
  71. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
  72. snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
  73. snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
  74. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
  75. spark/__init__.py +0 -0
  76. spark/connect/__init__.py +0 -0
  77. spark/connect/envelope_pb2.py +31 -0
  78. spark/connect/envelope_pb2.pyi +46 -0
  79. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  80. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
  81. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
  82. {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
  83. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
  84. {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
snowflake/snowpark_connect/utils/telemetry.py

@@ -1,19 +1,22 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-
+import functools
 import json
 import os
 import queue
 import threading
+import uuid
 from abc import ABC, abstractmethod
 from collections import defaultdict
+from collections.abc import Iterable
 from contextvars import ContextVar
 from enum import Enum, unique
 from typing import Dict

 import google.protobuf.message

+from snowflake.connector.cursor import SnowflakeCursor
 from snowflake.connector.telemetry import (
     TelemetryClient as PCTelemetryClient,
     TelemetryData as PCTelemetryData,
@@ -21,7 +24,6 @@ from snowflake.connector.telemetry import (
 )
 from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
-from snowflake.snowpark._internal.telemetry import safe_telemetry
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
@@ -43,6 +45,7 @@ class TelemetryField(Enum):
     KEY_OS = "operating_system"
     KEY_DATA = "data"
     KEY_START_TIME = "start_time"
+    KEY_EVENT_ID = "event_id"


 class TelemetryType(Enum):
@@ -86,8 +89,7 @@ RECORDED_CONFIG_KEYS = {
     "spark.sql.mapKeyDedupPolicy",
     "snowpark.connect.sql.passthrough",
     "snowpark.connect.iceberg.external_volume",
-    "snowpark.connect.auto-uppercase.ddl",
-    "snowpark.connect.auto-uppercase.dml",
+    "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
     "snowpark.connect.views.duplicate_column_names_handling_mode",
 }
@@ -108,7 +110,34 @@ REDACTED_PLAN_SUFFIXES = [
 ]


+def _basic_telemetry_data() -> Dict:
+    return {
+        **STATIC_TELEMETRY_DATA,
+        TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
+    }
+
+
+def safe(func):
+    """
+    Decorator to safely execute telemetry functions, catching and logging exceptions
+    without affecting the main application flow.
+    """
+
+    @functools.wraps(func)
+    def wrap(*args, **kwargs):
+        try:
+            func(*args, **kwargs)
+        except Exception:
+            # We don't really care if telemetry fails, just want to be safe for the user
+            logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
+
+    return wrap
+
+
 class TelemetrySink(ABC):
+    MAX_BUFFER_ELEMENTS = 20
+    MAX_WAIT_MS = 10000  # 10 seconds
+
     @abstractmethod
     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
         pass
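Aside on the new `safe` decorator introduced above: it replaces snowpark's `safe_telemetry` import with a local equivalent that swallows and logs any exception, so a telemetry failure can never surface to the user. A minimal standalone sketch of that behavior, with a stub logger and a hypothetical reporter:

```python
import functools
import logging

logger = logging.getLogger("telemetry-demo")  # stand-in for the package logger


def safe(func):
    # Same shape as the decorator in the diff: never let telemetry raise.
    @functools.wraps(func)
    def wrap(*args, **kwargs):
        try:
            func(*args, **kwargs)
        except Exception:
            logger.warning(f"Telemetry operation failed: {func}", exc_info=True)

    return wrap


@safe
def report_something() -> None:  # hypothetical reporter
    raise RuntimeError("telemetry backend unavailable")


report_something()  # logs a warning and returns None instead of raising
```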
@@ -129,23 +158,44 @@ class NoOpTelemetrySink(TelemetrySink):
 class ClientTelemetrySink(TelemetrySink):
     def __init__(self, telemetry_client: PCTelemetryClient) -> None:
         self._telemetry_client = telemetry_client
+        self._lock = threading.Lock()
+        self._reset()

     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
         telemetry_data = PCTelemetryData(message=message, timestamp=timestamp)
         self._telemetry_client.try_add_log_to_batch(telemetry_data)
+        with self._lock:
+            self._events_since_last_flush += 1
+        # flush more often than the underlying telemetry client
+        if self._should_flush():
+            self.flush()

     def flush(self) -> None:
+        with self._lock:
+            self._reset()
         self._telemetry_client.send_batch()

+    def _should_flush(self) -> bool:
+        current_time = get_time_millis()
+
+        return (
+            self._events_since_last_flush >= TelemetrySink.MAX_BUFFER_ELEMENTS
+            or (current_time - self._last_flush_time) >= TelemetrySink.MAX_WAIT_MS
+        )
+
+    def _reset(self):
+        self._events_since_last_flush = 0
+        self._last_flush_time = get_time_millis()
+

 class QueryTelemetrySink(TelemetrySink):

-    MAX_BUFFER_SIZE = 100 * 1024  # 100KB
-    MAX_WAIT_MS = 10000  # 10 seconds
+    MAX_BUFFER_SIZE = 20 * 1024  # 20KB
     TELEMETRY_JOB_ID = "43e72d9b-56d0-4cdb-a615-6b5b5059d6df"

     def __init__(self, session: Session) -> None:
         self._session = session
+        self._lock = threading.Lock()
         self._reset()

     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
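Both sinks now share one flush policy through the new `TelemetrySink` class attributes: flush once 20 events have accumulated or 10 seconds have passed, whichever comes first. A small sketch of that predicate with a stand-in millisecond clock; note that `flush()` itself takes the sink's lock, so (as reconstructed above) the threshold check and the `flush()` call happen after the lock is released, since re-acquiring a plain `threading.Lock` would deadlock:

```python
import time

MAX_BUFFER_ELEMENTS = 20
MAX_WAIT_MS = 10_000  # 10 seconds


def now_ms() -> int:
    # Stand-in for snowflake.connector.time_util.get_time_millis.
    return int(time.time() * 1000)


def should_flush(events_since_last_flush: int, last_flush_time_ms: int) -> bool:
    # Flush on whichever threshold is crossed first: event count or elapsed time.
    return (
        events_since_last_flush >= MAX_BUFFER_ELEMENTS
        or (now_ms() - last_flush_time_ms) >= MAX_WAIT_MS
    )


print(should_flush(20, now_ms()))          # True: count threshold reached
print(should_flush(3, now_ms() - 15_000))  # True: buffer older than 10 seconds
print(should_flush(3, now_ms()))           # False: neither limit hit
```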
@@ -153,31 +203,37 @@ class QueryTelemetrySink(TelemetrySink):

         # stringify entry, and escape single quotes
         entry_str = json.dumps(telemetry_entry).replace("'", "''")
-        self._buffer.append(entry_str)
-        self._buffer_size += len(entry_str)

-        current_time = get_time_millis()
-        if (
-            self._buffer_size > QueryTelemetrySink.MAX_BUFFER_SIZE
-            or (current_time - self._last_export_time) > QueryTelemetrySink.MAX_WAIT_MS
-        ):
+        with self._lock:
+            self._buffer.append(entry_str)
+            self._buffer_size += len(entry_str)
+
+        if self._should_flush():
             self.flush()

     def flush(self) -> None:
-        if not self._buffer:
-            return
+        with self._lock:
+            if not self._buffer:
+                return
+            # prefix query with a unique identifier for easier tracking
+            query = f"select '{self.TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(self._buffer)}]'"
+            self._reset()

-        # prefix query with a unique identifier for easier tracking
-        query = f"select '{self.TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(self._buffer)}]'"
         self._session.sql(query).collect_nowait()

-        self._reset()
-
     def _reset(self) -> None:
         self._buffer = []
         self._buffer_size = 0
         self._last_export_time = get_time_millis()

+    def _should_flush(self):
+        current_time = get_time_millis()
+        return (
+            self._buffer_size >= QueryTelemetrySink.MAX_BUFFER_SIZE
+            or len(self._buffer) >= TelemetrySink.MAX_BUFFER_ELEMENTS
+            or (current_time - self._last_export_time) >= TelemetrySink.MAX_WAIT_MS
+        )
+

 class Telemetry:
     def __init__(self, is_enabled=True) -> None:
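For orientation: `QueryTelemetrySink` exports by embedding the JSON buffer into a marker SELECT tagged with `TELEMETRY_JOB_ID` and running it via `collect_nowait()`, so the payload travels as query text instead of going through the connector's telemetry endpoint. A rough sketch of the query it builds, with made-up entries:

```python
import json

TELEMETRY_JOB_ID = "43e72d9b-56d0-4cdb-a615-6b5b5059d6df"

# Entries are JSON-stringified up front, with single quotes doubled for SQL.
buffer = [
    json.dumps({"type": "event", "event_type": "server_started"}).replace("'", "''"),
    json.dumps({"type": "request_summary", "was_successful": True}).replace("'", "''"),
]

query = f"select '{TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(buffer)}]'"
print(query)
# select '43e72d9b-...' as scos_telemetry_export, '[{"type": "event", ...},{...}]'
```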
@@ -186,6 +242,8 @@ class Telemetry:
             "request_summary", default={}
         )
         self._is_enabled = is_enabled
+        self._is_initialized = False
+        self._lock = threading.Lock()

         # Async processing setup
         self._message_queue = queue.Queue(maxsize=10000)
@@ -203,6 +261,12 @@ class Telemetry:
         if not self._is_enabled:
             return

+        with self._lock:
+            if self._is_initialized:
+                logger.warning("Telemetry is already initialized")
+                return
+            self._is_initialized = True
+
         telemetry = getattr(session._conn._conn, "_telemetry", None)
         if telemetry is None:
             # no telemetry client available, so we export with queries
@@ -211,8 +275,9 @@
             self._sink = ClientTelemetrySink(telemetry)

         self._start_worker_thread()
+        logger.info(f"Telemetry initialized with {type(self._sink)}")

-    @safe_telemetry
+    @safe
     def initialize_request_summary(
         self, request: google.protobuf.message.Message
     ) -> None:
@@ -235,8 +300,29 @@
             request.plan, REDACTED_PLAN_SUFFIXES
         )

-    @safe_telemetry
+    def _not_in_request(self):
+        # we don't want to add things to the summary if it's not initialized
+        return "created_on" not in self._request_summary.get()
+
+    @safe
+    def report_parsed_sql_plan(self, plan: google.protobuf.message.Message) -> None:
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "parsed_sql_plans" not in summary:
+            summary["parsed_sql_plans"] = []
+
+        summary["parsed_sql_plans"].append(
+            _protobuf_to_json_with_redaction(plan, REDACTED_PLAN_SUFFIXES)
+        )
+
+    @safe
     def report_function_usage(self, function_name: str) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()

         if "used_functions" not in summary:
@@ -244,8 +330,11 @@

         summary["used_functions"][function_name] += 1

-    @safe_telemetry
+    @safe
     def report_request_failure(self, e: Exception) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()

         summary["was_successful"] = False
@@ -256,37 +345,78 @@
         if error_location:
             summary["error_location"] = error_location

-    @safe_telemetry
-    def report_config_set(self, key, value):
+    @safe
+    def report_config_set(self, pairs: Iterable) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()

         if "config_set" not in summary:
             summary["config_set"] = []

-        summary["config_set"].append(
-            {
-                "key": key,
-                "value": value if key in RECORDED_CONFIG_KEYS else "<redacted>",
-            }
-        )
+        for p in pairs:
+            summary["config_set"].append(
+                {
+                    "key": p.key,
+                    "value": p.value if p.key in RECORDED_CONFIG_KEYS else "<redacted>",
+                }
+            )
+
+    @safe
+    def report_config_unset(self, keys: Iterable[str]) -> None:
+        if self._not_in_request():
+            return

-    @safe_telemetry
-    def report_config_unset(self, key):
         summary = self._request_summary.get()

         if "config_unset" not in summary:
             summary["config_unset"] = []

-        summary["config_unset"].append(key)
+        summary["config_unset"].extend(keys)
+
+    @safe
+    def report_config_get(self, keys: Iterable[str]) -> None:
+        if self._not_in_request():
+            return

-    @safe_telemetry
-    def report_config_op_type(self, op_type: str):
         summary = self._request_summary.get()

+        if "config_get" not in summary:
+            summary["config_get"] = []
+
+        summary["config_get"].extend(keys)
+
+    @safe
+    def report_config_op_type(self, op_type: str):
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
         summary["config_op_type"] = op_type

-    @safe_telemetry
-    def report_query_id(self, query_id: str):
+    @safe
+    def report_query(
+        self, result: SnowflakeCursor | dict | Exception, **kwargs
+    ) -> None:
+        if result is None or isinstance(result, dict) or self._not_in_request():
+            return
+
+        # SnowflakeCursor and SQL errors will have sfqid
+        # other exceptions will not have it
+        # TODO: handle async queries, but filter out telemetry export queries
+        qid = getattr(result, "sfqid", None)
+
+        if qid is None:
+            logger.warning("Missing query id in result: %s", result)
+
+        is_internal = kwargs.get("_is_internal", False)
+        if is_internal:
+            self._report_internal_query()
+        elif qid:
+            self._report_query_id(qid)
+
+    def _report_query_id(self, query_id: str):
         summary = self._request_summary.get()

         if "queries" not in summary:
@@ -294,13 +424,19 @@

         summary["queries"].append(query_id)

-    @safe_telemetry
-    def report_internal_query(self):
+    def _report_internal_query(self):
         summary = self._request_summary.get()
+
+        if "internal_queries" not in summary:
+            summary["internal_queries"] = 0
+
         summary["internal_queries"] += 1

-    @safe_telemetry
+    @safe
     def report_udf_usage(self, udf_name: str):
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()

         if "udf_usage" not in summary:
@@ -308,8 +444,10 @@

         summary["udf_usage"][udf_name] += 1

-    @safe_telemetry
-    def report_io(self, op: str, type: str, options: dict | None):
+    def _report_io(self, op: str, type: str, options: dict | None):
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()

         if "io" not in summary:
@@ -322,16 +460,18 @@

         summary["io"].append(io)

+    @safe
     def report_io_read(self, type: str, options: dict | None):
-        self.report_io("read", type, options)
+        self._report_io("read", type, options)

+    @safe
     def report_io_write(self, type: str, options: dict | None):
-        self.report_io("write", type, options)
+        self._report_io("write", type, options)

-    @safe_telemetry
+    @safe
     def send_server_started_telemetry(self):
         message = {
-            **STATIC_TELEMETRY_DATA,
+            **_basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
             TelemetryType.EVENT_TYPE.value: EventType.SERVER_STARTED.value,
             TelemetryField.KEY_DATA.value: {
@@ -340,17 +480,22 @@
             }
         }
         self._send(message)

-    @safe_telemetry
+    @safe
     def send_request_summary_telemetry(self):
+        if self._not_in_request():
+            logger.warning(
+                "Truing to send request summary telemetry without initializing it"
+            )
+            return
+
         summary = self._request_summary.get()
         message = {
-            **STATIC_TELEMETRY_DATA,
+            **_basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_REQUEST_SUMMARY.value,
             TelemetryField.KEY_DATA.value: summary,
         }
         self._send(message)

-    @safe_telemetry
     def _send(self, msg: Dict) -> None:
         """Queue a telemetry message for asynchronous processing."""
         if not self._is_enabled:
@@ -386,19 +531,6 @@
             finally:
                 self._message_queue.task_done()

-        # Process any remaining messages
-        while not self._message_queue.empty():
-            try:
-                message, timestamp = self._message_queue.get_nowait()
-                self._sink.add_telemetry_data(message, timestamp)
-                self._message_queue.task_done()
-            except Exception:
-                logger.warning(
-                    "Failed to add remaining telemetry messages to sink during shutdown",
-                    exc_info=True,
-                )
-                break
-
         # Flush the sink
         self._sink.flush()

@@ -440,6 +572,18 @@ def _error_location(e: Exception) -> Dict | None:
     }


+def _is_map_field(field_descriptor) -> bool:
+    """
+    Check if a protobuf field is a map.
+    """
+    return (
+        field_descriptor.label == field_descriptor.LABEL_REPEATED
+        and field_descriptor.message_type is not None
+        and field_descriptor.message_type.has_options
+        and field_descriptor.message_type.GetOptions().map_entry
+    )
+
+
 def _protobuf_to_json_with_redaction(
     message: google.protobuf.message.Message, redacted_suffixes: list[str]
 ) -> dict:
@@ -464,7 +608,9 @@
             return "<redacted>"

         # Handle different field types
-        if field_descriptor.type == field_descriptor.TYPE_MESSAGE:
+        if _is_map_field(field_descriptor):
+            return dict(value)
+        elif field_descriptor.type == field_descriptor.TYPE_MESSAGE:
             if field_descriptor.label == field_descriptor.LABEL_REPEATED:
                 # Repeated message field
                 return [_protobuf_to_json_recursive(item, field_path) for item in value]
@@ -482,6 +628,11 @@
     msg: google.protobuf.message.Message, current_path: str = ""
 ) -> dict:
     """Recursively convert protobuf message to dict"""
+
+    if not isinstance(msg, google.protobuf.message.Message):
+        logger.warning("Expected a protobuf message, got: %s", type(msg))
+        return {}
+
     result = {}

     # Use ListFields() to get all set fields
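Background on `_is_map_field` above: protoc compiles a `map<K, V>` field into a repeated auto-generated entry message whose type carries the `map_entry` option, so without this check the redaction walker would serialize maps as lists of entry messages rather than plain dicts. The same test can be exercised standalone against a well-known descriptor:

```python
from google.protobuf import struct_pb2


def is_map_field(field_descriptor) -> bool:
    # map<K, V> compiles to a repeated message whose type carries map_entry.
    return (
        field_descriptor.label == field_descriptor.LABEL_REPEATED
        and field_descriptor.message_type is not None
        and field_descriptor.message_type.has_options
        and field_descriptor.message_type.GetOptions().map_entry
    )


# google.protobuf.Struct declares `map<string, Value> fields = 1;`
fields_fd = struct_pb2.Struct.DESCRIPTOR.fields_by_name["fields"]
print(is_map_field(fields_fd))  # True
```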
snowflake/snowpark_connect/utils/udxf_import_utils.py (new file)

@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from snowflake import snowpark
+from snowflake.snowpark_connect.config import global_config
+
+
+def get_python_udxf_import_files(session: snowpark.Session) -> str:
+    config_imports = global_config.get("snowpark.connect.udf.imports", "")
+    config_imports = config_imports.strip("[] ").split(",") if config_imports else []
+    imports = {*session._python_files, *session._import_files, *config_imports}
+
+    return ",".join([file for file in imports if file])
snowflake/snowpark_connect/version.py

@@ -3,4 +3,4 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #

-VERSION = (0, 20, 2)
+VERSION = (0, 22, 1)
snowflake/snowpark_decoder/__init__.py (file without changes)
snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py (new file)

@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: DataframeProcessorMsg.proto
+"""Generated protocol buffer code."""
+from google.protobuf.internal import builder as _builder
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import descriptor_pb2 as google_dot_protobuf_dot_descriptor__pb2
+from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x44\x61taframeProcessorMsg.proto\x12\x13\x64\x61taframe_processor\x1a google/protobuf/descriptor.proto\x1a\x19google/protobuf/any.proto\"\x80\x01\n\x07Request\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12:\n\x0e\x64\x61taframe_type\x18\x02 \x01(\x0e\x32\".dataframe_processor.DataframeType\x12%\n\x07payload\x18\x03 \x01(\x0b\x32\x14.google.protobuf.Any\"\xd4\x02\n\x08Response\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12:\n\x0e\x64\x61taframe_type\x18\x02 \x01(\x0e\x32\".dataframe_processor.DataframeType\x12\x36\n\x04\x63ode\x18\x03 \x01(\x0e\x32(.dataframe_processor.Response.StatusCode\x12\x19\n\x0fresult_job_uuid\x18\x04 \x01(\tH\x00\x12\'\n\x07payload\x18\x05 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12\x15\n\rerror_message\x18\x06 \x01(\t\"[\n\nStatusCode\x12\x06\n\x02OK\x10\x00\x12\x0e\n\nINIT_ERROR\x10\x01\x12\x13\n\x0f\x45XECUTION_ERROR\x10\x02\x12 \n\x1cSESSION_RESET_REQUIRED_ERROR\x10\x03\x42\x08\n\x06result\"\x18\n\x16TruncatedSparkRelation*C\n\rDataframeType\x12\r\n\tUNDEFINED\x10\x00\x12\x10\n\x0cSNOWPARK_API\x10\x01\x12\x11\n\rSPARK_CONNECT\x10\x02\x42$\n\x17\x63om.snowflake.dataframeB\x06Protos\x80\x01\x00\x62\x06proto3')
+
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'DataframeProcessorMsg_pb2', globals())
+if _descriptor._USE_C_DESCRIPTORS == False:
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\027com.snowflake.dataframeB\006Protos\200\001\000'
+  _DATAFRAMETYPE._serialized_start=613
+  _DATAFRAMETYPE._serialized_end=680
+  _REQUEST._serialized_start=114
+  _REQUEST._serialized_end=242
+  _RESPONSE._serialized_start=245
+  _RESPONSE._serialized_end=585
+  _RESPONSE_STATUSCODE._serialized_start=484
+  _RESPONSE_STATUSCODE._serialized_end=575
+  _TRUNCATEDSPARKRELATION._serialized_start=587
+  _TRUNCATEDSPARKRELATION._serialized_end=611
+# @@protoc_insertion_point(module_scope)
+ # @@protoc_insertion_point(module_scope)