snowpark-connect 0.20.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of snowpark-connect may be problematic.
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +3 -2
- snowflake/snowpark_connect/column_name_handler.py +6 -65
- snowflake/snowpark_connect/config.py +47 -17
- snowflake/snowpark_connect/dataframe_container.py +242 -0
- snowflake/snowpark_connect/error/error_utils.py +25 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +13 -23
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +9 -5
- snowflake/snowpark_connect/expression/map_extension.py +2 -1
- snowflake/snowpark_connect/expression/map_udf.py +4 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +8 -7
- snowflake/snowpark_connect/expression/map_unresolved_function.py +481 -170
- snowflake/snowpark_connect/expression/map_unresolved_star.py +8 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +1 -1
- snowflake/snowpark_connect/expression/typer.py +6 -6
- snowflake/snowpark_connect/proto/control_pb2.py +17 -16
- snowflake/snowpark_connect/proto/control_pb2.pyi +17 -17
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +12 -63
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +15 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +19 -14
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +27 -26
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +74 -68
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +4 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +5 -5
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +25 -17
- snowflake/snowpark_connect/relation/map_aggregate.py +170 -61
- snowflake/snowpark_connect/relation/map_catalog.py +2 -2
- snowflake/snowpark_connect/relation/map_column_ops.py +227 -145
- snowflake/snowpark_connect/relation/map_crosstab.py +25 -6
- snowflake/snowpark_connect/relation/map_extension.py +81 -56
- snowflake/snowpark_connect/relation/map_join.py +72 -63
- snowflake/snowpark_connect/relation/map_local_relation.py +35 -20
- snowflake/snowpark_connect/relation/map_map_partitions.py +24 -17
- snowflake/snowpark_connect/relation/map_relation.py +22 -16
- snowflake/snowpark_connect/relation/map_row_ops.py +232 -146
- snowflake/snowpark_connect/relation/map_sample_by.py +15 -8
- snowflake/snowpark_connect/relation/map_show_string.py +42 -5
- snowflake/snowpark_connect/relation/map_sql.py +141 -237
- snowflake/snowpark_connect/relation/map_stats.py +88 -39
- snowflake/snowpark_connect/relation/map_subquery_alias.py +13 -14
- snowflake/snowpark_connect/relation/map_udtf.py +10 -13
- snowflake/snowpark_connect/relation/read/map_read.py +8 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_json.py +19 -8
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +7 -7
- snowflake/snowpark_connect/relation/read/map_read_socket.py +7 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +25 -16
- snowflake/snowpark_connect/relation/read/map_read_text.py +7 -7
- snowflake/snowpark_connect/relation/read/reader_config.py +1 -0
- snowflake/snowpark_connect/relation/utils.py +11 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +15 -12
- snowflake/snowpark_connect/relation/write/map_write.py +259 -56
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +3 -2
- snowflake/snowpark_connect/server.py +43 -4
- snowflake/snowpark_connect/type_mapping.py +6 -23
- snowflake/snowpark_connect/utils/cache.py +27 -22
- snowflake/snowpark_connect/utils/context.py +33 -17
- snowflake/snowpark_connect/utils/describe_query_cache.py +2 -9
- snowflake/snowpark_connect/utils/{attribute_handling.py → identifiers.py} +47 -0
- snowflake/snowpark_connect/utils/session.py +41 -38
- snowflake/snowpark_connect/utils/telemetry.py +214 -63
- snowflake/snowpark_connect/utils/udxf_import_utils.py +14 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/__init__.py +0 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py +36 -0
- snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.pyi +156 -0
- snowflake/snowpark_decoder/dp_session.py +111 -0
- snowflake/snowpark_decoder/spark_decoder.py +76 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/METADATA +6 -4
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/RECORD +83 -69
- snowpark_connect-0.22.1.dist-info/licenses/LICENSE-binary +568 -0
- snowpark_connect-0.22.1.dist-info/licenses/NOTICE-binary +1533 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/top_level.txt +1 -0
- spark/__init__.py +0 -0
- spark/connect/__init__.py +0 -0
- spark/connect/envelope_pb2.py +31 -0
- spark/connect/envelope_pb2.pyi +46 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.20.2.data → snowpark_connect-0.22.1.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.20.2.dist-info → snowpark_connect-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
snowflake/snowpark_connect/utils/telemetry.py

@@ -1,19 +1,22 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-
+import functools
 import json
 import os
 import queue
 import threading
+import uuid
 from abc import ABC, abstractmethod
 from collections import defaultdict
+from collections.abc import Iterable
 from contextvars import ContextVar
 from enum import Enum, unique
 from typing import Dict
 
 import google.protobuf.message
 
+from snowflake.connector.cursor import SnowflakeCursor
 from snowflake.connector.telemetry import (
     TelemetryClient as PCTelemetryClient,
     TelemetryData as PCTelemetryData,
@@ -21,7 +24,6 @@ from snowflake.connector.telemetry import (
 )
 from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
-from snowflake.snowpark._internal.telemetry import safe_telemetry
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
@@ -43,6 +45,7 @@ class TelemetryField(Enum):
     KEY_OS = "operating_system"
     KEY_DATA = "data"
     KEY_START_TIME = "start_time"
+    KEY_EVENT_ID = "event_id"
 
 
 class TelemetryType(Enum):
@@ -86,8 +89,7 @@ RECORDED_CONFIG_KEYS = {
     "spark.sql.mapKeyDedupPolicy",
     "snowpark.connect.sql.passthrough",
     "snowpark.connect.iceberg.external_volume",
-    "snowpark.connect.auto-uppercase",
-    "snowpark.connect.auto-uppercase.dml",
+    "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
    "snowpark.connect.views.duplicate_column_names_handling_mode",
 }
@@ -108,7 +110,34 @@ REDACTED_PLAN_SUFFIXES = [
 ]
 
 
+def _basic_telemetry_data() -> Dict:
+    return {
+        **STATIC_TELEMETRY_DATA,
+        TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
+    }
+
+
+def safe(func):
+    """
+    Decorator to safely execute telemetry functions, catching and logging exceptions
+    without affecting the main application flow.
+    """
+
+    @functools.wraps(func)
+    def wrap(*args, **kwargs):
+        try:
+            func(*args, **kwargs)
+        except Exception:
+            # We don't really care if telemetry fails, just want to be safe for the user
+            logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
+
+    return wrap
+
+
 class TelemetrySink(ABC):
+    MAX_BUFFER_ELEMENTS = 20
+    MAX_WAIT_MS = 10000  # 10 seconds
+
     @abstractmethod
     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
         pass
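The new module-level `safe` decorator replaces Snowpark's internal `safe_telemetry` on the reporting methods below. A minimal self-contained sketch of the same pattern (the standalone logger and example function are illustrative, not from the package):

import functools
import logging

logger = logging.getLogger("telemetry_example")

def safe(func):
    # Mirror of the decorator added above: run the wrapped reporter,
    # swallow any exception, and log it instead of raising.
    @functools.wraps(func)
    def wrap(*args, **kwargs):
        try:
            func(*args, **kwargs)
        except Exception:
            logger.warning(f"Telemetry operation failed: {func}", exc_info=True)
    return wrap

@safe
def report_event():
    raise RuntimeError("telemetry backend unavailable")

report_event()  # logs a warning; the caller continues unaffected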
@@ -129,23 +158,44 @@ class NoOpTelemetrySink(TelemetrySink):
 class ClientTelemetrySink(TelemetrySink):
     def __init__(self, telemetry_client: PCTelemetryClient) -> None:
         self._telemetry_client = telemetry_client
+        self._lock = threading.Lock()
+        self._reset()
 
     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
         telemetry_data = PCTelemetryData(message=message, timestamp=timestamp)
         self._telemetry_client.try_add_log_to_batch(telemetry_data)
+        with self._lock:
+            self._events_since_last_flush += 1
+            # flush more often than the underlying telemetry client
+            if self._should_flush():
+                self.flush()
 
     def flush(self) -> None:
+        with self._lock:
+            self._reset()
         self._telemetry_client.send_batch()
 
+    def _should_flush(self) -> bool:
+        current_time = get_time_millis()
+
+        return (
+            self._events_since_last_flush >= TelemetrySink.MAX_BUFFER_ELEMENTS
+            or (current_time - self._last_flush_time) >= TelemetrySink.MAX_WAIT_MS
+        )
+
+    def _reset(self):
+        self._events_since_last_flush = 0
+        self._last_flush_time = get_time_millis()
+
 
 class QueryTelemetrySink(TelemetrySink):
 
-    MAX_BUFFER_SIZE =
-    MAX_WAIT_MS = 10000  # 10 seconds
+    MAX_BUFFER_SIZE = 20 * 1024  # 20KB
     TELEMETRY_JOB_ID = "43e72d9b-56d0-4cdb-a615-6b5b5059d6df"
 
     def __init__(self, session: Session) -> None:
         self._session = session
+        self._lock = threading.Lock()
         self._reset()
 
     def add_telemetry_data(self, message: dict, timestamp: int) -> None:
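ClientTelemetrySink now flushes on its own count and age thresholds instead of relying solely on the underlying client. A standalone sketch of the same two-condition policy (constants copied from the diff, timestamps illustrative):

import time

MAX_BUFFER_ELEMENTS = 20
MAX_WAIT_MS = 10000  # 10 seconds

def should_flush(events_since_last_flush: int, last_flush_time_ms: int) -> bool:
    # Flush when enough events have accumulated, or when the buffer is too old.
    current_time = int(time.time() * 1000)
    return (
        events_since_last_flush >= MAX_BUFFER_ELEMENTS
        or (current_time - last_flush_time_ms) >= MAX_WAIT_MS
    )

now_ms = int(time.time() * 1000)
print(should_flush(21, now_ms))         # True: count threshold exceeded
print(should_flush(1, now_ms - 11000))  # True: buffer older than 10 seconds
print(should_flush(1, now_ms))          # False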
@@ -153,31 +203,37 @@ class QueryTelemetrySink(TelemetrySink):
 
         # stringify entry, and escape single quotes
         entry_str = json.dumps(telemetry_entry).replace("'", "''")
-        self._buffer.append(entry_str)
-        self._buffer_size += len(entry_str)
 
-        current_time = get_time_millis()
-        if (
-            self._buffer_size >= QueryTelemetrySink.MAX_BUFFER_SIZE
-            or (current_time - self._last_export_time) >= QueryTelemetrySink.MAX_WAIT_MS
-        ):
+        with self._lock:
+            self._buffer.append(entry_str)
+            self._buffer_size += len(entry_str)
+
+            if self._should_flush():
                 self.flush()
 
     def flush(self) -> None:
-        if not self._buffer:
-            return
+        with self._lock:
+            if not self._buffer:
+                return
+            # prefix query with a unique identifier for easier tracking
+            query = f"select '{self.TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(self._buffer)}]'"
+            self._reset()
 
-        # prefix query with a unique identifier for easier tracking
-        query = f"select '{self.TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(self._buffer)}]'"
         self._session.sql(query).collect_nowait()
 
-        self._reset()
-
     def _reset(self) -> None:
         self._buffer = []
         self._buffer_size = 0
         self._last_export_time = get_time_millis()
 
+    def _should_flush(self):
+        current_time = get_time_millis()
+        return (
+            self._buffer_size >= QueryTelemetrySink.MAX_BUFFER_SIZE
+            or len(self._buffer) >= TelemetrySink.MAX_BUFFER_ELEMENTS
+            or (current_time - self._last_export_time) >= TelemetrySink.MAX_WAIT_MS
+        )
+
 
 class Telemetry:
     def __init__(self, is_enabled=True) -> None:
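QueryTelemetrySink exports telemetry by running a tagged SELECT whose second column carries the buffered JSON entries, with single quotes doubled for the SQL string literal. A standalone sketch of the string the new flush() assembles (payload values illustrative):

import json

TELEMETRY_JOB_ID = "43e72d9b-56d0-4cdb-a615-6b5b5059d6df"

# Entries are JSON-serialized and escaped as in add_telemetry_data above.
buffer = [json.dumps({"event": "it's an example"}).replace("'", "''")]

query = f"select '{TELEMETRY_JOB_ID}' as scos_telemetry_export, '[{','.join(buffer)}]'"
print(query)
# select '43e72d9b-...' as scos_telemetry_export, '[{"event": "it''s an example"}]'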
@@ -186,6 +242,8 @@ class Telemetry:
             "request_summary", default={}
         )
         self._is_enabled = is_enabled
+        self._is_initialized = False
+        self._lock = threading.Lock()
 
         # Async processing setup
         self._message_queue = queue.Queue(maxsize=10000)
@@ -203,6 +261,12 @@
         if not self._is_enabled:
             return
 
+        with self._lock:
+            if self._is_initialized:
+                logger.warning("Telemetry is already initialized")
+                return
+            self._is_initialized = True
+
         telemetry = getattr(session._conn._conn, "_telemetry", None)
         if telemetry is None:
             # no telemetry client available, so we export with queries
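initialize() is now guarded so concurrent or repeated calls configure the sink only once. The same lock-and-flag idiom in isolation (class name illustrative):

import threading

class InitOnce:
    def __init__(self):
        self._lock = threading.Lock()
        self._is_initialized = False

    def initialize(self):
        with self._lock:
            if self._is_initialized:
                return  # later callers are no-ops
            self._is_initialized = True
        print("initialized exactly once")

obj = InitOnce()
obj.initialize()
obj.initialize()  # prints nothing the second time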
@@ -211,8 +275,9 @@
             self._sink = ClientTelemetrySink(telemetry)
 
         self._start_worker_thread()
+        logger.info(f"Telemetry initialized with {type(self._sink)}")
 
-    @safe_telemetry
+    @safe
     def initialize_request_summary(
         self, request: google.protobuf.message.Message
     ) -> None:
@@ -235,8 +300,29 @@
             request.plan, REDACTED_PLAN_SUFFIXES
         )
 
-    @safe_telemetry
+    def _not_in_request(self):
+        # we don't want to add things to the summary if it's not initialized
+        return "created_on" not in self._request_summary.get()
+
+    @safe
+    def report_parsed_sql_plan(self, plan: google.protobuf.message.Message) -> None:
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "parsed_sql_plans" not in summary:
+            summary["parsed_sql_plans"] = []
+
+        summary["parsed_sql_plans"].append(
+            _protobuf_to_json_with_redaction(plan, REDACTED_PLAN_SUFFIXES)
+        )
+
+    @safe
     def report_function_usage(self, function_name: str) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()
 
         if "used_functions" not in summary:
@@ -244,8 +330,11 @@
 
         summary["used_functions"][function_name] += 1
 
-    @safe_telemetry
+    @safe
     def report_request_failure(self, e: Exception) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()
 
         summary["was_successful"] = False
@@ -256,37 +345,78 @@
         if error_location:
             summary["error_location"] = error_location
 
-    @safe_telemetry
-    def report_config_set(self,
+    @safe
+    def report_config_set(self, pairs: Iterable) -> None:
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()
 
         if "config_set" not in summary:
             summary["config_set"] = []
 
-
-
-
-
-
-
+        for p in pairs:
+            summary["config_set"].append(
+                {
+                    "key": p.key,
+                    "value": p.value if p.key in RECORDED_CONFIG_KEYS else "<redacted>",
+                }
+            )
+
+    @safe
+    def report_config_unset(self, keys: Iterable[str]) -> None:
+        if self._not_in_request():
+            return
 
-    @safe_telemetry
-    def report_config_unset(self, key):
         summary = self._request_summary.get()
 
         if "config_unset" not in summary:
             summary["config_unset"] = []
 
-        summary["config_unset"].append(key)
+        summary["config_unset"].extend(keys)
+
+    @safe
+    def report_config_get(self, keys: Iterable[str]) -> None:
+        if self._not_in_request():
+            return
 
-    @safe_telemetry
-    def report_config_op_type(self, op_type: str):
         summary = self._request_summary.get()
 
+        if "config_get" not in summary:
+            summary["config_get"] = []
+
+        summary["config_get"].extend(keys)
+
+    @safe
+    def report_config_op_type(self, op_type: str):
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
         summary["config_op_type"] = op_type
 
-    @safe_telemetry
-    def report_query_id(self, query_id):
+    @safe
+    def report_query(
+        self, result: SnowflakeCursor | dict | Exception, **kwargs
+    ) -> None:
+        if result is None or isinstance(result, dict) or self._not_in_request():
+            return
+
+        # SnowflakeCursor and SQL errors will have sfqid
+        # other exceptions will not have it
+        # TODO: handle async queries, but filter out telemetry export queries
+        qid = getattr(result, "sfqid", None)
+
+        if qid is None:
+            logger.warning("Missing query id in result: %s", result)
+
+        is_internal = kwargs.get("_is_internal", False)
+        if is_internal:
+            self._report_internal_query()
+        elif qid:
+            self._report_query_id(qid)
+
+    def _report_query_id(self, query_id: str):
         summary = self._request_summary.get()
 
         if "queries" not in summary:
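report_config_set records values only for keys on the RECORDED_CONFIG_KEYS allow-list and redacts everything else. A standalone sketch of that rule (SimpleNamespace stands in for the key/value pairs the server receives):

from types import SimpleNamespace

RECORDED_CONFIG_KEYS = {"snowpark.connect.sql.identifiers.auto-uppercase"}

def summarize(pairs):
    # Same rule as report_config_set: keep allow-listed values, redact the rest.
    return [
        {"key": p.key, "value": p.value if p.key in RECORDED_CONFIG_KEYS else "<redacted>"}
        for p in pairs
    ]

pairs = [
    SimpleNamespace(key="snowpark.connect.sql.identifiers.auto-uppercase", value="true"),
    SimpleNamespace(key="spark.some.secret", value="hunter2"),
]
print(summarize(pairs))  # 'true' is kept; 'hunter2' becomes '<redacted>'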
@@ -294,13 +424,19 @@
 
         summary["queries"].append(query_id)
 
-    @safe_telemetry
-    def report_internal_query(self):
+    def _report_internal_query(self):
         summary = self._request_summary.get()
+
+        if "internal_queries" not in summary:
+            summary["internal_queries"] = 0
+
         summary["internal_queries"] += 1
 
-    @safe_telemetry
+    @safe
     def report_udf_usage(self, udf_name: str):
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()
 
         if "udf_usage" not in summary:
@@ -308,8 +444,10 @@
 
         summary["udf_usage"][udf_name] += 1
 
-    @safe_telemetry
-    def report_io(self, op: str, type: str, options: dict | None):
+    def _report_io(self, op: str, type: str, options: dict | None):
+        if self._not_in_request():
+            return
+
         summary = self._request_summary.get()
 
         if "io" not in summary:
@@ -322,16 +460,18 @@
 
         summary["io"].append(io)
 
+    @safe
     def report_io_read(self, type: str, options: dict | None):
-        self.report_io("read", type, options)
+        self._report_io("read", type, options)
 
+    @safe
     def report_io_write(self, type: str, options: dict | None):
-        self.report_io("write", type, options)
+        self._report_io("write", type, options)
 
-    @safe_telemetry
+    @safe
     def send_server_started_telemetry(self):
         message = {
-            **STATIC_TELEMETRY_DATA,
+            **_basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
             TelemetryType.EVENT_TYPE.value: EventType.SERVER_STARTED.value,
             TelemetryField.KEY_DATA.value: {
@@ -340,17 +480,22 @@
         }
         self._send(message)
 
-    @safe_telemetry
+    @safe
     def send_request_summary_telemetry(self):
+        if self._not_in_request():
+            logger.warning(
+                "Truing to send request summary telemetry without initializing it"
+            )
+            return
+
         summary = self._request_summary.get()
         message = {
-            **STATIC_TELEMETRY_DATA,
+            **_basic_telemetry_data(),
             TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_REQUEST_SUMMARY.value,
             TelemetryField.KEY_DATA.value: summary,
         }
         self._send(message)
 
-    @safe_telemetry
     def _send(self, msg: Dict) -> None:
         """Queue a telemetry message for asynchronous processing."""
         if not self._is_enabled:
@@ -386,19 +531,6 @@
             finally:
                 self._message_queue.task_done()
 
-        # Process any remaining messages
-        while not self._message_queue.empty():
-            try:
-                message, timestamp = self._message_queue.get_nowait()
-                self._sink.add_telemetry_data(message, timestamp)
-                self._message_queue.task_done()
-            except Exception:
-                logger.warning(
-                    "Failed to add remaining telemetry messages to sink during shutdown",
-                    exc_info=True,
-                )
-                break
-
         # Flush the sink
         self._sink.flush()
 
@@ -440,6 +572,18 @@ def _error_location(e: Exception) -> Dict | None:
     }
 
 
+def _is_map_field(field_descriptor) -> bool:
+    """
+    Check if a protobuf field is a map.
+    """
+    return (
+        field_descriptor.label == field_descriptor.LABEL_REPEATED
+        and field_descriptor.message_type is not None
+        and field_descriptor.message_type.has_options
+        and field_descriptor.message_type.GetOptions().map_entry
+    )
+
+
 def _protobuf_to_json_with_redaction(
     message: google.protobuf.message.Message, redacted_suffixes: list[str]
 ) -> dict:
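The map detection relies on protobuf representing map&lt;K, V&gt; fields as repeated auto-generated entry messages with the map_entry option set. A quick standalone check against a well-known type that declares a map field:

from google.protobuf import struct_pb2

def _is_map_field(field_descriptor) -> bool:
    # Same test as above: a map field is a repeated synthetic *Entry message.
    return (
        field_descriptor.label == field_descriptor.LABEL_REPEATED
        and field_descriptor.message_type is not None
        and field_descriptor.message_type.has_options
        and field_descriptor.message_type.GetOptions().map_entry
    )

# google.protobuf.Struct declares `map<string, Value> fields`.
fields = struct_pb2.Struct.DESCRIPTOR.fields_by_name["fields"]
print(_is_map_field(fields))  # True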
@@ -464,7 +608,9 @@ def _protobuf_to_json_with_redaction(
         return "<redacted>"
 
     # Handle different field types
-    if field_descriptor.type == field_descriptor.TYPE_MESSAGE:
+    if _is_map_field(field_descriptor):
+        return dict(value)
+    elif field_descriptor.type == field_descriptor.TYPE_MESSAGE:
         if field_descriptor.label == field_descriptor.LABEL_REPEATED:
             # Repeated message field
             return [_protobuf_to_json_recursive(item, field_path) for item in value]
@@ -482,6 +628,11 @@
         msg: google.protobuf.message.Message, current_path: str = ""
     ) -> dict:
         """Recursively convert protobuf message to dict"""
+
+        if not isinstance(msg, google.protobuf.message.Message):
+            logger.warning("Expected a protobuf message, got: %s", type(msg))
+            return {}
+
         result = {}
 
         # Use ListFields() to get all set fields
snowflake/snowpark_connect/utils/udxf_import_utils.py (new file)

@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from snowflake import snowpark
+from snowflake.snowpark_connect.config import global_config
+
+
+def get_python_udxf_import_files(session: snowpark.Session) -> str:
+    config_imports = global_config.get("snowpark.connect.udf.imports", "")
+    config_imports = config_imports.strip("[] ").split(",") if config_imports else []
+    imports = {*session._python_files, *session._import_files, *config_imports}
+
+    return ",".join([file for file in imports if file])
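The helper merges UDF import files from the `snowpark.connect.udf.imports` config with files already registered on the session, deduplicating via a set. A standalone sketch of the parsing and merge (the stage paths and session stand-ins are illustrative):

config_imports = "[@stage/a.py,@stage/b.zip]"  # illustrative config value
config_imports = config_imports.strip("[] ").split(",") if config_imports else []

session_python_files = {"@stage/a.py"}  # stand-in for session._python_files
session_import_files = set()            # stand-in for session._import_files

imports = {*session_python_files, *session_import_files, *config_imports}
# sorted() here only to make the example output deterministic; the helper joins unsorted
print(",".join(sorted(file for file in imports if file)))
# @stage/a.py,@stage/b.zip  (duplicates collapse; empty entries drop)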
File without changes
snowflake/snowpark_decoder/_internal/proto/generated/DataframeProcessorMsg_pb2.py (new file)

@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: DataframeProcessorMsg.proto
+"""Generated protocol buffer code."""
+from google.protobuf.internal import builder as _builder
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import descriptor_pb2 as google_dot_protobuf_dot_descriptor__pb2
+from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x44\x61taframeProcessorMsg.proto\x12\x13\x64\x61taframe_processor\x1a google/protobuf/descriptor.proto\x1a\x19google/protobuf/any.proto\"\x80\x01\n\x07Request\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12:\n\x0e\x64\x61taframe_type\x18\x02 \x01(\x0e\x32\".dataframe_processor.DataframeType\x12%\n\x07payload\x18\x03 \x01(\x0b\x32\x14.google.protobuf.Any\"\xd4\x02\n\x08Response\x12\x12\n\nrequest_id\x18\x01 \x01(\t\x12:\n\x0e\x64\x61taframe_type\x18\x02 \x01(\x0e\x32\".dataframe_processor.DataframeType\x12\x36\n\x04\x63ode\x18\x03 \x01(\x0e\x32(.dataframe_processor.Response.StatusCode\x12\x19\n\x0fresult_job_uuid\x18\x04 \x01(\tH\x00\x12\'\n\x07payload\x18\x05 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00\x12\x15\n\rerror_message\x18\x06 \x01(\t\"[\n\nStatusCode\x12\x06\n\x02OK\x10\x00\x12\x0e\n\nINIT_ERROR\x10\x01\x12\x13\n\x0f\x45XECUTION_ERROR\x10\x02\x12 \n\x1cSESSION_RESET_REQUIRED_ERROR\x10\x03\x42\x08\n\x06result\"\x18\n\x16TruncatedSparkRelation*C\n\rDataframeType\x12\r\n\tUNDEFINED\x10\x00\x12\x10\n\x0cSNOWPARK_API\x10\x01\x12\x11\n\rSPARK_CONNECT\x10\x02\x42$\n\x17\x63om.snowflake.dataframeB\x06Protos\x80\x01\x00\x62\x06proto3')
+
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'DataframeProcessorMsg_pb2', globals())
+if _descriptor._USE_C_DESCRIPTORS == False:
+
+  DESCRIPTOR._options = None
+  DESCRIPTOR._serialized_options = b'\n\027com.snowflake.dataframeB\006Protos\200\001\000'
+  _DATAFRAMETYPE._serialized_start=613
+  _DATAFRAMETYPE._serialized_end=680
+  _REQUEST._serialized_start=114
+  _REQUEST._serialized_end=242
+  _RESPONSE._serialized_start=245
+  _RESPONSE._serialized_end=585
+  _RESPONSE_STATUSCODE._serialized_start=484
+  _RESPONSE_STATUSCODE._serialized_end=575
+  _TRUNCATEDSPARKRELATION._serialized_start=587
+  _TRUNCATEDSPARKRELATION._serialized_end=611
+# @@protoc_insertion_point(module_scope)