snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/telemetry.py
@@ -11,10 +11,11 @@ from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterable
 from contextvars import ContextVar
+from dataclasses import dataclass
 from enum import Enum, unique
-from typing import Dict
 
 import google.protobuf.message
+import pyspark.sql.connect.proto.base_pb2 as proto_base
 
 from snowflake.connector.cursor import SnowflakeCursor
 from snowflake.connector.telemetry import (
@@ -26,6 +27,7 @@ from snowflake.connector.time_util import get_time_millis
 from snowflake.snowpark import Session
 from snowflake.snowpark._internal.utils import get_os_name, get_python_version
 from snowflake.snowpark.version import VERSION as snowpark_version
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.version import VERSION as sas_version
 
@@ -56,6 +58,7 @@ class TelemetryType(Enum):
 
 class EventType(Enum):
     SERVER_STARTED = "scos_server_started"
+    WARNING = "scos_warning"
 
 
 # global labels
@@ -88,6 +91,7 @@ RECORDED_CONFIG_KEYS = {
     "spark.sql.session.localRelationCacheThreshold",
     "spark.sql.mapKeyDedupPolicy",
     "snowpark.connect.sql.passthrough",
+    "snowpark.connect.cte.optimization_enabled",
     "snowpark.connect.iceberg.external_volume",
     "snowpark.connect.sql.identifiers.auto-uppercase",
     "snowpark.connect.udtf.compatibility_mode",
@@ -104,7 +108,16 @@ REDACTED_PLAN_SUFFIXES = [
 ]
 
 
-def _basic_telemetry_data() -> dict:
+@dataclass
+class TelemetryMessage:
+    """Container for telemetry messages in the processing queue."""
+
+    message: dict
+    timestamp: int
+    is_warning: bool
+
+
+def _basic_telemetry_data() -> dict:
     return {
         **STATIC_TELEMETRY_DATA,
         TelemetryField.KEY_EVENT_ID.value: str(uuid.uuid4()),
@@ -121,9 +134,11 @@ def safe(func):
     def wrap(*args, **kwargs):
         try:
             func(*args, **kwargs)
-        except Exception:
-            #
-
+        except Exception as e:
+            # report failed operation to telemetry
+            telemetry.send_warning_msg(
+                f"Telemetry operation {func} failed due to exception", e
+            )
 
     return wrap
 
@@ -289,10 +304,7 @@ class Telemetry:
 
         self._request_summary.set(summary)
 
-
-        summary["query_plan"] = _protobuf_to_json_with_redaction(
-            request.plan, REDACTED_PLAN_SUFFIXES
-        )
+        _set_query_plan(request, summary)
 
     def _not_in_request(self):
         # we don't want to add things to the summary if it's not initialized
@@ -335,6 +347,11 @@ class Telemetry:
         summary["error_message"] = str(e)
         summary["error_type"] = type(e).__name__
 
+        if not hasattr(e, "custom_error_code") or (e.custom_error_code is None):
+            summary["error_code"] = ErrorCodes.INTERNAL_ERROR
+        else:
+            summary["error_code"] = e.custom_error_code
+
         error_location = _error_location(e)
         if error_location:
             summary["error_location"] = error_location
@@ -426,6 +443,63 @@ class Telemetry:
 
         summary["internal_queries"] += 1
 
+    @safe
+    def report_describe_query_cache_lookup(self):
+        """Report a describe query cache lookup."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_lookups" not in summary:
+            summary["describe_cache_lookups"] = 0
+
+        summary["describe_cache_lookups"] += 1
+
+    @safe
+    def report_describe_query_cache_hit(self):
+        """Report a describe query cache hit."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_hits" not in summary:
+            summary["describe_cache_hits"] = 0
+
+        summary["describe_cache_hits"] += 1
+
+    @safe
+    def report_describe_query_cache_expired(self, expired_by: float):
+        """Report a describe query cache hit."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_expired" not in summary:
+            summary["describe_cache_expired"] = 0
+
+        summary["describe_cache_expired"] += 1
+
+        if "describe_cache_expired_by" not in summary:
+            summary["describe_cache_expired_by"] = []
+
+        summary["describe_cache_expired_by"].append(expired_by)
+
+    @safe
+    def report_describe_query_cache_clear(self):
+        """Report a describe query cache clear."""
+        if self._not_in_request():
+            return
+
+        summary = self._request_summary.get()
+
+        if "describe_cache_cleared" not in summary:
+            summary["describe_cache_cleared"] = 0
+
+        summary["describe_cache_cleared"] += 1
+
     @safe
     def report_udf_usage(self, udf_name: str):
         if self._not_in_request():
@@ -472,8 +546,8 @@ class Telemetry:
     @safe
    def send_request_summary_telemetry(self):
        if self._not_in_request():
-
-                "
+            self.send_warning_msg(
+                "Trying to send request summary telemetry without initializing it"
             )
             return
 
@@ -485,14 +559,56 @@ class Telemetry:
         }
         self._send(message)
 
-    def
+    def send_warning_msg(self, msg: str, e: Exception = None) -> None:
+        # using this within @safe decorator may result in recursive loop
+        try:
+            message = self._build_warning_message(msg, e)
+            if not message:
+                return
+
+            self._send(message, is_warning=True)
+        except Exception:
+            # if there's an exception here, there's nothing we can really do about it
+            pass
+
+    def _build_warning_message(self, warning_msg: str, e: Exception = None) -> dict:
+        try:
+            data = {"warning_message": warning_msg}
+            if e is not None:
+                data["exception"] = repr(e)
+
+            # add session and operation id if available
+            spark_session_id = self._request_summary.get().get("spark_session_id", None)
+            if spark_session_id is not None:
+                data["spark_session_id"] = spark_session_id
+
+            spark_operation_id = self._request_summary.get().get(
+                "spark_operation_id", None
+            )
+            if spark_operation_id is not None:
+                data["spark_operation_id"] = spark_operation_id
+
+            message = {
+                **_basic_telemetry_data(),
+                TelemetryField.KEY_TYPE.value: TelemetryType.TYPE_EVENT.value,
+                TelemetryType.EVENT_TYPE.value: EventType.WARNING.value,
+                TelemetryField.KEY_DATA.value: data,
+            }
+            return message
+        except Exception:
+            return {}
+
+    def _send(self, msg: dict, is_warning: bool = False) -> None:
         """Queue a telemetry message for asynchronous processing."""
         if not self._is_enabled:
             return
 
         timestamp = get_time_millis()
         try:
-
+            telemetry_msg = TelemetryMessage(
+                message=msg, timestamp=timestamp, is_warning=is_warning
+            )
+            self._message_queue.put_nowait(telemetry_msg)
         except queue.Full:
             # If queue is full, drop the message to avoid blocking
             logger.warning("Telemetry queue is full, dropping message")
@@ -510,13 +626,16 @@ class Telemetry:
         while True:
             try:
                 # block to allow the GIL to switch threads
-
-                if
-                    # shutdown
+                telemetry_msg = self._message_queue.get()
+                if telemetry_msg is None:
+                    # shutdown signal
                     break
-                self._sink.add_telemetry_data(
-
-
+                self._sink.add_telemetry_data(
+                    telemetry_msg.message, telemetry_msg.timestamp
+                )
+            except Exception as e:
+                if not telemetry_msg.is_warning:
+                    self.send_warning_msg("Failed to add telemetry message to sink", e)
             finally:
                 self._message_queue.task_done()
 
@@ -529,7 +648,7 @@ class Telemetry:
             return
 
         try:
-            self._message_queue.put_nowait(
+            self._message_queue.put_nowait(None)
             # Wait for worker thread to finish
             self._worker_thread.join(timeout=3.0)
         except Exception:
@@ -538,7 +657,7 @@ class Telemetry:
         )
 
 
-def _error_location(e: Exception) ->
+def _error_location(e: Exception) -> dict | None:
     """
     Inspect the exception traceback and extract the file name, line number, and function name
     from the last frame (the one that raised the exception).
@@ -619,7 +738,7 @@ def _protobuf_to_json_with_redaction(
     """Recursively convert protobuf message to dict"""
 
     if not isinstance(msg, google.protobuf.message.Message):
-
+        telemetry.send_warning_msg(f"Expected a protobuf message, got: {type(msg)}")
         return {}
 
     result = {}
@@ -644,6 +763,28 @@ def _protobuf_to_json_with_redaction(
     )
 
 
+def _set_query_plan(request: google.protobuf.message.Message, summary: dict) -> None:
+    if isinstance(request, proto_base.ExecutePlanRequest):
+        # ExecutePlanRequest has plan at top level
+        if hasattr(request, "plan"):
+            summary["query_plan"] = (
+                _protobuf_to_json_with_redaction(request.plan, REDACTED_PLAN_SUFFIXES),
+            )
+
+    elif isinstance(request, proto_base.AnalyzePlanRequest):
+        # AnalyzePlanRequest has plan under oneof analyze
+        analyze_type = request.WhichOneof("analyze")
+        if not analyze_type:
+            return
+
+        summary["analyze_type"] = analyze_type
+        analyze_field = getattr(request, analyze_type)
+        if hasattr(analyze_field, "plan"):
+            summary["query_plan"] = _protobuf_to_json_with_redaction(
+                analyze_field.plan, REDACTED_PLAN_SUFFIXES
+            )
+
+
 # global telemetry client
 telemetry = Telemetry(is_enabled="SNOWPARK_CONNECT_DISABLE_TELEMETRY" not in os.environ)
 
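The telemetry changes above add a queue-backed warning channel: send_warning_msg builds a WARNING event payload, wraps it in the new TelemetryMessage dataclass, and the background worker drains the queue (a None sentinel shuts it down). A minimal caller-side sketch, assuming the module-level telemetry client shown in the diff; refresh_stage is a hypothetical operation, not part of the package:

    from snowflake.snowpark_connect.utils.telemetry import telemetry

    def refresh_stage():  # hypothetical operation used only for illustration
        raise RuntimeError("stage refresh failed")

    try:
        refresh_stage()
    except Exception as e:
        # Builds a warning payload (EventType.WARNING) and enqueues a
        # TelemetryMessage(is_warning=True); the worker thread forwards it to
        # the telemetry sink, or the message is dropped if the queue is full.
        telemetry.send_warning_msg("refresh_stage failed", e)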
snowflake/snowpark_connect/utils/temporary_view_cache.py (new file)
@@ -0,0 +1,67 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+from typing import Optional, Tuple
+
+from pyspark.errors import AnalysisException
+
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+from snowflake.snowpark_connect.utils.context import get_spark_session_id
+
+_temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+    normalized_name = _normalize(name)
+    current_session_id = get_spark_session_id()
+    for key in list(_temp_views.keys()):
+        if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+            if replace:
+                _temp_views.remove(key)
+                break
+            else:
+                exception = AnalysisException(
+                    f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                )
+                attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                raise exception
+
+    _temp_views[(name, current_session_id)] = df
+
+
+def unregister_temp_view(name: str) -> bool:
+    normalized_name = _normalize(name)
+
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            pop_result = _temp_views.remove(key)
+            return pop_result is not None
+    return False
+
+
+def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+    normalized_name = _normalize(name)
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            return _temp_views.get(key)
+    return None
+
+
+def get_temp_view_normalized_names() -> list[str]:
+    return [
+        _normalize(key[0])
+        for key in _temp_views.keys()
+        if key[1] == get_spark_session_id()
+    ]
+
+
+def _normalize(name: str) -> str:
+    from snowflake.snowpark_connect.config import global_config
+
+    return name if global_config.spark_sql_caseSensitive else name.lower()
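The new temporary_view_cache module above keeps temporary views in an in-memory map keyed by (view name, Spark session id), with name normalization controlled by spark.sql.caseSensitive. The standalone sketch below re-implements that keying idea with plain Python types to show the behavior; it is an illustration only, not the package's API (the real module stores DataFrameContainer values in a SynchronizedDict and reads the session id from the request context):

    # Simplified illustration of a (name, session_id)-keyed view registry.
    _views: dict[tuple[str, str], object] = {}
    CASE_SENSITIVE = False  # stands in for spark.sql.caseSensitive

    def _normalize(name: str) -> str:
        return name if CASE_SENSITIVE else name.lower()

    def register(name: str, df: object, session_id: str, replace: bool) -> None:
        for key in list(_views):
            if _normalize(key[0]) == _normalize(name) and key[1] == session_id:
                if not replace:
                    raise ValueError(f"temporary view `{name}` already exists")
                del _views[key]
                break
        _views[(name, session_id)] = df

    def lookup(name: str, session_id: str):
        for key in _views:
            if _normalize(key[0]) == _normalize(name) and key[1] == session_id:
                return _views[key]
        return None

    register("people", {"rows": 3}, session_id="sess-1", replace=False)
    assert lookup("PEOPLE", "sess-1") is not None  # case-insensitive by default
    assert lookup("people", "sess-2") is None      # other sessions don't see it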
snowflake/snowpark_connect/utils/temporary_view_helper.py (new file)
@@ -0,0 +1,334 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+import re
+import uuid
+from collections import Counter
+from typing import Optional, Tuple
+
+from pyspark.errors import AnalysisException
+from pyspark.errors.exceptions.base import TempTableAlreadyExistsException
+
+from snowflake.snowpark import DataFrame, Session
+from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
+from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import ColumnNameMap, ColumnNames
+from snowflake.snowpark_connect.config import (
+    global_config,
+    sessions_config,
+    should_create_temporary_view_in_snowflake,
+)
+from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+from snowflake.snowpark_connect.utils.context import get_spark_session_id
+from snowflake.snowpark_connect.utils.identifiers import (
+    spark_to_sf_single_id,
+    spark_to_sf_single_id_with_unquoting,
+)
+
+_INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
+
+_CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+
+_temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+    normalized_name = _normalize(name)
+    current_session_id = get_spark_session_id()
+    for key in list(_temp_views.keys()):
+        if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+            if replace:
+                _temp_views.remove(key)
+                break
+            else:
+                raise TempTableAlreadyExistsException(
+                    f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                )
+
+    _temp_views[(name, current_session_id)] = df
+
+
+def unregister_temp_view(name: str) -> bool:
+    normalized_name = _normalize(name)
+
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            pop_result = _temp_views.remove(key)
+            return pop_result is not None
+    return False
+
+
+def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+    normalized_name = _normalize(name)
+    for key in _temp_views.keys():
+        normalized_key = _normalize(key[0])
+        if normalized_name == normalized_key and key[1] == get_spark_session_id():
+            return _temp_views.get(key)
+    return None
+
+
+def get_temp_view_normalized_names() -> list[str]:
+    return [
+        _normalize(key[0])
+        for key in _temp_views.keys()
+        if key[1] == get_spark_session_id()
+    ]
+
+
+def _normalize(name: str) -> str:
+    return name if global_config.spark_sql_caseSensitive else name.lower()
+
+
+def assert_snowflake_view_does_not_exist_in_cache(name: str, replace: bool):
+    temp_view = get_temp_view(name)
+    if temp_view is not None and not replace:
+        raise AnalysisException(
+            f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+        )
+
+
+def assert_cached_view_does_not_exist_in_snowflake(
+    snowflake_view_name: list[str], replace: bool
+):
+    if len(snowflake_view_name) == 1:
+        name = unquote_if_quoted(snowflake_view_name[0])
+        sql_statement = f"SHOW VIEWS LIKE '{name}'"
+    else:
+        name = unquote_if_quoted(snowflake_view_name[1])
+        sql_statement = f"SHOW VIEWS LIKE '{name}' IN SCHEMA {snowflake_view_name[0]}"
+    if (
+        not replace
+        and len(Session.get_active_session().sql(sql_statement).collect()) > 0
+    ):
+        raise AnalysisException(
+            f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+        )
+
+
+def create_temporary_view_from_dataframe(
+    input_df_container: DataFrameContainer,
+    request_view_name: str,
+    is_global: bool,
+    replace: bool,
+) -> None:
+    input_df = input_df_container.dataframe
+
+    if is_global:
+        view_name = [global_config.spark_sql_globalTempDatabase, request_view_name]
+    else:
+        view_name = [request_view_name]
+    case_sensitive_view_name = ".".join(
+        [spark_to_sf_single_id_with_unquoting(part) for part in view_name]
+    )
+    snowflake_view_name = [
+        spark_to_sf_single_id_with_unquoting(part, True) for part in view_name
+    ]
+
+    if should_create_temporary_view_in_snowflake():
+        _create_snowflake_temporary_view(
+            input_df_container, snowflake_view_name, case_sensitive_view_name, replace
+        )
+    else:
+        store_temporary_view_as_dataframe(
+            input_df,
+            input_df_container.column_map,
+            input_df_container.column_map.get_spark_columns(),
+            input_df_container.column_map.get_snowpark_columns(),
+            case_sensitive_view_name,
+            snowflake_view_name,
+            replace,
+        )
+
+
+def _create_snowflake_temporary_view(
+    input_df_container: DataFrameContainer,
+    snowflake_view_name: list[str],
+    stored_view_name: str,
+    replace: bool,
+):
+    column_map = input_df_container.column_map
+    input_df = input_df_container.dataframe
+
+    session_config = sessions_config[get_spark_session_id()]
+    duplicate_column_names_handling_mode = session_config[
+        "snowpark.connect.views.duplicate_column_names_handling_mode"
+    ]
+
+    # rename columns to match spark names
+    if duplicate_column_names_handling_mode == "rename":
+        # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
+        rename_map = _create_column_rename_map(column_map.columns, True)
+        input_df = input_df.rename(rename_map)
+    elif duplicate_column_names_handling_mode == "drop":
+        # Drop duplicate column names by removing all but the first occurrence.
+        duplicated_columns, remaining_columns = _find_duplicated_columns(
+            column_map.columns
+        )
+        rename_map = _create_column_rename_map(remaining_columns, False)
+        if len(duplicated_columns) > 0:
+            input_df = input_df.drop(*duplicated_columns)
+        input_df = input_df.rename(rename_map)
+    else:
+        # rename columns without deduplication
+        rename_map = _create_column_rename_map(column_map.columns, False)
+        input_df = input_df.rename(rename_map)
+
+    try:
+        create_snowflake_temporary_view(
+            input_df, snowflake_view_name, stored_view_name, replace
+        )
+    except SnowparkSQLException as exc:
+        if _is_error_caused_by_view_referencing_itself(exc) and replace:
+            # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
+            _create_chained_view(input_df, snowflake_view_name)
+        else:
+            raise
+
+
+def _create_column_rename_map(
+    columns: list[ColumnNames], rename_duplicated: bool
+) -> dict:
+    if rename_duplicated is False:
+        # if we are not renaming duplicated columns, we can just return the original names
+        return {
+            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+            for col in columns
+        }
+
+    column_counts = Counter()
+    not_renamed_cols = []
+    renamed_cols = []
+
+    for col in columns:
+        new_column_name = col.spark_name
+        normalized_name = new_column_name.lower()
+        column_counts[normalized_name] += 1
+
+        if column_counts[normalized_name] > 1:
+            new_column_name = (
+                f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
+            )
+            renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+        else:
+            not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+
+    if len(renamed_cols) == 0:
+        return {
+            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+            for col in not_renamed_cols
+        }
+
+    # we need to make sure that we don't have duplicated names after renaming
+    # columns that were not renamed in this iteration should have priority over renamed duplicates
+    return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
+
+
+def _find_duplicated_columns(
+    columns: list[ColumnNames],
+) -> (list[str], list[ColumnNames]):
+    duplicates = []
+    remaining_columns = []
+    seen = set()
+    for col in columns:
+        if col.spark_name in seen:
+            duplicates.append(col.snowpark_name)
+        else:
+            seen.add(col.spark_name)
+            remaining_columns.append(col)
+    return duplicates, remaining_columns
+
+
+def _generate_random_builtin_view_name() -> str:
+    return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
+
+
+def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
+    return "view definition refers to view being defined" in str(exc).lower()
+
+
+def _create_chained_view(input_df: DataFrame, view_name: list[str]) -> None:
+    """
+    In order to create a view, which references itself, Spark would here take the previous
+    definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
+    as a workaround, we create a chain of internal views instead. This function:
+    1. Renames previous definition of A to some internal name (instead of deleting).
+    2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
+    """
+
+    session = Session.get_active_session()
+
+    view_name = ".".join(view_name)
+
+    tmp_name = _generate_random_builtin_view_name()
+    old_name_replacement = _generate_random_builtin_view_name()
+
+    input_df.create_or_replace_temp_view(tmp_name)
+
+    session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
+
+    ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
+
+    ddl = ddl.replace(view_name, old_name_replacement)
+
+    # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
+    ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
+
+    session.sql(ddl).collect()
+
+    session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+
+
+def store_temporary_view_as_dataframe(
+    input_df: DataFrame,
+    parent_column_map: ColumnNameMap,
+    spark_columns: list[str],
+    snowpark_columns: list[str],
+    view_name: str,
+    snowflake_view_name: list[str],
+    replace: bool,
+):
+    assert_cached_view_does_not_exist_in_snowflake(snowflake_view_name, replace)
+    schema = StructType(
+        [StructField(field.name, field.datatype) for field in input_df.schema.fields]
+    )
+    input_df_container = DataFrameContainer.create_with_column_mapping(
+        dataframe=input_df,
+        spark_column_names=spark_columns,
+        snowpark_column_names=snowpark_columns,
+        parent_column_name_map=parent_column_map,
+        cached_schema_getter=lambda: schema,
+    )
+
+    if replace:
+        try:
+            Session.get_active_session().sql(
+                "DROP VIEW IF EXISTS " + ".".join(snowflake_view_name)
+            ).collect()
+        except SnowparkSQLException as e:
+            # Spark allows for both table and temporary view to exist with the same name.
+            # Snowflake throws exception if we try to drop the view with doesn't exist but a table with the same name exists.
+            if (
+                "SQL compilation error: Object found is of type 'TABLE', not specified type 'VIEW'"
+                not in str(e)
+            ):
+                raise
+
+    register_temp_view(view_name, input_df_container, replace)
+
+
+def create_snowflake_temporary_view(
+    input_df: DataFrame,
+    snowflake_view_name: list[str],
+    stored_view_name: str,
+    replace: bool,
+    comment: Optional[str] = None,
+) -> None:
+    assert_snowflake_view_does_not_exist_in_cache(stored_view_name, replace)
+    if replace:
+        unregister_temp_view(stored_view_name)
+        input_df.create_or_replace_temp_view(snowflake_view_name, comment=comment)
+    else:
+        input_df.create_temp_view(snowflake_view_name, comment=comment)
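_create_column_rename_map in the helper above deduplicates Spark column names by suffixing later occurrences with _DEDUP_<n>, then re-runs the mapping so a synthetic name cannot collide with an existing one. A simplified, self-contained sketch of just the suffixing rule, independent of the package's ColumnNames and identifier helpers (it omits the re-check pass):

    from collections import Counter

    def dedup_names(names: list[str]) -> list[str]:
        # Later duplicates (compared case-insensitively) get a _DEDUP_<n>
        # suffix, mirroring the "rename" handling mode; first occurrences
        # keep their original name.
        counts: Counter = Counter()
        out = []
        for name in names:
            counts[name.lower()] += 1
            n = counts[name.lower()]
            out.append(name if n == 1 else f"{name}_DEDUP_{n - 1}")
        return out

    print(dedup_names(["id", "ID", "name", "id"]))
    # ['id', 'ID_DEDUP_1', 'name', 'id_DEDUP_2']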