snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/utils.py

@@ -7,16 +7,18 @@ import re
 import string
 import time
 import uuid
-from typing import Sequence
+from typing import Any, Sequence

 import pyspark.sql.connect.proto.relations_pb2 as relation_proto

 import snowflake.snowpark.functions as snowpark_fn
 from snowflake import snowpark
+from snowflake.snowpark import Column
 from snowflake.snowpark.types import (
     BinaryType,
     BooleanType,
     ByteType,
+    DataType,
     DateType,
     DecimalType,
     DoubleType,
@@ -92,6 +94,21 @@ TYPE_MAP_FOR_TO_SCHEMA = {
 }


+# This mapping is used to map the compression type to the extension of the file.
+FILE_COMPRESSION_TO_EXTENSION = {
+    "GZIP": "gz",
+    "BZ2": "bz2",
+    "BROTLI": "br",
+    "ZSTD": "zst",
+    "DEFLATE": "deflate",
+    "RAW_DEFLATE": "raw_deflate",
+    "SNAPPY": "snappy",
+    "LZO": "lzo",
+    "LZ4": "lz4",
+    "BZIP2": "bz2",
+}
+
+
 def get_df_with_partition_row_number(
     container: DataFrameContainer,
     plan_id: int | None,
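The new mapping feeds the compression-suffix lookup used later in generate_spark_compatible_filename: known codecs resolve to their conventional file extension, anything else falls back to the lowercased codec name via the .get() default. A minimal sketch of that lookup (the dict is trimmed to three entries here; the example codec names are illustrative):

# Sketch of the extension lookup introduced above; entries copied from the
# diff but trimmed for brevity.
FILE_COMPRESSION_TO_EXTENSION = {"GZIP": "gz", "ZSTD": "zst", "BROTLI": "br"}

def extension_for(compression: str) -> str:
    # Known codecs map to their conventional extension; unknown codecs
    # fall back to the lowercased codec name.
    return FILE_COMPRESSION_TO_EXTENSION.get(compression.upper(), compression.lower())

assert extension_for("gzip") == "gz"
assert extension_for("lzma") == "lzma"  # not in the map: lowercased fallback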
@@ -159,6 +176,7 @@ def generate_spark_compatible_filename(
     attempt_number: int = 0,
     compression: str = None,
     format_ext: str = "parquet",
+    shared_uuid: str = None,
 ) -> str:
     """Generate a Spark-compatible filename following the convention:
     part-<task-id>-<uuid>-c<attempt-number>.<compression>.<format>
@@ -168,12 +186,13 @@ def generate_spark_compatible_filename(
         attempt_number: Attempt number (usually 0)
         compression: Compression type (e.g., 'snappy', 'gzip', 'none')
         format_ext: File format extension (e.g., 'parquet', 'csv', 'json')
+        shared_uuid: Shared UUID for the file

     Returns:
         A filename string following Spark's naming convention
     """
-    #
-    file_uuid = str(uuid.uuid4())
+    # Use the shared UUID if provided, otherwise generate a new one for uniqueness
+    file_uuid = shared_uuid or str(uuid.uuid4())

     # Format task ID with leading zeros (5 digits)
     formatted_task_id = f"{task_id:05d}"
@@ -186,13 +205,15 @@ def generate_spark_compatible_filename(

     # Add compression if specified and not 'none'
     if compression and compression.lower() not in ("none", "uncompressed"):
-        compression_part = f".{compression.lower()}"
+        compression_part = f".{FILE_COMPRESSION_TO_EXTENSION.get(compression.upper(), compression.lower())}"
     else:
         compression_part = ""

     # Add format extension if specified
-    if format_ext:
+    if format_ext == "parquet":
         return f"{base_name}{compression_part}.{format_ext}"
+    elif format_ext is not None and format_ext != "":
+        return f"{base_name}.{format_ext}{compression_part}"
     else:
         return f"{base_name}{compression_part}"

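Net effect of the two hunks above: Parquet keeps Spark's codec-before-extension convention (.snappy.parquet), while every other format now places the codec suffix after the extension (.csv.gz rather than the old .gz.csv). A condensed restatement of the new branch, assuming base_name is the part-<task-id>-<uuid>-c<attempt-number> prefix built earlier in the function (its construction is not shown in this diff):

# Condensed restatement of the suffix placement after this change.
def filename_suffix(base_name: str, compression_part: str, format_ext: str) -> str:
    if format_ext == "parquet":
        # Parquet: codec before the extension, e.g. part-...-c000.snappy.parquet
        return f"{base_name}{compression_part}.{format_ext}"
    elif format_ext is not None and format_ext != "":
        # Other formats: codec after the extension, e.g. part-...-c000.csv.gz
        return f"{base_name}.{format_ext}{compression_part}"
    else:
        return f"{base_name}{compression_part}"

print(filename_suffix("part-00000-abc-c000", ".snappy", "parquet"))  # part-00000-abc-c000.snappy.parquet
print(filename_suffix("part-00000-abc-c000", ".gz", "csv"))          # part-00000-abc-c000.csv.gz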
@@ -267,3 +288,105 @@ def snowpark_functions_col(name: str, column_map: ColumnNameMap) -> snowpark.Col
     """
     is_qualified_name = name not in column_map.get_snowpark_columns()
     return snowpark_fn.col(name, _is_qualified_name=is_qualified_name)
+
+
+def is_aggregate_function(func_name: str) -> bool:
+    """
+    Check if a function name is an aggregate function.
+
+    Uses a hybrid approach:
+    1. First checks PySpark's docstring convention (docstrings starting with "Aggregate function:")
+    2. Falls back to a hardcoded list for functions with missing/incorrect docstrings
+
+    This ensures comprehensive coverage while automatically supporting new PySpark aggregate functions.
+
+    Args:
+        func_name: The function name to check (case-insensitive)
+
+    Returns:
+        True if the function is an aggregate function, False otherwise
+    """
+    try:
+        import pyspark.sql.functions as pyspark_functions
+
+        # TODO:
+        """
+        Check we can leverage scala classes to determine agg functions:
+        https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala#L207
+        """
+
+        # Try PySpark docstring approach first (covers most aggregate functions)
+        pyspark_func = getattr(pyspark_functions, func_name.lower(), None)
+        if pyspark_func and pyspark_func.__doc__:
+            if pyspark_func.__doc__.lstrip().startswith("Aggregate function:"):
+                return True
+
+        # Fallback list for aggregate functions with missing/incorrect docstrings
+        # These are known aggregate functions that don't have proper docstring markers
+        fallback_aggregates = {
+            "percentile_cont",
+            "percentile_disc",
+            "any_value",
+            "grouping",
+            "grouping_id",
+        }
+        return func_name.lower() in fallback_aggregates
+
+    except Exception:
+        return False
+
+
+def get_all_dependent_column_names(columns: list[Column]) -> set[str]:
+    all_dependent_column_names = set()
+
+    for col in columns:
+        if hasattr(col, "_expr1"):
+            all_dependent_column_names = all_dependent_column_names.union(
+                col._expr1.dependent_column_names()
+            )
+
+    return all_dependent_column_names
+
+
+def map_pivot_value_to_spark_column_name(pivot_value: Any) -> tuple[str, bool]:
+    """
+    Maps pivot_value to the spark column name, without appending the aggregation suffix.
+
+    Returns:
+        A tuple containing the spark column name and a boolean indicating whether the original_value was null or not.
+    """
+
+    is_null = False
+
+    if pivot_value in (None, "NULL", "None"):
+        spark_name = "null"
+        is_null = True
+    else:
+        if isinstance(pivot_value, tuple):
+            spark_name = str(list(pivot_value))
+        elif isinstance(pivot_value, dict):
+            spark_name = "{" + ", ".join(str(v) for v in pivot_value.values()) + "}"
+        else:
+            spark_name = str(pivot_value)
+
+    return spark_name, is_null
+
+
+def create_pivot_column_condition(
+    col: Column,
+    pivot_value: Any,
+    pivot_value_is_null: bool,
+    cast_literal_to: DataType | None = None,
+) -> snowpark.Column:
+    if isinstance(pivot_value, dict):
+        elements = [
+            snowpark_fn.lit(item) for pair in pivot_value.items() for item in pair
+        ]
+        lit = snowpark_fn.object_construct_keep_null(*elements)
+    else:
+        lit = snowpark_fn.lit(pivot_value)
+
+    if cast_literal_to:
+        lit = snowpark_fn.cast(lit, cast_literal_to)
+
+    return snowpark_fn.is_null(col) if pivot_value_is_null else (col == lit)
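The pivot helper is easiest to read through its outputs. Below is a dependency-free restatement of map_pivot_value_to_spark_column_name with hand-checked expected values; the is_aggregate_function docstring probe is not exercised here because its result depends on the installed PySpark version.

# Restatement of map_pivot_value_to_spark_column_name from the hunk above,
# kept self-contained so the expected outputs can be verified directly.
from typing import Any

def pivot_name(pivot_value: Any) -> tuple[str, bool]:
    if pivot_value in (None, "NULL", "None"):
        return "null", True
    if isinstance(pivot_value, tuple):
        return str(list(pivot_value)), False
    if isinstance(pivot_value, dict):
        return "{" + ", ".join(str(v) for v in pivot_value.values()) + "}", False
    return str(pivot_value), False

assert pivot_name(None) == ("null", True)           # null pivot values map to "null"
assert pivot_name(("a", 1)) == ("['a', 1]", False)  # tuples render as lists
assert pivot_name({"k": "v"}) == ("{v}", False)     # dicts render only their values
assert pivot_name(42) == ("42", False)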
snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py

@@ -11,6 +11,8 @@ from snowflake import snowpark
 from snowflake.snowpark import DataFrameWriter
 from snowflake.snowpark.dataframe import DataFrame
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
 from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
 from snowflake.snowpark_connect.relation.read.utils import Connection
@@ -65,9 +67,13 @@ class JdbcDataFrameWriter(DataFrameWriter):
                     self._create_table(conn, table, container, jdbc_dialect)
             case "errorifexists":
                 if table_exist:
-                    raise ValueError(
+                    exception = ValueError(
                         "table is already exist and write mode is ERROR_IF_EXISTS"
                     )
+                    attach_custom_error_code(
+                        exception, ErrorCodes.INVALID_OPERATION
+                    )
+                    raise exception
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case "overwrite":
@@ -82,7 +88,9 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 else:
                     self._create_table(conn, table, container, jdbc_dialect)
             case _:
-                raise ValueError(f"Invalid write mode value{write_mode}")
+                exception = ValueError(f"Invalid write mode value{write_mode}")
+                attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+                raise exception

         task_insert_into_data_source_with_retry(
             input_df,
@@ -141,6 +149,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 cursor.execute(sql)
             except Exception as e:
                 logger.error(f"failed to drop table {table} from the data source {e}")
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e

     def _create_table(
@@ -189,6 +198,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
                 cursor.execute(sql)
             except Exception as e:
                 logger.error(f"failed to create a table {table} from the data source {e}")
+                attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
                 raise e


@@ -218,6 +228,7 @@ def _task_insert_into_data_source(
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
         conn.rollback()
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         cursor.close()
@@ -274,6 +285,7 @@ def task_insert_into_data_source_with_retry(
         )
     except Exception as e:
         logger.debug(f"failed to insert into data source {e}")
+        attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
         raise e
     finally:
         close_connection(conn)
@@ -339,4 +351,8 @@ def convert_sp_to_sql_type(
                 case _:
                     return "TIMESTAMP"
         case _:
-            raise TypeError(f"Unsupported data type: {datatype.__class__.__name__}")
+            exception = TypeError(
+                f"Unsupported data type: {datatype.__class__.__name__}"
+            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+            raise exception
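Every jdbc_write_dbapi.py hunk above applies the same pattern: build or catch the exception, tag it with a stable error code, then re-raise. The real attach_custom_error_code and ErrorCodes live in the new error/error_utils.py and error/error_codes.py modules (listed at the top of this diff but not shown here), so the stand-ins below only sketch the assumed contract.

# Stand-ins illustrating the attach-then-raise shape; the assumption is that
# the helper tags the exception in place so upstream handlers can translate
# it into a machine-readable error code.
from enum import Enum

class ErrorCodes(Enum):  # stand-in for error/error_codes.py
    INVALID_INPUT = "INVALID_INPUT"
    INTERNAL_ERROR = "INTERNAL_ERROR"

def attach_custom_error_code(exc: Exception, code: ErrorCodes) -> None:
    exc.custom_error_code = code  # hypothetical attribute name

def write_with_mode(write_mode: str) -> None:
    if write_mode not in ("append", "overwrite", "errorifexists", "ignore"):
        # Same shape as the `case _:` branches in the hunks above.
        exception = ValueError(f"Invalid write mode value {write_mode}")
        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
        raise exception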