snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
- snowflake/snowpark_connect/client/__init__.py +15 -0
- snowflake/snowpark_connect/client/error_utils.py +30 -0
- snowflake/snowpark_connect/client/exceptions.py +36 -0
- snowflake/snowpark_connect/client/query_results.py +90 -0
- snowflake/snowpark_connect/client/server.py +680 -0
- snowflake/snowpark_connect/client/utils/__init__.py +10 -0
- snowflake/snowpark_connect/client/utils/session.py +85 -0
- snowflake/snowpark_connect/column_name_handler.py +404 -243
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/config.py +237 -23
- snowflake/snowpark_connect/constants.py +2 -0
- snowflake/snowpark_connect/dataframe_container.py +102 -8
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +172 -23
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
- snowflake/snowpark_connect/expression/literal.py +37 -13
- snowflake/snowpark_connect/expression/map_cast.py +123 -5
- snowflake/snowpark_connect/expression/map_expression.py +80 -27
- snowflake/snowpark_connect/expression/map_extension.py +322 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
- snowflake/snowpark_connect/expression/map_udf.py +85 -20
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
- snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
- snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
- snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +110 -10
- snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
- snowflake/snowpark_connect/relation/map_extension.py +263 -29
- snowflake/snowpark_connect/relation/map_join.py +683 -442
- snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
- snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
- snowflake/snowpark_connect/relation/map_relation.py +48 -19
- snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
- snowflake/snowpark_connect/relation/map_show_string.py +13 -6
- snowflake/snowpark_connect/relation/map_sql.py +1233 -222
- snowflake/snowpark_connect/relation/map_stats.py +48 -9
- snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
- snowflake/snowpark_connect/relation/read/map_read.py +134 -43
- snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
- snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
- snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
- snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
- snowflake/snowpark_connect/relation/read/utils.py +50 -5
- snowflake/snowpark_connect/relation/stage_locator.py +91 -55
- snowflake/snowpark_connect/relation/utils.py +128 -5
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +929 -319
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +110 -48
- snowflake/snowpark_connect/server.py +546 -456
- snowflake/snowpark_connect/server_common/__init__.py +500 -0
- snowflake/snowpark_connect/snowflake_session.py +65 -0
- snowflake/snowpark_connect/start_server.py +53 -5
- snowflake/snowpark_connect/type_mapping.py +349 -27
- snowflake/snowpark_connect/typed_column.py +9 -7
- snowflake/snowpark_connect/utils/artifacts.py +9 -8
- snowflake/snowpark_connect/utils/cache.py +49 -27
- snowflake/snowpark_connect/utils/concurrent.py +36 -1
- snowflake/snowpark_connect/utils/context.py +187 -37
- snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
- snowflake/snowpark_connect/utils/identifiers.py +137 -3
- snowflake/snowpark_connect/utils/io_utils.py +57 -1
- snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
- snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
- snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
- snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
- snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +64 -28
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
- snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
- snowflake/snowpark_connect/utils/telemetry.py +163 -22
- snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
- snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
- snowflake/snowpark_connect/utils/udf_cache.py +117 -41
- snowflake/snowpark_connect/utils/udf_helper.py +39 -37
- snowflake/snowpark_connect/utils/udf_utils.py +133 -14
- snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
- snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +6 -2
- snowflake/snowpark_decoder/spark_decoder.py +12 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
- snowflake/snowpark_connect/hidden_column.py +0 -39
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/udtf_utils.py:

@@ -32,14 +32,16 @@ def create_udtf(
     udtf = udtf_proto.python_udtf
     callable_func = CloudPickleSerializer().loads(udtf.command)
 
-
+    original_func = callable_func.eval
+    func_signature = inspect.signature(original_func)
     # Set all input types to VariantType regardless of type hints so that we can pass all arguments as VariantType.
     # Otherwise, we will run into issues with type mismatches. This only applies for UDTF registration.
     # We subtract one here since UDTF functions are class methods and always have "self" as the first parameter.
     input_types = [VariantType()] * (len(func_signature.parameters) - 1)
 
-
-
+    if imports:
+        # Wrapp callable to allow reading imported files
+        callable_func = artifacts_reader_wrapper(callable_func)
 
     if is_arrow_enabled:
         callable_func = spark_compatible_udtf_wrapper_with_arrow(
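Note on the `input_types` line above: `inspect.signature` on the unbound `eval` includes `self`, hence the `- 1`. A minimal sketch of the same counting (the `MyUDTF` class here is hypothetical, not from the package):

import inspect

from snowflake.snowpark.types import VariantType

class MyUDTF:  # hypothetical Spark-style UDTF handler
    def eval(self, a, b):
        yield (a, b)

sig = inspect.signature(MyUDTF.eval)  # parameters: ('self', 'a', 'b')
input_types = [VariantType()] * (len(sig.parameters) - 1)
print(len(input_types))  # 2, one VariantType per user-facing argument
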
@@ -48,7 +50,7 @@ def create_udtf(
     elif is_spark_compatible_udtf_mode_enabled:
         callable_func = spark_compatible_udtf_wrapper(callable_func, expected_types)
     else:
-        callable_func.process =
+        callable_func.process = original_func
     if hasattr(callable_func, "terminate"):
         callable_func.end_partition = callable_func.terminate
 
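The `process`/`end_partition` assignments bridge Spark's UDTF protocol (`eval`/`terminate`) onto Snowpark's handler protocol. A hedged illustration of that aliasing on a hypothetical class:

class SparkStyleUDTF:  # hypothetical handler, not from the package
    def eval(self, x):
        yield (x,)

    def terminate(self):
        yield ("done",)

# Snowpark invokes process()/end_partition(); alias the Spark-style methods.
SparkStyleUDTF.process = SparkStyleUDTF.eval
if hasattr(SparkStyleUDTF, "terminate"):
    SparkStyleUDTF.end_partition = SparkStyleUDTF.terminate
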
@@ -107,7 +109,9 @@ def create_udtf(
                 imports=imports,
             )
         case _:
-            raise NotImplementedError(
+            raise NotImplementedError(
+                f"[snowpark_connect::unsupported_operation] {called_from}"
+            )
 
 
 def artifacts_reader_wrapper(user_udtf_cls: type) -> type:
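This hunk introduces the `[snowpark_connect::<error_code>]` message prefix that recurs throughout this release. A hypothetical helper showing the shape of the tagged messages (the package inlines the f-strings rather than using such a helper):

def tag_error(code: str, message: str) -> str:
    # hypothetical; mirrors the f-string pattern used across the hunks below
    return f"[snowpark_connect::{code}] {message}"

print(tag_error("unsupported_operation", "my_caller"))
# [snowpark_connect::unsupported_operation] my_caller
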
@@ -127,7 +131,12 @@ def artifacts_reader_wrapper(user_udtf_cls: type) -> type:
 
         # Change directory to the one containing the UDF imported files
         import_path = sys._xoptions["snowflake_import_directory"]
-
+        if os.name == "nt":
+            import tempfile
+
+            tmp_path = os.path.join(tempfile.gettempdir(), f"sas-{os.getpid()}")
+        else:
+            tmp_path = f"/tmp/sas-{os.getpid()}"
         os.makedirs(tmp_path, exist_ok=True)
         os.chdir(tmp_path)
         shutil.copytree(import_path, tmp_path, dirs_exist_ok=True)
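The added branch makes the scratch directory work on Windows instead of hard-coding `/tmp`. The same selection logic in isolation (a sketch, assuming only the standard library):

import os
import tempfile

# Per-process scratch directory; tempfile.gettempdir() covers Windows ("nt").
if os.name == "nt":
    tmp_path = os.path.join(tempfile.gettempdir(), f"sas-{os.getpid()}")
else:
    tmp_path = f"/tmp/sas-{os.getpid()}"
os.makedirs(tmp_path, exist_ok=True)
print(tmp_path)
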
@@ -195,17 +204,19 @@ def _create_convert_table_argument_to_row():
                 # Named access: row["col1"], row["col2"]
                 if key in self._field_to_index:
                     return self._values[self._field_to_index[key]]
-                raise KeyError(key)
+                raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
             else:
-                raise TypeError(
+                raise TypeError(
+                    f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}"
+                )
 
         def __getattr__(self, name):
             # Attribute access: row.col1, row.col2
             if name.startswith("_"):
-                raise AttributeError(name)
+                raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
             if name in self._field_to_index:
                 return self._values[self._field_to_index[name]]
-            raise AttributeError(name)
+            raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")
 
         def __len__(self):
             return len(self._values)
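The row wrapper supports positional, named, and attribute access, and this hunk tags its errors. A condensed, hypothetical version of those access rules (not the package's class):

class MiniRow:  # simplified sketch
    def __init__(self, values, field_to_index):
        self._values = list(values)
        self._field_to_index = dict(field_to_index)

    def __getitem__(self, key):
        if isinstance(key, int):
            return self._values[key]  # positional access: row[0]
        if isinstance(key, str):
            if key in self._field_to_index:
                return self._values[self._field_to_index[key]]  # named access
            raise KeyError(f"[snowpark_connect::invalid_operation] {key}")
        raise TypeError(f"[snowpark_connect::type_mismatch] Invalid key type: {type(key)}")

    def __getattr__(self, name):
        # attribute access: row.col1; underscore names never resolve to fields
        if not name.startswith("_") and name in self._field_to_index:
            return self._values[self._field_to_index[name]]
        raise AttributeError(f"[snowpark_connect::invalid_operation] {name}")

row = MiniRow([1, "a"], {"id": 0, "label": 1})
assert row["id"] == 1 and row[1] == "a" and row.label == "a"
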
@@ -279,7 +290,9 @@ def spark_compatible_udtf_wrapper(
             return val
         if isinstance(val, datetime.datetime):
             return val.date()
-        raise AttributeError(
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid date value {val}"
+        )
 
     def _coerce_to_binary(val: object, target_type_name: str = "byte") -> bytes | None:
         if target_type_name == "binary":

@@ -343,7 +356,9 @@ def spark_compatible_udtf_wrapper(
     def _coerce_to_timestamp(val: object) -> datetime.datetime | None:
         if isinstance(val, datetime.datetime):
             return val
-        raise AttributeError(
+        raise AttributeError(
+            f"[snowpark_connect::invalid_input] Invalid time stamp value {val}"
+        )
 
     SCALAR_COERCERS = {
         "bool": _coerce_to_bool,

@@ -447,7 +462,7 @@ def spark_compatible_udtf_wrapper(
 
         if not isinstance(raw_row_tuple, (tuple, list)):
             raise TypeError(
-                f"[UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_INVALID_OUTPUT_ROW_TYPE] return value should be an iterable object containing tuples, but got {type(raw_row_tuple)}"
             )
 
         if len(raw_row_tuple) != len(expected_types):

@@ -467,7 +482,7 @@ def spark_compatible_udtf_wrapper(
             and val is not None
         ):
             raise RuntimeError(
-                f"[UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
+                f"[snowpark_connect::type_mismatch] [UNEXPECTED_TUPLE_WITH_STRUCT] Expected a struct for column at position {i}, but got a primitive value of type {type(val)}"
             )
 
         coerced_row_list = [None] * len(expected_types)

@@ -533,7 +548,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
                 return pa.map_(key_type, value_type)
             case _, _:
                 raise TypeError(
-                    f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
+                    f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported Python scalar type for Arrow conversion: {target_py_type}"
                 )
     elif kind == "array":
         element_type_info = type_marker

@@ -543,7 +558,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         struct_fields_info = type_marker
         if not isinstance(struct_fields_info, dict):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Invalid struct definition for Arrow: expected dict, got {type(struct_fields_info)}"
             )
         fields = []
         for field_name, field_type_info in struct_fields_info.items():

@@ -552,7 +567,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         return pa.struct(fields)
     else:
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
+            f"[snowpark_connect::unsupported_type] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported data kind for Arrow conversion: {kind}"
         )
 
     def _convert_to_arrow_value(

@@ -576,7 +591,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         ]
         if not isinstance(obj, (list, tuple)):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected list or tuple for Arrow array type, got {type(obj).__name__}"
            )
         element_type = arrow_type.value_type
         return [_convert_to_arrow_value(e, element_type, "array") for e in obj]

@@ -584,7 +599,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
     if pa.types.is_map(arrow_type):
         if not isinstance(obj, dict):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected dict for Arrow map type, got {type(obj).__name__}"
             )
         key_type = arrow_type.key_type
         value_type = arrow_type.item_type

@@ -610,7 +625,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
     else:
         # If the UDTF yields a list/tuple (or anything not a dict) for a struct column, it's an error.
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Expected a dictionary for Arrow struct type column, but got {type(obj).__name__}"
         )
 
     # Check if a scalar type is expected and if obj is a collection; if so, error out.

@@ -622,7 +637,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
     ):
         if isinstance(obj, (list, tuple, dict)):
             raise TypeError(
-                f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
+                f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert Python collection type {type(obj).__name__} to scalar Arrow type {arrow_type}"
             )
 
     if pa.types.is_boolean(arrow_type):

@@ -638,7 +653,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         elif obj == 1:
             return True
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {obj} to Arrow boolean"
         )
     if isinstance(obj, str):
         v_str = obj.strip().lower()

@@ -647,7 +662,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         if v_str == "false":
             return False
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow boolean"
         )
 
     if pa.types.is_integer(arrow_type):

@@ -663,7 +678,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         except ValueError:
             pass
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow integer"
         )
 
     if pa.types.is_floating(arrow_type):

@@ -675,7 +690,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         except ValueError:
             pass
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow float"
         )
 
     if pa.types.is_string(arrow_type):

@@ -687,7 +702,7 @@ def spark_compatible_udtf_wrapper_with_arrow(
         if isinstance(obj, str):
             return obj
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow string"
         )
 
     if pa.types.is_binary(arrow_type) or pa.types.is_fixed_size_binary(arrow_type):

@@ -698,21 +713,21 @@ def spark_compatible_udtf_wrapper_with_arrow(
         if isinstance(obj, int):
             return bytearray([obj])
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow binary"
         )
 
     if pa.types.is_date(arrow_type):
         if isinstance(obj, datetime.date):
             return obj
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow date. Expected datetime.date."
         )
 
     if pa.types.is_timestamp(arrow_type):
         if isinstance(obj, datetime.datetime):
             return obj
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow timestamp. Expected datetime.datetime."
         )
 
     if pa.types.is_decimal(arrow_type):

@@ -727,11 +742,11 @@ def spark_compatible_udtf_wrapper_with_arrow(
             pass
 
         raise TypeError(
-            f"[UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
+            f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] Cannot convert {type(obj).__name__} to Arrow decimal. Expected decimal.Decimal or compatible int/str."
         )
 
     raise TypeError(
-        f"[UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
+        f"[snowpark_connect::unsupported_operation] [UDTF_ARROW_TYPE_CAST_ERROR] Unsupported type conversion for {type(obj).__name__} to Arrow type {arrow_type}"
     )
 
 class WrappedUDTF:
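All of the Arrow hunks above follow one dispatch pattern: probe the target Arrow type with `pa.types.is_*` predicates, accept exact Python matches, and raise a tagged `TypeError` otherwise. A minimal sketch of that pattern, covering only three of the many types the package handles:

import datetime

import pyarrow as pa

def coerce_scalar(obj, arrow_type):
    # Sketch of the dispatch style; the package covers many more types.
    if pa.types.is_boolean(arrow_type) and isinstance(obj, bool):
        return obj
    if pa.types.is_integer(arrow_type) and isinstance(obj, int) and not isinstance(obj, bool):
        return obj
    if pa.types.is_timestamp(arrow_type) and isinstance(obj, datetime.datetime):
        return obj
    raise TypeError(
        f"[snowpark_connect::type_mismatch] [UDTF_ARROW_TYPE_CAST_ERROR] "
        f"Cannot convert {type(obj).__name__} to Arrow type {arrow_type}"
    )

print(coerce_scalar(7, pa.int64()))  # 7
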
snowflake/snowpark_connect/utils/upload_java_jar.py (new file):

@@ -0,0 +1,57 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import importlib
+import tempfile
+import threading
+import zipfile
+from pathlib import Path
+
+from snowflake.snowpark import Session
+
+_java_initialized_ = threading.Event()
+_java_initialized_lock = threading.Lock()
+JAVA_UDFS_JAR_NAME = "java_udfs-1.0-SNAPSHOT.jar"
+
+
+def upload_java_udf_jar(session: Session) -> None:
+    global _java_initialized_
+    if _java_initialized_.is_set():
+        return
+
+    with _java_initialized_lock:
+        if not _java_initialized_.is_set():
+            stage = session.get_session_stage()
+            try:
+                jar_path = importlib.resources.files(
+                    "snowflake.snowpark_connect.resources"
+                ).joinpath(JAVA_UDFS_JAR_NAME)
+            except NotADirectoryError:
+                # importlib.resource doesn't work in Stage Package method
+                zip_path = Path(__file__).parent.parent.parent.parent
+                jar_path_in_zip = (
+                    f"snowflake/snowpark_connect/resources/{JAVA_UDFS_JAR_NAME}"
+                )
+                temp_dir = tempfile.gettempdir()
+
+                with zipfile.ZipFile(zip_path, "r") as zip_ref:
+                    if jar_path_in_zip not in zip_ref.namelist():
+                        raise FileNotFoundError(
+                            f"[snowpark_connect::invalid_input] {jar_path_in_zip} not found"
+                        )
+                    zip_ref.extract(jar_path_in_zip, temp_dir)
+
+                jar_path = f"{temp_dir}/{jar_path_in_zip}"
+
+            from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
+
+            upload_result = session.file.put(
+                str(jar_path), f"{stage}/{RESOURCE_PATH}", overwrite=True
+            )
+
+            if upload_result[0].status != "UPLOADED":
+                raise RuntimeError(
+                    f"[snowpark_connect::internal_error] Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
+                )
+            _java_initialized_.set()
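The new module guards the upload with a double-checked `threading.Event`/`Lock` pair, so the jar is staged at most once per process. A hedged usage sketch (the connection parameters are placeholders, not real values):

from snowflake.snowpark import Session
from snowflake.snowpark_connect.utils.upload_java_jar import upload_java_udf_jar

connection_parameters = {"account": "...", "user": "...", "password": "..."}  # placeholders
session = Session.builder.configs(connection_parameters).create()

upload_java_udf_jar(session)  # first call uploads java_udfs-1.0-SNAPSHOT.jar to the session stage
upload_java_udf_jar(session)  # later calls return immediately once the Event is set
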
snowflake/snowpark_decoder/dp_session.py:

@@ -85,8 +85,12 @@ class DataframeProcessorSession:
             request = spark_proto.ExecutePlanRequest()
         elif any_msg.Is(spark_proto.AnalyzePlanRequest.DESCRIPTOR):
             request = spark_proto.AnalyzePlanRequest()
+        elif any_msg.Is(spark_proto.AddArtifactsRequest.DESCRIPTOR):
+            request = spark_proto.AddArtifactsRequest()
+        elif any_msg.Is(spark_proto.ArtifactStatusesRequest.DESCRIPTOR):
+            request = spark_proto.ArtifactStatusesRequest()
         else:
-            raise NotImplementedError("Unknown request type")
+            raise NotImplementedError(f"Unknown request type: {any_msg.TypeName()}")
         dp_req_proto.payload.Unpack(request)
         result = self._decoder.request(request)
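The dispatch above relies on protobuf's `Any` wrapper: `Is()` checks the packed payload's descriptor and `Unpack()` deserializes it into a concrete message. A standalone sketch of the same round trip (assuming pyspark's Spark Connect protos are importable as shown):

from google.protobuf import any_pb2

import pyspark.sql.connect.proto as spark_proto  # assumption: pyspark with Spark Connect installed

any_msg = any_pb2.Any()
any_msg.Pack(spark_proto.AddArtifactsRequest())

if any_msg.Is(spark_proto.AddArtifactsRequest.DESCRIPTOR):
    request = spark_proto.AddArtifactsRequest()
    any_msg.Unpack(request)  # fills `request` from the packed payload
else:
    raise NotImplementedError(f"Unknown request type: {any_msg.TypeName()}")
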
@@ -107,5 +111,5 @@ class DataframeProcessorSession:
             )
             return proto2str(dp_res_proto)
         except Exception:
-            #
+            # raise the error to GS
             raise
snowflake/snowpark_decoder/spark_decoder.py:

@@ -60,6 +60,18 @@ class SparkDecoder:
                 return ResponseEnvelope(
                     config_response=self.servicer.Config(request, ctx)
                 )
+            case proto.AddArtifactsRequest():
+                return ResponseEnvelope(
+                    add_artifacts_response=self.servicer.AddArtifacts(
+                        iter([request]), ctx
+                    )
+                )
+            case proto.ArtifactStatusesRequest():
+                return ResponseEnvelope(
+                    artifact_status_response=self.servicer.ArtifactStatus(
+                        request, ctx
+                    )
+                )
             case _:
                 raise NotImplementedError(
                     "Unknown request type: %s" % type(request)
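Two details worth noting in this hunk: the `case proto.AddArtifactsRequest():` class patterns match by message type, and `AddArtifacts` receives `iter([request])` because the servicer method expects a request iterator (AddArtifacts is a client-streaming RPC in Spark Connect). A sketch of the match-by-type dispatch (Python 3.10+, same proto import assumption as above):

import pyspark.sql.connect.proto as proto  # assumption: pyspark with Spark Connect installed

def route(request):
    # Class patterns perform an isinstance() check on the message.
    match request:
        case proto.AddArtifactsRequest():
            return "add_artifacts"
        case proto.ArtifactStatusesRequest():
            return "artifact_status"
        case _:
            raise NotImplementedError("Unknown request type: %s" % type(request))

print(route(proto.AddArtifactsRequest()))  # add_artifacts
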
scripts/snowpark-submit:

@@ -312,9 +312,9 @@ def generate_spark_submit_cmd(
         args_for_spark.append(v)
     if args.verbose:
         args_for_spark.append("--verbose")
-        setup_logging(logging.
+        setup_logging(logging.DEBUG)
     else:
-        setup_logging(logging.
+        setup_logging(logging.INFO)
     args_for_spark.append(args.filename)
     args_for_spark.extend(args.app_arguments)
     return args_for_spark
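The completed lines pick the log level from the `--verbose` flag. The equivalent pattern in isolation (`setup_logging` is the script's own helper; `logging.basicConfig` stands in for it here):

import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument("--verbose", action="store_true")
args = parser.parse_args(["--verbose"])

# DEBUG when --verbose is passed, INFO otherwise.
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
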
snowpark_connect-1.6.0.dist-info/METADATA:

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: snowpark-connect
-Version: 0.27.0
+Version: 1.6.0
 Summary: Snowpark Connect for Spark
 Author: Snowflake, Inc
 License: Apache License, Version 2.0

@@ -9,29 +9,36 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 License-File: LICENSE-binary
 License-File: NOTICE-binary
+Requires-Dist: snowpark-connect-deps-1==3.56.3
+Requires-Dist: snowpark-connect-deps-2==3.56.3
 Requires-Dist: certifi>=2025.1.31
 Requires-Dist: cloudpickle
-Requires-Dist: fsspec
+Requires-Dist: fsspec
 Requires-Dist: jpype1
-Requires-Dist: protobuf<
+Requires-Dist: protobuf<6.32.0,>=4.25.3
 Requires-Dist: s3fs>=2025.3.0
 Requires-Dist: snowflake.core<2,>=1.0.5
-Requires-Dist: snowflake-snowpark-python[pandas]<1.
+Requires-Dist: snowflake-snowpark-python[pandas]<1.43.0,>=1.42.0
+Requires-Dist: snowflake-connector-python<4.2.0,>=3.18.0
 Requires-Dist: sqlglot>=26.3.8
 Requires-Dist: jaydebeapi
-Requires-Dist: aiobotocore
+Requires-Dist: aiobotocore<=2.25.0,>=2.23.0
 Requires-Dist: py4j==0.10.9.7
 Requires-Dist: pandas>=1.0.5
 Requires-Dist: pyarrow>=4.0.0
-Requires-Dist: grpcio
-Requires-Dist: grpcio-status
+Requires-Dist: grpcio<=1.71.0,>=1.56.0
+Requires-Dist: grpcio-status<=1.71.0,>=1.56.0
 Requires-Dist: googleapis-common-protos>=1.56.4
 Requires-Dist: numpy<2,>=1.15
+Requires-Dist: gcsfs>=2025.9.0
+Provides-Extra: jdk
+Requires-Dist: jdk4py==17.0.9.2; extra == "jdk"
 Dynamic: author
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: license
 Dynamic: license-file
+Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
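The metadata now declares a `jdk` extra backed by `jdk4py`, so an install such as `pip install "snowpark-connect[jdk]"` (version pin optional) pulls in a bundled JDK alongside the newly pinned gRPC, protobuf, and connector dependencies.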