snowpark-connect 0.24.0__py3-none-any.whl → 0.26.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +23 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +22 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/literal.py +13 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +88 -29
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +840 -367
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
- snowflake/snowpark_connect/relation/map_extension.py +52 -11
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_map_partitions.py +9 -4
- snowflake/snowpark_connect/relation/map_relation.py +12 -1
- snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +100 -46
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/utils.py +44 -0
- snowflake/snowpark_connect/relation/write/map_write.py +175 -75
- snowflake/snowpark_connect/resources_initializer.py +47 -6
- snowflake/snowpark_connect/server.py +26 -4
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
- snowflake/snowpark_connect/utils/scala_udf_utils.py +596 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +22 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +1 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.26.0.dist-info/RECORD +481 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.24.0.dist-info/RECORD +0 -898
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
"""
|
|
5
|
+
Scala UDF utilities for Snowpark Connect.
|
|
6
|
+
|
|
7
|
+
This module provides utilities for creating and managing Scala User-Defined Functions (UDFs)
|
|
8
|
+
in Snowflake through Snowpark Connect. It handles the conversion between different type systems
|
|
9
|
+
(Snowpark, Scala, Snowflake, Spark protobuf) and generates the necessary SQL DDL statements
|
|
10
|
+
for UDF creation.
|
|
11
|
+
|
|
12
|
+
Key components:
|
|
13
|
+
- ScalaUdf: Reference class for Scala UDFs
|
|
14
|
+
- ScalaUDFDef: Definition class for Scala UDF creation
|
|
15
|
+
- Type mapping functions for different type systems
|
|
16
|
+
- UDF creation and management utilities
|
|
17
|
+
"""
|
|
18
|
+
import re
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import List, Union
|
|
22
|
+
|
|
23
|
+
import snowflake.snowpark.types as snowpark_type
|
|
24
|
+
import snowflake.snowpark_connect.includes.python.pyspark.sql.connect.proto.types_pb2 as types_proto
|
|
25
|
+
from snowflake.snowpark_connect.resources_initializer import RESOURCE_PATH
|
|
26
|
+
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
27
|
+
from snowflake.snowpark_connect.utils.udf_utils import (
|
|
28
|
+
ProcessCommonInlineUserDefinedFunction,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Prefix used for internally generated Scala UDF names to avoid conflicts
|
|
32
|
+
# NOTE(review): "BUILD_IN" looks like a typo for "BUILT_IN". The value is prepended to generated
# UDF names, so confirm nothing depends on the current spelling before renaming it.
CREATE_SCALA_UDF_PREFIX = "__SC_BUILD_IN_CREATE_UDF_SCALA_"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ScalaUdf:
    """
    Lightweight reference to a Scala UDF that has been created in Snowflake.

    Stores only the metadata needed to call the function: its name, its input
    types and its return type. The types are kept under the private names
    ``_input_types`` / ``_return_type`` and are additionally exposed through the
    read-only ``input_types`` / ``return_type`` properties, so code that reads
    the public names from a cached UDF reference also works on ``ScalaUdf``.
    """

    def __init__(
        self,
        name: str,
        input_types: List[snowpark_type.DataType],
        return_type: snowpark_type.DataType,
    ) -> None:
        """
        Initialize a Scala UDF reference.

        Args:
            name: The name of the UDF in Snowflake
            input_types: List of input parameter types
            return_type: The return type of the UDF
        """
        self.name = name
        self._input_types = input_types
        self._return_type = return_type

    @property
    def input_types(self) -> List[snowpark_type.DataType]:
        """Input parameter types (read-only alias of ``_input_types``)."""
        return self._input_types

    @property
    def return_type(self) -> snowpark_type.DataType:
        """Return type (read-only alias of ``_return_type``)."""
        return self._return_type
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True)
class Param:
    """
    A single named, typed parameter of a function signature.

    Instances are immutable (frozen dataclass).
    """

    # Parameter name.
    name: str
    # Parameter data type, held as a string.
    data_type: str
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Deliberately NOT decorated with @dataclass: applying dataclass() to an Enum is
# unsupported. With no annotated fields, the generated __eq__/__hash__ would make
# every member compare (and hash) equal to every other member.
class NullHandling(str, Enum):
    """
    Enumeration for UDF null handling behavior.

    Determines how the UDF behaves when input parameters contain null values.
    Each member's value is the SQL clause text for that behavior.
    """

    RETURNS_NULL_ON_NULL_INPUT = "RETURNS NULL ON NULL INPUT"
    CALLED_ON_NULL_INPUT = "CALLED ON NULL INPUT"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(frozen=True)
class ReturnType:
    """
    The return type of a function.

    Instances are immutable (frozen dataclass).
    """

    # Return data type, held as a string.
    data_type: str
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass(frozen=True)
class Signature:
    """
    A function signature: its parameters plus its return type.

    Instances are immutable (frozen dataclass).
    """

    # Function parameters.
    params: List[Param]
    # Function return type.
    returns: ReturnType
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass(frozen=True)
class ScalaUDFDef:
    """
    Complete definition for creating a Scala UDF in Snowflake.

    Contains all the information needed to generate the CREATE FUNCTION SQL statement
    and the Scala code body for the UDF.

    Attributes:
        name: UDF name; also the stem of the ``<name>.bin`` file the generated Scala code reads
        signature: SQL signature (for Snowflake function definition)
        scala_signature: Scala signature (for Scala code generation)
        scala_invocation_args: Scala expressions passed, in order, to the deserialized function
        imports: Entries of the IMPORTS clause (each emitted as a single-quoted SQL string)
        null_handling: Null handling behavior (defaults to RETURNS_NULL_ON_NULL_INPUT)
    """

    name: str
    signature: Signature
    scala_signature: Signature
    scala_invocation_args: List[str]
    imports: List[str]
    null_handling: NullHandling = NullHandling.RETURNS_NULL_ON_NULL_INPUT

    # -------------------- DDL Emitter --------------------

    def _gen_body_scala(self) -> str:
        """
        Generate the Scala code body for the UDF.

        The generated code defines an ``__RecreatedSparkUdf`` object that reads
        ``<name>.bin`` from the import directory, deserializes a ``UdfPacket`` from it,
        and exposes a ``__wrapperFunc`` method that invokes the deserialized function.

        Returns:
            String containing the complete Scala code for the UDF body
        """
        scala_params = self.scala_signature.params

        # Convert Array to Seq for Scala compatibility in function signatures.
        udf_func_input_types = ", ".join(p.data_type for p in scala_params).replace(
            "Array", "Seq"
        )

        # Wrapper parameter list: "arg0: Type0, arg1: Type1, ...".
        joined_wrapper_arg_and_input_types_str = ", ".join(
            f"{p.name}: {p.data_type}" for p in scala_params
        )
        # For Maps to work correctly with Scala UDFs, the wrapper declares Map parameters as
        # Map[String, String]. They are cast to the respective original types when the
        # original UDF function is invoked.
        wrapper_arg_and_input_types_str = re.sub(
            pattern=r"Map\[\w+,\s\w+\]",
            repl="Map[String, String]",
            string=joined_wrapper_arg_and_input_types_str,
        )
        invocation_args = ", ".join(self.scala_invocation_args)

        # Cannot directly return a map from a Scala UDF due to issues with non-String values.
        # Snowflake SQL Scala only supports Map[String, String] as input types. Therefore, the
        # map is converted to a JSON string before returning. This is processed as a Variant by SQL.
        udf_func_return_type = self.scala_signature.returns.data_type
        is_map_return = udf_func_return_type.startswith("Map")

        if is_map_return:
            wrapper_return_type = "String"
            # Serialize the map returned by the user's function to a JSON string.
            invoke_udf_func = f"write(func({invocation_args}))"
            # Extra imports and implicit formats are needed only for the JSON serialization.
            map_return_imports = """
import org.json4s._
import org.json4s.native.Serialization._
import org.json4s.native.Serialization
"""
            map_return_formatter = """
implicit val formats = Serialization.formats(NoTypeHints)
"""
        else:
            wrapper_return_type = udf_func_return_type
            invoke_udf_func = f"func({invocation_args})"
            map_return_imports = ""
            map_return_formatter = ""

        return f"""import org.apache.spark.sql.connect.common.UdfPacket
{map_return_imports}
import java.io.{{ByteArrayInputStream, ObjectInputStream}}
import java.nio.file.{{Files, Paths}}

object __RecreatedSparkUdf {{
{map_return_formatter}
private lazy val func: ({udf_func_input_types}) => {udf_func_return_type} = {{
val importDirectory = System.getProperty("com.snowflake.import_directory")
val fPath = importDirectory + "{self.name}.bin"
val bytes = Files.readAllBytes(Paths.get(fPath))
val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
try {{
ois.readObject().asInstanceOf[UdfPacket].function.asInstanceOf[({udf_func_input_types}) => {udf_func_return_type}]
}} finally {{
ois.close()
}}
}}

def __wrapperFunc({wrapper_arg_and_input_types_str}): {wrapper_return_type} = {{
{invoke_udf_func}
}}
}}
"""

    def to_create_function_sql(self) -> str:
        """
        Generate the complete CREATE FUNCTION SQL statement for the Scala UDF.

        Creates a Snowflake CREATE OR REPLACE TEMPORARY FUNCTION statement with
        all necessary clauses including language, runtime version, packages,
        imports, and the Scala code body.

        Returns:
            Complete SQL DDL statement for creating the UDF
        """
        args = ", ".join(f"{p.name} {p.data_type}" for p in self.signature.params)
        ret_type = self.signature.returns.data_type

        def quote_single(s: str) -> str:
            """Wrap a string in single quotes for SQL, doubling any embedded single quote."""
            return "'" + s.replace("'", "''") + "'"

        # Handler and imports
        imports_sql = f"IMPORTS = ({', '.join(quote_single(x) for x in self.imports)})"

        return f"""
CREATE OR REPLACE TEMPORARY FUNCTION {self.name}({args})
RETURNS {ret_type}
LANGUAGE SCALA
{self.null_handling.value}
RUNTIME_VERSION = 2.12
PACKAGES = ('com.snowflake:snowpark:latest')
{imports_sql}
HANDLER = '__RecreatedSparkUdf.__wrapperFunc'
AS
$$
{self._gen_body_scala()}
$$;"""
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def build_scala_udf_imports(session, payload, udf_name, is_map_return) -> List[str]:
    """
    Assemble the IMPORTS entries for a Scala UDF.

    Steps:
    1. Upload the serialized UDF payload to ``<stage><RESOURCE_PATH>/<udf_name>.bin``
    2. List the ``.jar`` files on the session stage and keep those outside RESOURCE_PATH
    3. Return the payload file, the fixed set of JARs and the kept stage JARs

    Args:
        session: Snowpark session
        payload: Binary payload containing the serialized UDF
        udf_name: Name of the UDF (used for the binary file name)
        is_map_return: Indicates if the UDF returns a Map (adds the json4s/paranamer JARs)

    Returns:
        List of file paths to be imported by the UDF
    """
    import io

    stage = session.get_session_stage()
    stage_resource_path = stage + RESOURCE_PATH
    closure_binary_file = f"{stage_resource_path}/{udf_name}.bin"

    # Upload the serialized closure so the generated Scala code can load it at runtime.
    session.file.put_stream(
        io.BytesIO(payload),
        closure_binary_file,
        overwrite=True,
    )

    # The user's own JARs on the stage must be imported as well.
    listing = session.sql(rf"LIST {stage}/ PATTERN='.*\.jar';").collect()
    # Keep only entries outside RESOURCE_PATH. Drop everything before the first "/" of the
    # listed name; the `stage` prefix is re-attached below.
    user_jars = [
        listed_name[listed_name.find("/") :]
        for listed_name in (row[0] for row in listing)
        if RESOURCE_PATH not in listed_name
    ]

    base_jars = [
        closure_binary_file,
        f"{stage_resource_path}/spark-connect-client-jvm_2.12-3.5.6.jar",
        f"{stage_resource_path}/spark-common-utils_2.12-3.5.6.jar",
        f"{stage_resource_path}/spark-sql_2.12-3.5.6.jar",
        f"{stage_resource_path}/json4s-ast_2.12-3.7.0-M11.jar",
    ]

    # Jars used when the return type is a Map.
    map_jars = []
    if is_map_return:
        map_jars = [
            f"{stage_resource_path}/json4s-core_2.12-3.7.0-M11.jar",
            f"{stage_resource_path}/json4s-native_2.12-3.7.0-M11.jar",
            f"{stage_resource_path}/paranamer-2.8.3.jar",
        ]

    return base_jars + map_jars + [stage + jar for jar in user_jars]
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def create_scala_udf(pciudf: ProcessCommonInlineUserDefinedFunction) -> ScalaUdf:
    """
    Build the Snowflake Scala UDF described by ``pciudf``, or reuse a cached one.

    The work happens in this order:
      1. Derive the UDF name: ``CREATE_SCALA_UDF_PREFIX`` followed by the supplied
         function name, or by the first ten hex digits of the payload's SHA-256
         when no name was supplied.
      2. Return immediately when ``session._udfs`` already contains that name.
      3. Translate each input type and the return type to Scala and Snowflake SQL.
      4. Assemble the IMPORTS list through ``build_scala_udf_imports``.
      5. Render the CREATE FUNCTION statement, log it and execute it.

    Args:
        pciudf: The ProcessCommonInlineUserDefinedFunction object containing UDF details.

    Returns:
        A ScalaUdf object representing the created or cached Scala UDF.
    """
    from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session

    name_suffix = pciudf._function_name
    if not name_suffix:
        # No explicit name: fall back to a short digest of the serialized closure.
        import hashlib

        name_suffix = hashlib.sha256(pciudf._payload).hexdigest()[:10]
    udf_name = CREATE_SCALA_UDF_PREFIX + name_suffix

    session = get_or_create_snowpark_session()
    if udf_name in session._udfs:
        existing = session._udfs[udf_name]
        return ScalaUdf(existing.name, existing.input_types, existing.return_type)

    # UDFs registered through `spark.udf.register` carry their Spark Scala input
    # types (from protobuf) in pciudf._scala_input_types. UDFs built with an explicit
    # data type, e.g. udf((i: Long) => (i + 1).toInt, IntegerType), leave that field
    # empty, so the input types inferred by Snowpark are used instead.
    if pciudf._scala_input_types:
        declared_types = pciudf._scala_input_types
    else:
        declared_types = pciudf._input_types

    scala_params: List[Param] = []  # "arg0: Type0, arg1: Type1, ..."
    sql_params: List[Param] = []  # "arg0 TYPE0, arg1 TYPE1, ..."
    invocation_args: List[str] = []  # expressions handed to the wrapped Scala function
    if declared_types:  # may be None when the UDF takes no arguments
        for position, declared in enumerate(declared_types):
            arg = f"arg{position}"
            scala_params.append(Param(arg, map_type_to_scala_type(declared)))
            sql_params.append(Param(arg, map_type_to_snowflake_type(declared)))
            # Map arguments need a cast on the Scala side: Snowflake SQL Scala can
            # only handle MAP[VARCHAR, VARCHAR] as input types.
            invocation_args.append(cast_scala_map_args_from_given_type(arg, declared))

    scala_return_type = map_type_to_scala_type(pciudf._original_return_type)
    declared_sql_return = map_type_to_snowflake_type(pciudf._original_return_type)
    returns_map = declared_sql_return.startswith("MAP")
    imports = build_scala_udf_imports(
        session,
        pciudf._payload,
        udf_name,
        is_map_return=returns_map,
    )
    # A MAP SQL return type is declared as VARIANT because of issues with Scala UDFs.
    sql_return_type = "VARIANT" if returns_map else declared_sql_return

    sql_signature = Signature(params=sql_params, returns=ReturnType(sql_return_type))
    scala_signature = Signature(
        params=scala_params, returns=ReturnType(scala_return_type)
    )
    udf_def = ScalaUDFDef(
        name=udf_name,
        signature=sql_signature,
        imports=imports,
        scala_signature=scala_signature,
        scala_invocation_args=invocation_args,
    )
    create_udf_sql = udf_def.to_create_function_sql()
    logger.info(f"Creating Scala UDF: {create_udf_sql}")
    session.sql(create_udf_sql).collect()
    return ScalaUdf(udf_name, pciudf._input_types, pciudf._return_type)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def map_type_to_scala_type(
|
|
422
|
+
t: Union[snowpark_type.DataType, types_proto.DataType]
|
|
423
|
+
) -> str:
|
|
424
|
+
"""Maps a Snowpark or Spark protobuf type to a Scala type string."""
|
|
425
|
+
if not t:
|
|
426
|
+
return "String"
|
|
427
|
+
is_snowpark_type = isinstance(t, snowpark_type.DataType)
|
|
428
|
+
condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
|
|
429
|
+
match condition:
|
|
430
|
+
case snowpark_type.ArrayType | "array":
|
|
431
|
+
return (
|
|
432
|
+
f"Array[{map_type_to_scala_type(t.element_type)}]"
|
|
433
|
+
if is_snowpark_type
|
|
434
|
+
else f"Array[{map_type_to_scala_type(t.array.element_type)}]"
|
|
435
|
+
)
|
|
436
|
+
case snowpark_type.BinaryType | "binary":
|
|
437
|
+
return "Array[Byte]"
|
|
438
|
+
case snowpark_type.BooleanType | "boolean":
|
|
439
|
+
return "Boolean"
|
|
440
|
+
case snowpark_type.ByteType | "byte":
|
|
441
|
+
return "Byte"
|
|
442
|
+
case snowpark_type.DateType | "date":
|
|
443
|
+
return "java.sql.Date"
|
|
444
|
+
case snowpark_type.DecimalType | "decimal":
|
|
445
|
+
return "java.math.BigDecimal"
|
|
446
|
+
case snowpark_type.DoubleType | "double":
|
|
447
|
+
return "Double"
|
|
448
|
+
case snowpark_type.FloatType | "float":
|
|
449
|
+
return "Float"
|
|
450
|
+
case snowpark_type.GeographyType:
|
|
451
|
+
return "Geography"
|
|
452
|
+
case snowpark_type.IntegerType | "integer":
|
|
453
|
+
return "Int"
|
|
454
|
+
case snowpark_type.LongType | "long":
|
|
455
|
+
return "Long"
|
|
456
|
+
case snowpark_type.MapType | "map": # can also map to OBJECT in Snowflake
|
|
457
|
+
key_type = (
|
|
458
|
+
map_type_to_scala_type(t.key_type)
|
|
459
|
+
if is_snowpark_type
|
|
460
|
+
else map_type_to_scala_type(t.map.key_type)
|
|
461
|
+
)
|
|
462
|
+
value_type = (
|
|
463
|
+
map_type_to_scala_type(t.value_type)
|
|
464
|
+
if is_snowpark_type
|
|
465
|
+
else map_type_to_scala_type(t.map.value_type)
|
|
466
|
+
)
|
|
467
|
+
return f"Map[{key_type}, {value_type}]"
|
|
468
|
+
case snowpark_type.NullType | "null":
|
|
469
|
+
return "String" # cannot set the return type to Null in Snowpark Scala UDFs
|
|
470
|
+
case snowpark_type.ShortType | "short":
|
|
471
|
+
return "Short"
|
|
472
|
+
case snowpark_type.StringType | "string" | "char" | "varchar":
|
|
473
|
+
return "String"
|
|
474
|
+
case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
|
|
475
|
+
return "java.sql.Timestamp"
|
|
476
|
+
case snowpark_type.VariantType:
|
|
477
|
+
return "Variant"
|
|
478
|
+
case _:
|
|
479
|
+
raise ValueError(f"Unsupported Snowpark type: {t}")
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def map_type_to_snowflake_type(
|
|
483
|
+
t: Union[snowpark_type.DataType, types_proto.DataType]
|
|
484
|
+
) -> str:
|
|
485
|
+
"""Maps a Snowpark or Spark protobuf type to a Snowflake type string."""
|
|
486
|
+
if not t:
|
|
487
|
+
return "VARCHAR"
|
|
488
|
+
is_snowpark_type = isinstance(t, snowpark_type.DataType)
|
|
489
|
+
condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
|
|
490
|
+
match condition:
|
|
491
|
+
case snowpark_type.ArrayType | "array":
|
|
492
|
+
return (
|
|
493
|
+
f"ARRAY({map_type_to_snowflake_type(t.element_type)})"
|
|
494
|
+
if is_snowpark_type
|
|
495
|
+
else f"ARRAY({map_type_to_snowflake_type(t.array.element_type)})"
|
|
496
|
+
)
|
|
497
|
+
case snowpark_type.BinaryType | "binary":
|
|
498
|
+
return "BINARY"
|
|
499
|
+
case snowpark_type.BooleanType | "boolean":
|
|
500
|
+
return "BOOLEAN"
|
|
501
|
+
case snowpark_type.ByteType | "byte":
|
|
502
|
+
return "TINYINT"
|
|
503
|
+
case snowpark_type.DateType | "date":
|
|
504
|
+
return "DATE"
|
|
505
|
+
case snowpark_type.DecimalType | "decimal":
|
|
506
|
+
return "NUMBER"
|
|
507
|
+
case snowpark_type.DoubleType | "double":
|
|
508
|
+
return "DOUBLE"
|
|
509
|
+
case snowpark_type.FloatType | "float":
|
|
510
|
+
return "FLOAT"
|
|
511
|
+
case snowpark_type.GeographyType:
|
|
512
|
+
return "GEOGRAPHY"
|
|
513
|
+
case snowpark_type.IntegerType | "integer":
|
|
514
|
+
return "INT"
|
|
515
|
+
case snowpark_type.LongType | "long":
|
|
516
|
+
return "BIGINT"
|
|
517
|
+
case snowpark_type.MapType | "map":
|
|
518
|
+
# Maps to OBJECT in Snowflake if key and value types are not specified.
|
|
519
|
+
key_type = (
|
|
520
|
+
map_type_to_snowflake_type(t.key_type)
|
|
521
|
+
if is_snowpark_type
|
|
522
|
+
else map_type_to_snowflake_type(t.map.key_type)
|
|
523
|
+
)
|
|
524
|
+
value_type = (
|
|
525
|
+
map_type_to_snowflake_type(t.value_type)
|
|
526
|
+
if is_snowpark_type
|
|
527
|
+
else map_type_to_snowflake_type(t.map.value_type)
|
|
528
|
+
)
|
|
529
|
+
return (
|
|
530
|
+
f"MAP({key_type}, {value_type})"
|
|
531
|
+
if key_type and value_type
|
|
532
|
+
else "OBJECT"
|
|
533
|
+
)
|
|
534
|
+
case snowpark_type.NullType | "null":
|
|
535
|
+
return "VARCHAR"
|
|
536
|
+
case snowpark_type.ShortType | "short":
|
|
537
|
+
return "SMALLINT"
|
|
538
|
+
case snowpark_type.StringType | "string" | "char" | "varchar":
|
|
539
|
+
return "VARCHAR"
|
|
540
|
+
case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
|
|
541
|
+
return "TIMESTAMP"
|
|
542
|
+
case snowpark_type.VariantType:
|
|
543
|
+
return "VARIANT"
|
|
544
|
+
case _:
|
|
545
|
+
raise ValueError(f"Unsupported Snowpark type: {t}")
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def cast_scala_map_args_from_given_type(
|
|
549
|
+
arg_name: str, input_type: Union[snowpark_type.DataType, types_proto.DataType]
|
|
550
|
+
) -> str:
|
|
551
|
+
"""If the input_type is a Map, cast the argument arg_name to a Map[key_type, value_type] in Scala."""
|
|
552
|
+
is_snowpark_type = isinstance(input_type, snowpark_type.DataType)
|
|
553
|
+
|
|
554
|
+
def convert_from_string_to_type(
|
|
555
|
+
arg_name: str, t: Union[snowpark_type.DataType, types_proto.DataType]
|
|
556
|
+
) -> str:
|
|
557
|
+
"""Convert the string argument arg_name to the specified type t in Scala."""
|
|
558
|
+
condition = type(t) if is_snowpark_type else t.WhichOneof("kind")
|
|
559
|
+
match condition:
|
|
560
|
+
case snowpark_type.BinaryType | "binary":
|
|
561
|
+
return arg_name + ".getBytes()"
|
|
562
|
+
case snowpark_type.BooleanType | "boolean":
|
|
563
|
+
return arg_name + ".toBoolean"
|
|
564
|
+
case snowpark_type.ByteType | "byte":
|
|
565
|
+
return arg_name + ".getBytes().head" # TODO: verify if this is correct
|
|
566
|
+
case snowpark_type.DateType | "date":
|
|
567
|
+
return f"java.sql.Date.valueOf({arg_name})"
|
|
568
|
+
case snowpark_type.DecimalType | "decimal":
|
|
569
|
+
return f"new BigDecimal({arg_name})"
|
|
570
|
+
case snowpark_type.DoubleType | "double":
|
|
571
|
+
return arg_name + ".toDouble"
|
|
572
|
+
case snowpark_type.FloatType | "float":
|
|
573
|
+
return arg_name + ".toFloat"
|
|
574
|
+
case snowpark_type.IntegerType | "integer":
|
|
575
|
+
return arg_name + ".toInt"
|
|
576
|
+
case snowpark_type.LongType | "long":
|
|
577
|
+
return arg_name + ".toLong"
|
|
578
|
+
case snowpark_type.ShortType | "short":
|
|
579
|
+
return arg_name + ".toShort"
|
|
580
|
+
case snowpark_type.StringType | "string" | "char" | "varchar":
|
|
581
|
+
return arg_name
|
|
582
|
+
case snowpark_type.TimestampType | "timestamp" | "timestamp_ntz":
|
|
583
|
+
return "java.sql.Timestamp.valueOf({arg_name})"
|
|
584
|
+
case _:
|
|
585
|
+
raise ValueError(f"Unsupported Snowpark type: {t}")
|
|
586
|
+
|
|
587
|
+
if (is_snowpark_type and isinstance(input_type, snowpark_type.MapType)) or (
|
|
588
|
+
not is_snowpark_type and input_type.WhichOneof("kind") == "map"
|
|
589
|
+
):
|
|
590
|
+
key_type = input_type.key_type if is_snowpark_type else input_type.map.key_type
|
|
591
|
+
value_type = (
|
|
592
|
+
input_type.value_type if is_snowpark_type else input_type.map.value_type
|
|
593
|
+
)
|
|
594
|
+
return f"{arg_name}.map {{ case (k, v) => ({convert_from_string_to_type('k', key_type)}, {convert_from_string_to_type('v', value_type)})}}"
|
|
595
|
+
else:
|
|
596
|
+
return arg_name
|
|
@@ -14,6 +14,9 @@ from snowflake.snowpark_connect.constants import DEFAULT_CONNECTION_NAME
|
|
|
14
14
|
from snowflake.snowpark_connect.utils.describe_query_cache import (
|
|
15
15
|
instrument_session_for_describe_cache,
|
|
16
16
|
)
|
|
17
|
+
from snowflake.snowpark_connect.utils.external_udxf_cache import (
|
|
18
|
+
init_external_udxf_cache,
|
|
19
|
+
)
|
|
17
20
|
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
18
21
|
from snowflake.snowpark_connect.utils.telemetry import telemetry
|
|
19
22
|
from snowflake.snowpark_connect.utils.udf_cache import init_builtin_udf_cache
|
|
@@ -63,6 +66,7 @@ def configure_snowpark_session(session: snowpark.Session):
|
|
|
63
66
|
|
|
64
67
|
# built-in udf cache
|
|
65
68
|
init_builtin_udf_cache(session)
|
|
69
|
+
init_external_udxf_cache(session)
|
|
66
70
|
|
|
67
71
|
# Set experimental parameters (warnings globally suppressed)
|
|
68
72
|
session.ast_enabled = False
|
|
@@ -94,12 +94,6 @@ RECORDED_CONFIG_KEYS = {
|
|
|
94
94
|
"snowpark.connect.views.duplicate_column_names_handling_mode",
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
# io types for which we don't track options
|
|
98
|
-
REDACTED_IO_TYPES = {
|
|
99
|
-
"jdbc",
|
|
100
|
-
"net.snowflake.spark.snowflake",
|
|
101
|
-
}
|
|
102
|
-
|
|
103
97
|
# these fields will be redacted when reporting the spark query plan
|
|
104
98
|
REDACTED_PLAN_SUFFIXES = [
|
|
105
99
|
# config values can be set using SQL, so we have to redact it
|
|
@@ -444,7 +438,7 @@ class Telemetry:
|
|
|
444
438
|
|
|
445
439
|
summary["udf_usage"][udf_name] += 1
|
|
446
440
|
|
|
447
|
-
def _report_io(self, op: str, type: str
|
|
441
|
+
def _report_io(self, op: str, type: str):
|
|
448
442
|
if self._not_in_request():
|
|
449
443
|
return
|
|
450
444
|
|
|
@@ -453,20 +447,15 @@ class Telemetry:
|
|
|
453
447
|
if "io" not in summary:
|
|
454
448
|
summary["io"] = []
|
|
455
449
|
|
|
456
|
-
|
|
457
|
-
io = {"op": op, "type": type}
|
|
458
|
-
else:
|
|
459
|
-
io = {"op": op, "type": type, "options": options}
|
|
460
|
-
|
|
461
|
-
summary["io"].append(io)
|
|
450
|
+
summary["io"].append({"op": op, "type": type})
|
|
462
451
|
|
|
463
452
|
@safe
|
|
464
|
-
def report_io_read(self, type: str
|
|
465
|
-
self._report_io("read", type
|
|
453
|
+
def report_io_read(self, type: str):
|
|
454
|
+
self._report_io("read", type)
|
|
466
455
|
|
|
467
456
|
@safe
|
|
468
|
-
def report_io_write(self, type: str
|
|
469
|
-
self._report_io("write", type
|
|
457
|
+
def report_io_write(self, type: str):
|
|
458
|
+
self._report_io("write", type)
|
|
470
459
|
|
|
471
460
|
@safe
|
|
472
461
|
def send_server_started_telemetry(self):
|
|
@@ -240,6 +240,8 @@ def _check_supported_udf(
|
|
|
240
240
|
raise ValueError(
|
|
241
241
|
"Function type java_udf not supported for common inline user-defined function"
|
|
242
242
|
)
|
|
243
|
+
case "scalar_scala_udf":
|
|
244
|
+
pass
|
|
243
245
|
case _ as function_type:
|
|
244
246
|
raise ValueError(
|
|
245
247
|
f"Function type {function_type} not supported for common inline user-defined function"
|