snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only, and it reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect has been flagged as potentially problematic; see the release advisory linked from the registry page for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +13 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +6 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/function_defaults.py +207 -0
- snowflake/snowpark_connect/expression/literal.py +18 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_expression.py +10 -1
- snowflake/snowpark_connect/expression/map_extension.py +12 -2
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +26 -8
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
- snowflake/snowpark_connect/relation/map_extension.py +56 -15
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +4 -2
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/read/utils.py +6 -7
- snowflake/snowpark_connect/relation/utils.py +1 -170
- snowflake/snowpark_connect/relation/write/map_write.py +62 -53
- snowflake/snowpark_connect/resources_initializer.py +29 -1
- snowflake/snowpark_connect/server.py +18 -3
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +38 -7
- snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.25.0.dist-info/RECORD +477 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.23.0.dist-info/RECORD +0 -893
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -73,13 +73,16 @@ from snowflake.snowpark_connect.column_name_handler import (
|
|
|
73
73
|
)
|
|
74
74
|
from snowflake.snowpark_connect.config import (
|
|
75
75
|
get_boolean_session_config_param,
|
|
76
|
+
get_timestamp_type,
|
|
76
77
|
global_config,
|
|
77
78
|
)
|
|
78
79
|
from snowflake.snowpark_connect.constants import (
|
|
79
80
|
DUPLICATE_KEY_FOUND_ERROR_TEMPLATE,
|
|
80
|
-
SPARK_TZ_ABBREVIATIONS_OVERRIDES,
|
|
81
81
|
STRUCTURED_TYPES_ENABLED,
|
|
82
82
|
)
|
|
83
|
+
from snowflake.snowpark_connect.expression.function_defaults import (
|
|
84
|
+
inject_function_defaults,
|
|
85
|
+
)
|
|
83
86
|
from snowflake.snowpark_connect.expression.literal import get_literal_field_and_name
|
|
84
87
|
from snowflake.snowpark_connect.expression.map_cast import (
|
|
85
88
|
CAST_FUNCTIONS,
|
|
@@ -97,7 +100,6 @@ from snowflake.snowpark_connect.type_mapping import (
|
|
|
97
100
|
map_snowpark_to_pyspark_types,
|
|
98
101
|
map_spark_timestamp_format_expression,
|
|
99
102
|
map_type_string_to_snowpark_type,
|
|
100
|
-
parse_ddl_string,
|
|
101
103
|
)
|
|
102
104
|
from snowflake.snowpark_connect.typed_column import (
|
|
103
105
|
TypedColumn,
|
|
@@ -206,19 +208,6 @@ def _validate_numeric_args(
|
|
|
206
208
|
return modified_args
|
|
207
209
|
|
|
208
210
|
|
|
209
|
-
def get_timestamp_type():
|
|
210
|
-
match global_config["spark.sql.timestampType"]:
|
|
211
|
-
case "TIMESTAMP_LTZ":
|
|
212
|
-
timestamp_type = TimestampType(TimestampTimeZone.LTZ)
|
|
213
|
-
case "TIMESTAMP_NTZ":
|
|
214
|
-
timestamp_type = TimestampType(TimestampTimeZone.NTZ)
|
|
215
|
-
case "TIMESTAMP_TZ":
|
|
216
|
-
timestamp_type = TimestampType(TimestampTimeZone.TZ)
|
|
217
|
-
case _:
|
|
218
|
-
timestamp_type = TimestampType(TimestampTimeZone.DEFAULT)
|
|
219
|
-
return timestamp_type
|
|
220
|
-
|
|
221
|
-
|
|
222
211
|
def unwrap_literal(exp: expressions_proto.Expression):
|
|
223
212
|
"""Workaround for Snowpark functions generating invalid SQL when used with fn.lit (SNOW-1871954)"""
|
|
224
213
|
return get_literal_field_and_name(exp.literal)[0]
|
|
@@ -299,6 +288,9 @@ def map_unresolved_function(
|
|
|
299
288
|
function_name = exp.unresolved_function.function_name.lower()
|
|
300
289
|
is_udtf_call = function_name in session._udtfs
|
|
301
290
|
|
|
291
|
+
# Inject default parameters for functions that need them (especially for Scala clients)
|
|
292
|
+
inject_function_defaults(exp.unresolved_function)
|
|
293
|
+
|
|
302
294
|
def _resolve_args_expressions(exp: expressions_proto.Expression):
|
|
303
295
|
def _resolve_fn_arg(exp):
|
|
304
296
|
with resolving_fun_args():
|
|
@@ -516,146 +508,401 @@ def map_unresolved_function(
|
|
|
516
508
|
result_exp = snowpark_fn.when(
|
|
517
509
|
snowpark_args[1] == 0, snowpark_fn.lit(None)
|
|
518
510
|
).otherwise(snowpark_args[0] % snowpark_args[1])
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
524
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
525
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
526
|
-
and isinstance(snowpark_typed_args[1].typ, _IntegralType)
|
|
527
|
-
or isinstance(snowpark_typed_args[0].typ, _IntegralType)
|
|
528
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
529
|
-
):
|
|
530
|
-
result_exp, (
|
|
531
|
-
return_type_precision,
|
|
532
|
-
return_type_scale,
|
|
533
|
-
) = _mul_div_precision_helper(snowpark_typed_args, snowpark_args, 0)
|
|
534
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
535
|
-
else:
|
|
536
|
-
result_exp = snowpark_args[0] * snowpark_args[1]
|
|
537
|
-
result_type = _find_common_type(
|
|
538
|
-
[arg.typ for arg in snowpark_typed_args]
|
|
539
|
-
)
|
|
540
|
-
case "+":
|
|
541
|
-
if isinstance(snowpark_typed_args[0].typ, DateType) and not isinstance(
|
|
542
|
-
snowpark_typed_args[1].typ,
|
|
543
|
-
(_IntegralType, StringType),
|
|
544
|
-
):
|
|
545
|
-
raise AnalysisException(
|
|
546
|
-
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]} + {snowpark_arg_names[1]}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
|
|
547
|
-
)
|
|
548
|
-
elif (
|
|
549
|
-
isinstance(snowpark_typed_args[0].typ, BinaryType)
|
|
550
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
551
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
552
|
-
and isinstance(snowpark_typed_args[1].typ, BinaryType)
|
|
553
|
-
) or (
|
|
554
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
555
|
-
and isinstance(snowpark_typed_args[1].typ, TimestampType)
|
|
556
|
-
or isinstance(snowpark_typed_args[0].typ, TimestampType)
|
|
557
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
558
|
-
):
|
|
559
|
-
raise AnalysisException(
|
|
560
|
-
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} + {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
561
|
-
)
|
|
562
|
-
elif isinstance(snowpark_typed_args[1].typ, DateType) and not isinstance(
|
|
563
|
-
snowpark_typed_args[0].typ,
|
|
564
|
-
(_IntegralType, StringType),
|
|
565
|
-
):
|
|
566
|
-
raise AnalysisException(
|
|
567
|
-
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]} + {snowpark_arg_names[1]}" due to data type mismatch: Parameter 1 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
|
|
568
|
-
)
|
|
569
|
-
elif (
|
|
570
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
571
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
572
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
573
|
-
and isinstance(snowpark_typed_args[1].typ, _IntegralType)
|
|
574
|
-
or isinstance(snowpark_typed_args[0].typ, _IntegralType)
|
|
575
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
576
|
-
):
|
|
577
|
-
result_exp, (
|
|
578
|
-
return_type_precision,
|
|
579
|
-
return_type_scale,
|
|
580
|
-
) = _add_sub_precision_helper(snowpark_typed_args, snowpark_args, 0)
|
|
581
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
582
|
-
else:
|
|
583
|
-
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
584
|
-
if any(isinstance(arg.typ, DateType) for arg in snowpark_typed_args):
|
|
585
|
-
# TODO SNOW-2034420: resolve return type
|
|
586
|
-
result_exp = _type_with_typer(result_exp)
|
|
587
|
-
else:
|
|
511
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
512
|
+
case (NullType(), NullType()):
|
|
513
|
+
result_type = DoubleType()
|
|
514
|
+
case _:
|
|
588
515
|
result_type = _find_common_type(
|
|
589
516
|
[arg.typ for arg in snowpark_typed_args]
|
|
590
517
|
)
|
|
591
|
-
case "
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
518
|
+
case "*":
|
|
519
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
520
|
+
case (DecimalType() as t, NullType()) | (
|
|
521
|
+
NullType(),
|
|
522
|
+
DecimalType() as t,
|
|
523
|
+
):
|
|
524
|
+
p1, s1 = _get_type_precision(t)
|
|
525
|
+
result_type = _get_decimal_multiplication_result_type(
|
|
526
|
+
p1, s1, p1, s1
|
|
527
|
+
)
|
|
528
|
+
result_exp = snowpark_fn.lit(None)
|
|
529
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
530
|
+
t, (DecimalType, _IntegralType)
|
|
531
|
+
):
|
|
532
|
+
p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
|
|
533
|
+
p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
|
|
534
|
+
result_type = _get_decimal_multiplication_result_type(
|
|
535
|
+
p1, s1, p2, s2
|
|
536
|
+
)
|
|
537
|
+
result_exp = _get_decimal_multiplication_result_exp(
|
|
538
|
+
result_type, t, snowpark_args
|
|
539
|
+
)
|
|
540
|
+
case (NullType(), NullType()):
|
|
541
|
+
result_type = DoubleType()
|
|
542
|
+
result_exp = snowpark_fn.lit(None)
|
|
543
|
+
case (StringType(), StringType()):
|
|
544
|
+
if spark_sql_ansi_enabled:
|
|
545
|
+
raise AnalysisException(
|
|
546
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
|
|
547
|
+
)
|
|
548
|
+
else:
|
|
549
|
+
result_type = DoubleType()
|
|
550
|
+
result_exp = snowpark_args[0].try_cast(
|
|
551
|
+
result_type
|
|
552
|
+
) * snowpark_args[1].try_cast(result_type)
|
|
553
|
+
case (StringType(), _IntegralType()):
|
|
554
|
+
if spark_sql_ansi_enabled:
|
|
555
|
+
result_type = LongType()
|
|
556
|
+
result_exp = (
|
|
557
|
+
snowpark_args[0].cast(result_type) * snowpark_args[1]
|
|
558
|
+
)
|
|
559
|
+
else:
|
|
560
|
+
result_type = DoubleType()
|
|
561
|
+
result_exp = (
|
|
562
|
+
snowpark_args[0].try_cast(result_type) * snowpark_args[1]
|
|
563
|
+
)
|
|
564
|
+
case (StringType(), _FractionalType()):
|
|
565
|
+
result_type = DoubleType()
|
|
566
|
+
if spark_sql_ansi_enabled:
|
|
567
|
+
result_exp = (
|
|
568
|
+
snowpark_args[0].cast(result_type) * snowpark_args[1]
|
|
569
|
+
)
|
|
570
|
+
else:
|
|
571
|
+
result_exp = (
|
|
572
|
+
snowpark_args[0].try_cast(result_type) * snowpark_args[1]
|
|
573
|
+
)
|
|
574
|
+
case (_IntegralType(), StringType()):
|
|
575
|
+
if spark_sql_ansi_enabled:
|
|
576
|
+
result_type = LongType()
|
|
577
|
+
result_exp = snowpark_args[0] * snowpark_args[1].cast(
|
|
578
|
+
result_type
|
|
579
|
+
)
|
|
580
|
+
else:
|
|
581
|
+
result_type = DoubleType()
|
|
582
|
+
result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
|
|
583
|
+
result_type
|
|
584
|
+
)
|
|
585
|
+
case (_FractionalType(), StringType()):
|
|
586
|
+
result_type = DoubleType()
|
|
587
|
+
if spark_sql_ansi_enabled:
|
|
588
|
+
result_exp = snowpark_args[0] * snowpark_args[1].cast(
|
|
589
|
+
result_type
|
|
590
|
+
)
|
|
591
|
+
else:
|
|
592
|
+
result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
|
|
593
|
+
result_type
|
|
594
|
+
)
|
|
595
|
+
case (_NumericType() as t, NullType()) | (
|
|
596
|
+
NullType(),
|
|
597
|
+
_NumericType() as t,
|
|
598
|
+
):
|
|
599
|
+
result_type = t
|
|
600
|
+
result_exp = snowpark_fn.lit(None)
|
|
601
|
+
case (_NumericType(), _NumericType()):
|
|
639
602
|
result_type = _find_common_type(
|
|
640
603
|
[arg.typ for arg in snowpark_typed_args]
|
|
641
604
|
)
|
|
642
|
-
|
|
605
|
+
result_exp = snowpark_args[0].cast(result_type) * snowpark_args[
|
|
606
|
+
1
|
|
607
|
+
].cast(result_type)
|
|
608
|
+
case _:
|
|
609
|
+
raise AnalysisException(
|
|
610
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
611
|
+
)
|
|
612
|
+
case "+":
|
|
613
|
+
spark_function_name = _get_spark_function_name(
|
|
614
|
+
snowpark_typed_args[0],
|
|
615
|
+
snowpark_typed_args[1],
|
|
616
|
+
snowpark_arg_names,
|
|
617
|
+
exp,
|
|
618
|
+
spark_function_name,
|
|
619
|
+
"+",
|
|
620
|
+
)
|
|
621
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
622
|
+
case (NullType(), _) | (_, NullType()):
|
|
623
|
+
result_type = _get_add_sub_result_type(
|
|
624
|
+
snowpark_typed_args[0].typ,
|
|
625
|
+
snowpark_typed_args[1].typ,
|
|
626
|
+
spark_function_name,
|
|
627
|
+
)
|
|
628
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
629
|
+
result_exp = result_exp.cast(result_type)
|
|
630
|
+
case (DateType(), t) | (t, DateType()):
|
|
631
|
+
date_param_index = (
|
|
632
|
+
0 if isinstance(snowpark_typed_args[0].typ, DateType) else 1
|
|
633
|
+
)
|
|
634
|
+
t_param_index = 1 - date_param_index
|
|
635
|
+
if isinstance(t, (IntegerType, ShortType, ByteType)):
|
|
636
|
+
result_type = DateType()
|
|
637
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
638
|
+
elif (
|
|
639
|
+
"INTERVAL"
|
|
640
|
+
== snowpark_typed_args[t_param_index].col._expr1.pretty_name
|
|
641
|
+
):
|
|
642
|
+
result_type = TimestampType()
|
|
643
|
+
result_exp = (
|
|
644
|
+
snowpark_args[date_param_index]
|
|
645
|
+
+ snowpark_args[t_param_index]
|
|
646
|
+
)
|
|
647
|
+
else:
|
|
648
|
+
raise AnalysisException(
|
|
649
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
|
|
650
|
+
)
|
|
651
|
+
case (StringType(), StringType()):
|
|
652
|
+
if spark_sql_ansi_enabled:
|
|
653
|
+
raise AnalysisException(
|
|
654
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
|
|
655
|
+
)
|
|
656
|
+
else:
|
|
657
|
+
result_type = DoubleType()
|
|
658
|
+
result_exp = snowpark_fn.try_cast(
|
|
659
|
+
snowpark_args[0], result_type
|
|
660
|
+
) + snowpark_fn.try_cast(snowpark_args[1], result_type)
|
|
661
|
+
case (StringType(), _NumericType() as t):
|
|
662
|
+
if spark_sql_ansi_enabled:
|
|
663
|
+
result_type = (
|
|
664
|
+
DoubleType()
|
|
665
|
+
if isinstance(t, _FractionalType)
|
|
666
|
+
else LongType()
|
|
667
|
+
)
|
|
668
|
+
result_exp = (
|
|
669
|
+
snowpark_args[0].cast(result_type) + snowpark_args[1]
|
|
670
|
+
)
|
|
671
|
+
else:
|
|
672
|
+
result_type = DoubleType()
|
|
673
|
+
result_exp = (
|
|
674
|
+
snowpark_fn.try_cast(snowpark_args[0], result_type)
|
|
675
|
+
+ snowpark_args[1]
|
|
676
|
+
)
|
|
677
|
+
case (_NumericType() as t, StringType()):
|
|
678
|
+
if spark_sql_ansi_enabled:
|
|
679
|
+
result_type = (
|
|
680
|
+
DoubleType()
|
|
681
|
+
if isinstance(t, _FractionalType)
|
|
682
|
+
else LongType()
|
|
683
|
+
)
|
|
684
|
+
result_exp = snowpark_args[0] + snowpark_args[1].cast(
|
|
685
|
+
result_type
|
|
686
|
+
)
|
|
687
|
+
else:
|
|
688
|
+
result_type = DoubleType()
|
|
689
|
+
result_exp = snowpark_args[0] + snowpark_fn.try_cast(
|
|
690
|
+
snowpark_args[1], result_type
|
|
691
|
+
)
|
|
692
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
693
|
+
t, (BinaryType, TimestampType)
|
|
694
|
+
):
|
|
695
|
+
raise AnalysisException(
|
|
696
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
697
|
+
)
|
|
698
|
+
case _:
|
|
699
|
+
result_type = _get_add_sub_result_type(
|
|
700
|
+
snowpark_typed_args[0].typ,
|
|
701
|
+
snowpark_typed_args[1].typ,
|
|
702
|
+
spark_function_name,
|
|
703
|
+
)
|
|
704
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
705
|
+
if isinstance(result_type, DecimalType):
|
|
706
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
707
|
+
case "-":
|
|
708
|
+
spark_function_name = _get_spark_function_name(
|
|
709
|
+
snowpark_typed_args[0],
|
|
710
|
+
snowpark_typed_args[1],
|
|
711
|
+
snowpark_arg_names,
|
|
712
|
+
exp,
|
|
713
|
+
spark_function_name,
|
|
714
|
+
"-",
|
|
715
|
+
)
|
|
643
716
|
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
717
|
+
case (NullType(), _) | (_, NullType()):
|
|
718
|
+
result_type = _get_add_sub_result_type(
|
|
719
|
+
snowpark_typed_args[0].typ,
|
|
720
|
+
snowpark_typed_args[1].typ,
|
|
721
|
+
spark_function_name,
|
|
722
|
+
)
|
|
723
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
724
|
+
result_exp = result_exp.cast(result_type)
|
|
725
|
+
case (DateType(), DateType()):
|
|
726
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
727
|
+
result_type = LongType()
|
|
728
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
729
|
+
case (DateType(), StringType()):
|
|
730
|
+
if "INTERVAL" == snowpark_typed_args[1].col._expr1.pretty_name:
|
|
731
|
+
result_type = TimestampType()
|
|
732
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
733
|
+
else:
|
|
734
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
735
|
+
result_type = LongType()
|
|
736
|
+
input_type = (
|
|
737
|
+
DateType() if spark_sql_ansi_enabled else DoubleType()
|
|
738
|
+
)
|
|
739
|
+
result_exp = snowpark_args[0] - snowpark_args[1].cast(
|
|
740
|
+
input_type
|
|
741
|
+
)
|
|
742
|
+
case (StringType(), DateType()):
|
|
743
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
744
|
+
result_type = LongType()
|
|
745
|
+
input_type = DateType()
|
|
746
|
+
result_exp = snowpark_args[0].cast(input_type) - snowpark_args[1]
|
|
747
|
+
case (DateType(), (IntegerType() | ShortType() | ByteType())):
|
|
748
|
+
result_type = DateType()
|
|
749
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
750
|
+
case (DateType(), _):
|
|
751
|
+
raise AnalysisException(
|
|
752
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
|
|
753
|
+
)
|
|
754
|
+
case (_, DateType()):
|
|
755
|
+
raise AnalysisException(
|
|
756
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
|
|
757
|
+
)
|
|
758
|
+
case (StringType(), StringType()):
|
|
759
|
+
if spark_sql_ansi_enabled:
|
|
760
|
+
raise AnalysisException(
|
|
761
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
|
|
762
|
+
)
|
|
763
|
+
else:
|
|
764
|
+
result_type = DoubleType()
|
|
765
|
+
result_exp = snowpark_fn.try_cast(
|
|
766
|
+
snowpark_args[0], result_type
|
|
767
|
+
) - snowpark_fn.try_cast(snowpark_args[1], result_type)
|
|
768
|
+
case (StringType(), _NumericType() as t):
|
|
769
|
+
if spark_sql_ansi_enabled:
|
|
770
|
+
result_type = (
|
|
771
|
+
DoubleType()
|
|
772
|
+
if isinstance(t, _FractionalType)
|
|
773
|
+
else LongType()
|
|
774
|
+
)
|
|
775
|
+
result_exp = (
|
|
776
|
+
snowpark_args[0].cast(result_type) - snowpark_args[1]
|
|
777
|
+
)
|
|
778
|
+
else:
|
|
779
|
+
result_type = DoubleType()
|
|
780
|
+
result_exp = (
|
|
781
|
+
snowpark_fn.try_cast(snowpark_args[0], result_type)
|
|
782
|
+
- snowpark_args[1]
|
|
783
|
+
)
|
|
784
|
+
case (_NumericType() as t, StringType()):
|
|
785
|
+
if spark_sql_ansi_enabled:
|
|
786
|
+
result_type = (
|
|
787
|
+
DoubleType()
|
|
788
|
+
if isinstance(t, _FractionalType)
|
|
789
|
+
else LongType()
|
|
790
|
+
)
|
|
791
|
+
result_exp = snowpark_args[0] - snowpark_args[1].cast(
|
|
792
|
+
result_type
|
|
793
|
+
)
|
|
794
|
+
else:
|
|
795
|
+
result_type = DoubleType()
|
|
796
|
+
result_exp = snowpark_args[0] - snowpark_fn.try_cast(
|
|
797
|
+
snowpark_args[1], result_type
|
|
798
|
+
)
|
|
644
799
|
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
645
|
-
t,
|
|
646
|
-
) or isinstance(t, _IntegralType) or isinstance(
|
|
647
|
-
snowpark_typed_args[1].typ, NullType
|
|
800
|
+
t, (BinaryType, TimestampType)
|
|
648
801
|
):
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
) = _mul_div_precision_helper(snowpark_typed_args, snowpark_args, 1)
|
|
653
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
802
|
+
raise AnalysisException(
|
|
803
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
804
|
+
)
|
|
654
805
|
case _:
|
|
806
|
+
result_type = _get_add_sub_result_type(
|
|
807
|
+
snowpark_typed_args[0].typ,
|
|
808
|
+
snowpark_typed_args[1].typ,
|
|
809
|
+
spark_function_name,
|
|
810
|
+
)
|
|
811
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
812
|
+
if isinstance(result_type, DecimalType):
|
|
813
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
814
|
+
case "/":
|
|
815
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
816
|
+
case (DecimalType() as t1, NullType()):
|
|
817
|
+
p1, s1 = _get_type_precision(t1)
|
|
818
|
+
result_type, _ = _get_decimal_division_result_type(p1, s1, p1, s1)
|
|
819
|
+
result_exp = snowpark_fn.lit(None).cast(result_type)
|
|
820
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
821
|
+
t, (DecimalType, _IntegralType)
|
|
822
|
+
):
|
|
823
|
+
p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
|
|
824
|
+
p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
|
|
825
|
+
result_type, overflow_detected = _get_decimal_division_result_type(
|
|
826
|
+
p1, s1, p2, s2
|
|
827
|
+
)
|
|
828
|
+
result_exp = _get_decimal_division_result_exp(
|
|
829
|
+
result_type,
|
|
830
|
+
t,
|
|
831
|
+
overflow_detected,
|
|
832
|
+
snowpark_args,
|
|
833
|
+
spark_function_name,
|
|
834
|
+
)
|
|
835
|
+
case (NullType(), NullType()):
|
|
836
|
+
result_type = DoubleType()
|
|
837
|
+
result_exp = snowpark_fn.lit(None)
|
|
838
|
+
case (StringType(), StringType()):
|
|
839
|
+
if spark_sql_ansi_enabled:
|
|
840
|
+
raise AnalysisException(
|
|
841
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
|
|
842
|
+
)
|
|
843
|
+
else:
|
|
844
|
+
result_type = DoubleType()
|
|
845
|
+
result_exp = _divnull(
|
|
846
|
+
snowpark_args[0].try_cast(result_type),
|
|
847
|
+
snowpark_args[1].try_cast(result_type),
|
|
848
|
+
)
|
|
849
|
+
case (StringType(), _IntegralType()):
|
|
850
|
+
result_type = DoubleType()
|
|
851
|
+
if spark_sql_ansi_enabled:
|
|
852
|
+
result_exp = _divnull(
|
|
853
|
+
snowpark_args[0].cast(LongType()),
|
|
854
|
+
snowpark_args[1].cast(result_type),
|
|
855
|
+
)
|
|
856
|
+
else:
|
|
857
|
+
result_exp = _divnull(
|
|
858
|
+
snowpark_args[0].try_cast(result_type), snowpark_args[1]
|
|
859
|
+
)
|
|
860
|
+
result_exp = result_exp.cast(result_type)
|
|
861
|
+
case (StringType(), _FractionalType()):
|
|
862
|
+
result_type = DoubleType()
|
|
863
|
+
if spark_sql_ansi_enabled:
|
|
864
|
+
result_exp = _divnull(
|
|
865
|
+
snowpark_args[0].cast(result_type), snowpark_args[1]
|
|
866
|
+
)
|
|
867
|
+
else:
|
|
868
|
+
result_exp = _divnull(
|
|
869
|
+
snowpark_args[0].try_cast(result_type), snowpark_args[1]
|
|
870
|
+
)
|
|
871
|
+
case (_IntegralType(), StringType()):
|
|
872
|
+
result_type = DoubleType()
|
|
873
|
+
if spark_sql_ansi_enabled:
|
|
874
|
+
result_exp = _divnull(
|
|
875
|
+
snowpark_args[0].cast(result_type),
|
|
876
|
+
snowpark_args[1].cast(LongType()),
|
|
877
|
+
)
|
|
878
|
+
else:
|
|
879
|
+
result_exp = _divnull(
|
|
880
|
+
snowpark_args[0], snowpark_args[1].try_cast(result_type)
|
|
881
|
+
)
|
|
882
|
+
result_exp = result_exp.cast(result_type)
|
|
883
|
+
case (_FractionalType(), StringType()):
|
|
655
884
|
result_type = DoubleType()
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
885
|
+
if spark_sql_ansi_enabled:
|
|
886
|
+
result_exp = _divnull(
|
|
887
|
+
snowpark_args[0], snowpark_args[1].cast(result_type)
|
|
888
|
+
)
|
|
889
|
+
else:
|
|
890
|
+
result_exp = _divnull(
|
|
891
|
+
snowpark_args[0], snowpark_args[1].try_cast(result_type)
|
|
892
|
+
)
|
|
893
|
+
case (_NumericType(), NullType()) | (NullType(), _NumericType()):
|
|
894
|
+
result_type = DoubleType()
|
|
895
|
+
result_exp = snowpark_fn.lit(None)
|
|
896
|
+
case (_NumericType(), _NumericType()):
|
|
897
|
+
result_type = DoubleType()
|
|
898
|
+
result_exp = _divnull(
|
|
899
|
+
snowpark_args[0].cast(result_type),
|
|
900
|
+
snowpark_args[1].cast(result_type),
|
|
901
|
+
)
|
|
902
|
+
case _:
|
|
903
|
+
raise AnalysisException(
|
|
904
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
905
|
+
)
|
|
659
906
|
case "~":
|
|
660
907
|
result_exp = TypedColumn(
|
|
661
908
|
snowpark_fn.bitnot(snowpark_args[0]),
|
|
@@ -1759,9 +2006,11 @@ def map_unresolved_function(
|
|
|
1759
2006
|
snowpark_args[1], snowpark_args[2], snowpark_args[0]
|
|
1760
2007
|
)
|
|
1761
2008
|
else:
|
|
2009
|
+
spark_function_name = f"convert_timezone(current_timezone(), {', '.join(snowpark_arg_names)})"
|
|
1762
2010
|
result_exp = snowpark_fn.convert_timezone(*snowpark_args)
|
|
1763
2011
|
|
|
1764
|
-
|
|
2012
|
+
result_type = TimestampType(TimestampTimeZone.NTZ)
|
|
2013
|
+
result_exp = result_exp.cast(result_type)
|
|
1765
2014
|
|
|
1766
2015
|
case "corr":
|
|
1767
2016
|
col1_type = snowpark_typed_args[0].typ
|
|
@@ -2049,11 +2298,8 @@ def map_unresolved_function(
|
|
|
2049
2298
|
result_exp = TypedColumn(snowpark_fn.current_date(), lambda: [DateType()])
|
|
2050
2299
|
spark_function_name = "current_date()"
|
|
2051
2300
|
case "current_timestamp" | "now":
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
get_timestamp_type(),
|
|
2055
|
-
)
|
|
2056
|
-
result_exp = TypedColumn(result_exp, lambda: [get_timestamp_type()])
|
|
2301
|
+
result_type = TimestampType(TimestampTimeZone.LTZ)
|
|
2302
|
+
result_exp = snowpark_fn.to_timestamp_ltz(snowpark_fn.current_timestamp())
|
|
2057
2303
|
case "current_timezone":
|
|
2058
2304
|
result_exp = snowpark_fn.lit(global_config.spark_sql_session_timeZone)
|
|
2059
2305
|
result_type = StringType()
|
|
@@ -2644,7 +2890,7 @@ def map_unresolved_function(
|
|
|
2644
2890
|
return results
|
|
2645
2891
|
|
|
2646
2892
|
spark_function_name = f"from_csv({snowpark_arg_names[0]})"
|
|
2647
|
-
|
|
2893
|
+
result_type = map_type_string_to_snowpark_type(snowpark_arg_names[1])
|
|
2648
2894
|
|
|
2649
2895
|
if len(snowpark_arg_names) > 2 and snowpark_arg_names[2].startswith(
|
|
2650
2896
|
"named_struct"
|
|
@@ -2669,8 +2915,7 @@ def map_unresolved_function(
|
|
|
2669
2915
|
|
|
2670
2916
|
result_exp = snowpark_fn.when(
|
|
2671
2917
|
snowpark_args[0].is_null(), snowpark_fn.lit(None)
|
|
2672
|
-
).otherwise(snowpark_fn.cast(csv_result,
|
|
2673
|
-
result_type = ddl_schema
|
|
2918
|
+
).otherwise(snowpark_fn.cast(csv_result, result_type))
|
|
2674
2919
|
case "from_json":
|
|
2675
2920
|
# TODO: support options.
|
|
2676
2921
|
if len(snowpark_args) > 2:
|
|
@@ -2897,21 +3142,15 @@ def map_unresolved_function(
|
|
|
2897
3142
|
)
|
|
2898
3143
|
result_type = StringType()
|
|
2899
3144
|
case "from_utc_timestamp":
|
|
2900
|
-
|
|
2901
|
-
@cached_udf(
|
|
2902
|
-
input_types=[StringType()],
|
|
2903
|
-
return_type=StringType(),
|
|
2904
|
-
)
|
|
2905
|
-
def map_from_spark_tz(tz):
|
|
2906
|
-
return SPARK_TZ_ABBREVIATIONS_OVERRIDES.get(tz, tz)
|
|
2907
|
-
|
|
2908
|
-
target_tz = map_from_spark_tz(snowpark_args[1])
|
|
3145
|
+
target_tz = _map_from_spark_tz(snowpark_args[1])
|
|
2909
3146
|
result_exp = _try_to_cast(
|
|
2910
3147
|
"try_to_timestamp",
|
|
2911
|
-
snowpark_fn.from_utc_timestamp(snowpark_args[0], target_tz)
|
|
3148
|
+
snowpark_fn.from_utc_timestamp(snowpark_args[0], target_tz).cast(
|
|
3149
|
+
TimestampType()
|
|
3150
|
+
),
|
|
2912
3151
|
snowpark_args[0],
|
|
2913
3152
|
)
|
|
2914
|
-
result_type = TimestampType(
|
|
3153
|
+
result_type = TimestampType()
|
|
2915
3154
|
case "get":
|
|
2916
3155
|
if exp.unresolved_function.arguments[1].HasField("literal"):
|
|
2917
3156
|
index = unwrap_literal(exp.unresolved_function.arguments[1])
|
|
@@ -3761,7 +4000,7 @@ def map_unresolved_function(
|
|
|
3761
4000
|
snowpark_args[1] <= 0, snowpark_fn.lit("")
|
|
3762
4001
|
).otherwise(snowpark_fn.left(*snowpark_args))
|
|
3763
4002
|
result_type = StringType()
|
|
3764
|
-
case "length" | "char_length" | "character_length":
|
|
4003
|
+
case "length" | "char_length" | "character_length" | "len":
|
|
3765
4004
|
if exp.unresolved_function.arguments[0].HasField("literal"):
|
|
3766
4005
|
# Only update the name if it has the literal field.
|
|
3767
4006
|
# If it doesn't, it means it's binary data.
|
|
@@ -3817,18 +4056,14 @@ def map_unresolved_function(
|
|
|
3817
4056
|
).otherwise(snowpark_fn.ln(snowpark_args[0]))
|
|
3818
4057
|
result_type = DoubleType()
|
|
3819
4058
|
case "localtimestamp":
|
|
3820
|
-
|
|
3821
|
-
|
|
4059
|
+
result_type = TimestampType(TimestampTimeZone.NTZ)
|
|
4060
|
+
result_exp = snowpark_fn.to_timestamp_ntz(
|
|
4061
|
+
snowpark_fn.builtin("localtimestamp")()
|
|
4062
|
+
)
|
|
3822
4063
|
case "locate":
|
|
3823
4064
|
substr = unwrap_literal(exp.unresolved_function.arguments[0])
|
|
3824
4065
|
value = snowpark_args[1]
|
|
3825
|
-
|
|
3826
|
-
start_pos = unwrap_literal(exp.unresolved_function.arguments[2])
|
|
3827
|
-
else:
|
|
3828
|
-
# start_pos is an optional argument and if not provided we should default to 1.
|
|
3829
|
-
# This path will only be reached by spark connect scala clients.
|
|
3830
|
-
start_pos = 1
|
|
3831
|
-
spark_function_name = f"locate({', '.join(snowpark_arg_names)}, 1)"
|
|
4066
|
+
start_pos = unwrap_literal(exp.unresolved_function.arguments[2])
|
|
3832
4067
|
|
|
3833
4068
|
if start_pos > 0:
|
|
3834
4069
|
result_exp = snowpark_fn.locate(substr, value, start_pos)
|
|
@@ -3999,7 +4234,7 @@ def map_unresolved_function(
|
|
|
3999
4234
|
match function_name:
|
|
4000
4235
|
case "make_timestamp":
|
|
4001
4236
|
make_function_name = "timestamp_tz_from_parts"
|
|
4002
|
-
result_type =
|
|
4237
|
+
result_type = get_timestamp_type()
|
|
4003
4238
|
case "make_timestamp_ltz":
|
|
4004
4239
|
make_function_name = "timestamp_ltz_from_parts"
|
|
4005
4240
|
result_type = TimestampType(TimestampTimeZone.LTZ)
|
|
@@ -4677,7 +4912,7 @@ def map_unresolved_function(
|
|
|
4677
4912
|
snowpark_args[0],
|
|
4678
4913
|
)
|
|
4679
4914
|
result_type = DateType()
|
|
4680
|
-
case "not":
|
|
4915
|
+
case "not" | "!":
|
|
4681
4916
|
spark_function_name = f"(NOT {snowpark_arg_names[0]})"
|
|
4682
4917
|
result_exp = ~snowpark_args[0]
|
|
4683
4918
|
result_type = BooleanType()
|
|
@@ -5253,9 +5488,8 @@ def map_unresolved_function(
|
|
|
5253
5488
|
# TODO: Seems like more validation of the arguments is appropriate.
|
|
5254
5489
|
args = exp.unresolved_function.arguments
|
|
5255
5490
|
if len(args) > 0:
|
|
5256
|
-
if not (
|
|
5257
|
-
|
|
5258
|
-
or isinstance(snowpark_typed_args[0].typ, NullType)
|
|
5491
|
+
if not isinstance(
|
|
5492
|
+
snowpark_typed_args[0].typ, (IntegerType, LongType, NullType)
|
|
5259
5493
|
):
|
|
5260
5494
|
raise AnalysisException(
|
|
5261
5495
|
f"""[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the ("INT" or "BIGINT") type, however {snowpark_arg_names[0]} has the type "{snowpark_typed_args[0].typ}"""
|
|
@@ -6553,15 +6787,22 @@ def map_unresolved_function(
|
|
|
6553
6787
|
case "timestamp_add":
|
|
6554
6788
|
# Added to DataFrame functions in 4.0.0 - but can be called from SQL in 3.5.3.
|
|
6555
6789
|
spark_function_name = f"timestampadd({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})"
|
|
6790
|
+
|
|
6791
|
+
typ = snowpark_typed_args[2].typ
|
|
6792
|
+
result_type = (
|
|
6793
|
+
typ
|
|
6794
|
+
if isinstance(typ, TimestampType)
|
|
6795
|
+
else TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6796
|
+
)
|
|
6797
|
+
|
|
6556
6798
|
result_exp = snowpark_fn.cast(
|
|
6557
6799
|
snowpark_fn.dateadd(
|
|
6558
6800
|
unwrap_literal(exp.unresolved_function.arguments[0]),
|
|
6559
6801
|
snowpark_args[1],
|
|
6560
6802
|
snowpark_args[2],
|
|
6561
6803
|
),
|
|
6562
|
-
|
|
6804
|
+
result_type,
|
|
6563
6805
|
)
|
|
6564
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.NTZ)
|
|
6565
6806
|
case "timestamp_diff":
|
|
6566
6807
|
# Added to DataFrame functions in 4.0.0 - but can be called from SQL in 3.5.3.
|
|
6567
6808
|
spark_function_name = f"timestampdiff({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})"
|
|
@@ -6574,9 +6815,9 @@ def map_unresolved_function(
|
|
|
6574
6815
|
case "timestamp_micros":
|
|
6575
6816
|
result_exp = snowpark_fn.cast(
|
|
6576
6817
|
snowpark_fn.to_timestamp(snowpark_args[0], 6),
|
|
6577
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6818
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6578
6819
|
)
|
|
6579
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6820
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6580
6821
|
case "timestamp_millis":
|
|
6581
6822
|
if not isinstance(snowpark_typed_args[0].typ, _IntegralType):
|
|
6582
6823
|
raise AnalysisException(
|
|
@@ -6584,9 +6825,9 @@ def map_unresolved_function(
|
|
|
6584
6825
|
)
|
|
6585
6826
|
result_exp = snowpark_fn.cast(
|
|
6586
6827
|
snowpark_fn.to_timestamp(snowpark_args[0] * 1_000, 6),
|
|
6587
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6828
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6588
6829
|
)
|
|
6589
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6830
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6590
6831
|
case "timestamp_seconds":
|
|
6591
6832
|
# Spark allows seconds to be fractional. Snowflake does not allow that
|
|
6592
6833
|
# even though the documentation explicitly says that it does.
|
|
@@ -6599,9 +6840,9 @@ def map_unresolved_function(
|
|
|
6599
6840
|
snowpark_fn.to_timestamp(
|
|
6600
6841
|
snowpark_fn.cast(snowpark_args[0] * 1_000_000, LongType()), 6
|
|
6601
6842
|
),
|
|
6602
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6843
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6603
6844
|
)
|
|
6604
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6845
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6605
6846
|
case "to_char" | "to_varchar":
|
|
6606
6847
|
# The structure of the Spark format string must match: [MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]
|
|
6607
6848
|
# Note the grammar above was retrieved from an error message from PySpark, but it is not entirely accurate.
|
|
@@ -6747,20 +6988,29 @@ def map_unresolved_function(
|
|
|
6747
6988
|
for typed_arg in snowpark_typed_args
|
|
6748
6989
|
]
|
|
6749
6990
|
|
|
6991
|
+
timezone_conf = global_config.get("spark.sql.session.timeZone")
|
|
6992
|
+
|
|
6993
|
+
# Objects do not preserve keys order in Snowflake, so we need to pass them in the array
|
|
6994
|
+
# Not all the types are preserved in Snowflake Object, timestamps and dates are converted to strings
|
|
6995
|
+
# to properly format them types have to be passed as argument
|
|
6750
6996
|
@cached_udf(
|
|
6751
|
-
input_types=[VariantType(), VariantType()],
|
|
6997
|
+
input_types=[VariantType(), ArrayType(), ArrayType(), VariantType()],
|
|
6752
6998
|
return_type=StringType(),
|
|
6999
|
+
packages=["jpype1"],
|
|
6753
7000
|
)
|
|
6754
|
-
def _to_csv(
|
|
6755
|
-
|
|
7001
|
+
def _to_csv(
|
|
7002
|
+
col: dict, keys: list, types: list, options: Optional[dict]
|
|
7003
|
+
) -> str:
|
|
7004
|
+
import datetime
|
|
7005
|
+
|
|
7006
|
+
import jpype
|
|
7007
|
+
|
|
6756
7008
|
if options is not None:
|
|
6757
7009
|
if not isinstance(options, dict):
|
|
6758
7010
|
raise TypeError(
|
|
6759
7011
|
"[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
|
|
6760
7012
|
)
|
|
6761
7013
|
|
|
6762
|
-
sep = options.get("sep") or sep
|
|
6763
|
-
|
|
6764
7014
|
python_to_snowflake_type = {
|
|
6765
7015
|
"str": "STRING",
|
|
6766
7016
|
"bool": "BOOLEAN",
|
|
@@ -6780,22 +7030,166 @@ def map_unresolved_function(
|
|
|
6780
7030
|
f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
|
|
6781
7031
|
)
|
|
6782
7032
|
|
|
7033
|
+
options = options or {}
|
|
7034
|
+
lowercased_options = {
|
|
7035
|
+
key.lower(): value for key, value in options.items()
|
|
7036
|
+
}
|
|
7037
|
+
|
|
7038
|
+
sep = lowercased_options.get("sep") or (
|
|
7039
|
+
lowercased_options.get("delimiter") or ","
|
|
7040
|
+
)
|
|
7041
|
+
quote = lowercased_options.get("quote") or '"'
|
|
7042
|
+
quote_all = lowercased_options.get("quoteall", "false")
|
|
7043
|
+
escape = lowercased_options.get("escape") or "\\"
|
|
7044
|
+
|
|
7045
|
+
ignore_leading_white_space = lowercased_options.get(
|
|
7046
|
+
"ignoreleadingwhitespace", "true"
|
|
7047
|
+
)
|
|
7048
|
+
ignore_trailing_white_space = lowercased_options.get(
|
|
7049
|
+
"ignoretrailingwhitespace", "true"
|
|
7050
|
+
)
|
|
7051
|
+
null_value = lowercased_options.get("nullvalue") or ""
|
|
7052
|
+
empty_value = lowercased_options.get("emptyvalue") or '""'
|
|
7053
|
+
char_to_escape_quote_escaping = (
|
|
7054
|
+
lowercased_options.get("chartoescapequoteescaping") or escape
|
|
7055
|
+
)
|
|
7056
|
+
|
|
7057
|
+
date_format = lowercased_options.get("dateformat") or "yyyy-MM-dd"
|
|
7058
|
+
timestamp_format = (
|
|
7059
|
+
lowercased_options.get("timestampformat")
|
|
7060
|
+
or "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"
|
|
7061
|
+
)
|
|
7062
|
+
timestamp_NTZ_format = (
|
|
7063
|
+
lowercased_options.get("timestampntzformat")
|
|
7064
|
+
or "yyyy-MM-dd'T'HH:mm:ss[.SSS]"
|
|
7065
|
+
)
|
|
7066
|
+
|
|
7067
|
+
def to_boolean(value: str) -> bool:
|
|
7068
|
+
return value.lower() == "true"
|
|
7069
|
+
|
|
7070
|
+
quote_all = to_boolean(quote_all)
|
|
7071
|
+
ignore_leading_white_space = to_boolean(ignore_leading_white_space)
|
|
7072
|
+
ignore_trailing_white_space = to_boolean(ignore_trailing_white_space)
|
|
7073
|
+
|
|
7074
|
+
def escape_str(value: str) -> str:
|
|
7075
|
+
escape_quote = escape + quote if escape != quote else escape
|
|
7076
|
+
return (
|
|
7077
|
+
value.replace(escape, char_to_escape_quote_escaping + escape)
|
|
7078
|
+
.replace(quote, escape_quote)
|
|
7079
|
+
.replace("\r", "\\r")
|
|
7080
|
+
)
|
|
7081
|
+
|
|
7082
|
+
def escape_and_quote_string(value) -> str:
|
|
7083
|
+
if quote_all:
|
|
7084
|
+
return f"{quote}{escape_str(str(value))}{quote}"
|
|
7085
|
+
return str(value)
|
|
7086
|
+
|
|
7087
|
+
time_types = ("date", "timestamp", "timestamp_ntz")
|
|
7088
|
+
maps_timestamps = any(
|
|
7089
|
+
python_type in time_types for python_type in types
|
|
7090
|
+
)
|
|
7091
|
+
|
|
7092
|
+
# Multiple execution of the UDF are done within the same process, that's why we need to check if the JVM was not already started
|
|
7093
|
+
if maps_timestamps and not jpype.isJVMStarted():
|
|
7094
|
+
jpype.startJVM()
|
|
7095
|
+
|
|
7096
|
+
if maps_timestamps:
|
|
7097
|
+
ZonedDateTime = jpype.JClass("java.time.ZonedDateTime")
|
|
7098
|
+
ZoneId = jpype.JClass("java.time.ZoneId")
|
|
7099
|
+
DateTimeFormatter = jpype.JClass(
|
|
7100
|
+
"java.time.format.DateTimeFormatter"
|
|
7101
|
+
)
|
|
7102
|
+
Instant = jpype.JClass("java.time.Instant")
|
|
7103
|
+
LocalDate = jpype.JClass("java.time.LocalDate")
|
|
7104
|
+
LocalDateTime = jpype.JClass("java.time.LocalDateTime")
|
|
7105
|
+
timestamp_formatter = DateTimeFormatter.ofPattern(timestamp_format)
|
|
7106
|
+
timestamp_ntz_formatter = DateTimeFormatter.ofPattern(
|
|
7107
|
+
timestamp_NTZ_format
|
|
7108
|
+
)
|
|
7109
|
+
date_formatter = DateTimeFormatter.ofPattern(date_format)
|
|
7110
|
+
|
|
6783
7111
|
result = []
|
|
6784
|
-
for
|
|
7112
|
+
for key, python_type in zip(keys, types):
|
|
7113
|
+
value = col.get(key)
|
|
6785
7114
|
if value is None:
|
|
6786
|
-
result.append(
|
|
7115
|
+
result.append(escape_and_quote_string(null_value))
|
|
7116
|
+
elif python_type in ("date", "timestamp", "timestamp_ntz"):
|
|
7117
|
+
match python_type:
|
|
7118
|
+
case "date":
|
|
7119
|
+
value = datetime.datetime.strptime(value, "%Y-%m-%d")
|
|
7120
|
+
local_date = LocalDate.of(
|
|
7121
|
+
value.year, value.month, value.day
|
|
7122
|
+
)
|
|
7123
|
+
formatted_date = date_formatter.format(local_date)
|
|
7124
|
+
result.append(escape_and_quote_string(formatted_date))
|
|
7125
|
+
case "timestamp":
|
|
7126
|
+
try:
|
|
7127
|
+
value = datetime.datetime.strptime(
|
|
7128
|
+
value, "%Y-%m-%d %H:%M:%S.%f %z"
|
|
7129
|
+
)
|
|
7130
|
+
except ValueError:
|
|
7131
|
+
# Fallback to the format without microseconds
|
|
7132
|
+
value = datetime.datetime.strptime(
|
|
7133
|
+
value, "%Y-%m-%d %H:%M:%S %z"
|
|
7134
|
+
)
|
|
7135
|
+
instant = Instant.ofEpochMilli(
|
|
7136
|
+
int(value.timestamp() * 1000)
|
|
7137
|
+
)
|
|
7138
|
+
zdt = ZonedDateTime.ofInstant(
|
|
7139
|
+
instant, ZoneId.of(timezone_conf)
|
|
7140
|
+
)
|
|
7141
|
+
str_value = timestamp_formatter.format(zdt)
|
|
7142
|
+
result.append(escape_and_quote_string(str_value))
|
|
7143
|
+
case "timestamp_ntz":
|
|
7144
|
+
try:
|
|
7145
|
+
value = datetime.datetime.strptime(
|
|
7146
|
+
value, "%Y-%m-%d %H:%M:%S.%f"
|
|
7147
|
+
)
|
|
7148
|
+
except ValueError:
|
|
7149
|
+
# Fallback to the format without microseconds
|
|
7150
|
+
value = datetime.datetime.strptime(
|
|
7151
|
+
value, "%Y-%m-%d %H:%M:%S"
|
|
7152
|
+
)
|
|
7153
|
+
timestamp_ntz = LocalDateTime.of(
|
|
7154
|
+
value.year,
|
|
7155
|
+
value.month,
|
|
7156
|
+
value.day,
|
|
7157
|
+
value.hour,
|
|
7158
|
+
value.minute,
|
|
7159
|
+
value.second,
|
|
7160
|
+
value.microsecond * 1000,
|
|
7161
|
+
)
|
|
7162
|
+
str_value = timestamp_ntz_formatter.format(
|
|
7163
|
+
timestamp_ntz
|
|
7164
|
+
)
|
|
7165
|
+
result.append(escape_and_quote_string(str_value))
|
|
7166
|
+
case _:
|
|
7167
|
+
raise ValueError(
|
|
7168
|
+
f"Unable to determine type for value: {python_type}"
|
|
7169
|
+
)
|
|
6787
7170
|
elif isinstance(value, str):
|
|
6788
|
-
|
|
6789
|
-
|
|
7171
|
+
strip_value = (
|
|
7172
|
+
value.lstrip() if ignore_leading_white_space else value
|
|
7173
|
+
)
|
|
7174
|
+
strip_value = (
|
|
7175
|
+
strip_value.rstrip()
|
|
7176
|
+
if ignore_trailing_white_space
|
|
7177
|
+
else strip_value
|
|
7178
|
+
)
|
|
7179
|
+
if strip_value == "":
|
|
7180
|
+
result.append(escape_and_quote_string(empty_value))
|
|
7181
|
+
elif (
|
|
7182
|
+
any(c in value for c in (sep, "\r", "\n", quote))
|
|
7183
|
+
or quote_all
|
|
6790
7184
|
):
|
|
6791
|
-
|
|
6792
|
-
result.append(
|
|
7185
|
+
strip_value = escape_str(strip_value)
|
|
7186
|
+
result.append(quote + strip_value + quote)
|
|
6793
7187
|
else:
|
|
6794
|
-
result.append(
|
|
7188
|
+
result.append(escape_and_quote_string(strip_value))
|
|
6795
7189
|
elif isinstance(value, bool):
|
|
6796
|
-
result.append(str(value).lower())
|
|
7190
|
+
result.append(escape_and_quote_string(str(value).lower()))
|
|
6797
7191
|
else:
|
|
6798
|
-
result.append(str(value))
|
|
7192
|
+
result.append(escape_and_quote_string(str(value)))
|
|
6799
7193
|
|
|
6800
7194
|
return sep.join(result)
|
|
6801
7195
|
|
|
@@ -6808,11 +7202,36 @@ def map_unresolved_function(
|
|
|
6808
7202
|
"[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
|
|
6809
7203
|
)
|
|
6810
7204
|
|
|
7205
|
+
def get_snowpark_type_name(snowpark_type: DataType) -> str:
|
|
7206
|
+
return (
|
|
7207
|
+
(
|
|
7208
|
+
"timestamp"
|
|
7209
|
+
if not snowpark_type.tz == snowpark.types.TimestampTimeZone.NTZ
|
|
7210
|
+
else "timestamp_ntz"
|
|
7211
|
+
)
|
|
7212
|
+
if snowpark_type == TimestampType()
|
|
7213
|
+
else snowpark_type.type_name().lower()
|
|
7214
|
+
)
|
|
7215
|
+
|
|
7216
|
+
field_names = snowpark_fn.array_construct(
|
|
7217
|
+
*[
|
|
7218
|
+
snowpark_fn.lit(value)
|
|
7219
|
+
for value in snowpark_typed_args[0].typ.fieldNames
|
|
7220
|
+
]
|
|
7221
|
+
)
|
|
7222
|
+
field_types = snowpark_fn.array_construct(
|
|
7223
|
+
*[
|
|
7224
|
+
snowpark_fn.lit(get_snowpark_type_name(value.datatype))
|
|
7225
|
+
for value in snowpark_typed_args[0].typ.fields
|
|
7226
|
+
]
|
|
7227
|
+
)
|
|
6811
7228
|
match snowpark_args:
|
|
6812
7229
|
case [csv_data]:
|
|
6813
|
-
result_exp = _to_csv(
|
|
7230
|
+
result_exp = _to_csv(
|
|
7231
|
+
csv_data, field_names, field_types, snowpark_fn.lit(None)
|
|
7232
|
+
)
|
|
6814
7233
|
case [csv_data, options]:
|
|
6815
|
-
result_exp = _to_csv(csv_data, options)
|
|
7234
|
+
result_exp = _to_csv(csv_data, field_names, field_types, options)
|
|
6816
7235
|
case _:
|
|
6817
7236
|
raise ValueError("Unrecognized from_csv parameters")
|
|
6818
7237
|
result_type = StringType()
|
|
@@ -6913,10 +7332,8 @@ def map_unresolved_function(
|
|
|
6913
7332
|
)
|
|
6914
7333
|
case _:
|
|
6915
7334
|
raise ValueError(f"Invalid number of arguments to {function_name}")
|
|
6916
|
-
result_exp = snowpark_fn.cast(
|
|
6917
|
-
|
|
6918
|
-
)
|
|
6919
|
-
result_type = TimestampType()
|
|
7335
|
+
result_exp = snowpark_fn.cast(result_exp, get_timestamp_type())
|
|
7336
|
+
result_type = get_timestamp_type()
|
|
6920
7337
|
|
|
6921
7338
|
case "to_timestamp_ltz":
|
|
6922
7339
|
match (snowpark_typed_args, exp.unresolved_function.arguments):
|
|
@@ -6942,7 +7359,12 @@ def map_unresolved_function(
|
|
|
6942
7359
|
match (snowpark_typed_args, exp.unresolved_function.arguments):
|
|
6943
7360
|
case ([e], _):
|
|
6944
7361
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(e.col)
|
|
6945
|
-
case ([e, _], _) if
|
|
7362
|
+
case ([e, _], _) if isinstance(e.typ, DateType):
|
|
7363
|
+
result_exp = snowpark_fn.convert_timezone(
|
|
7364
|
+
snowpark_fn.lit("UTC"),
|
|
7365
|
+
snowpark_fn.builtin("to_timestamp_ntz")(e.col),
|
|
7366
|
+
)
|
|
7367
|
+
case ([e, _], _) if isinstance(e.typ, TimestampType):
|
|
6946
7368
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(e.col)
|
|
6947
7369
|
case ([e, _], [_, fmt]):
|
|
6948
7370
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(
|
|
@@ -7003,25 +7425,17 @@ def map_unresolved_function(
|
|
|
7003
7425
|
result_type = LongType()
|
|
7004
7426
|
|
|
7005
7427
|
case "to_utc_timestamp":
|
|
7006
|
-
|
|
7007
|
-
@cached_udf(
|
|
7008
|
-
input_types=[StringType()],
|
|
7009
|
-
return_type=StringType(),
|
|
7010
|
-
)
|
|
7011
|
-
def map_timezone(short_tz: str) -> str:
|
|
7012
|
-
return SPARK_TZ_ABBREVIATIONS_OVERRIDES.get(short_tz, short_tz)
|
|
7013
|
-
|
|
7428
|
+
result_type = TimestampType()
|
|
7014
7429
|
result_exp = _try_to_cast(
|
|
7015
7430
|
"try_to_timestamp",
|
|
7016
7431
|
snowpark_fn.cast(
|
|
7017
7432
|
snowpark_fn.to_utc_timestamp(
|
|
7018
|
-
snowpark_args[0],
|
|
7433
|
+
snowpark_args[0], _map_from_spark_tz(snowpark_args[1])
|
|
7019
7434
|
),
|
|
7020
|
-
|
|
7435
|
+
result_type,
|
|
7021
7436
|
),
|
|
7022
7437
|
snowpark_args[0],
|
|
7023
7438
|
)
|
|
7024
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.NTZ)
|
|
7025
7439
|
case "translate":
|
|
7026
7440
|
src_alphabet = unwrap_literal(exp.unresolved_function.arguments[1])
|
|
7027
7441
|
target_alphabet = unwrap_literal(exp.unresolved_function.arguments[2])
|
|
@@ -7414,8 +7828,8 @@ def map_unresolved_function(
|
|
|
7414
7828
|
)
|
|
7415
7829
|
case _:
|
|
7416
7830
|
raise ValueError(f"Invalid number of arguments to {function_name}")
|
|
7417
|
-
|
|
7418
|
-
|
|
7831
|
+
result_type = get_timestamp_type()
|
|
7832
|
+
result_exp = snowpark_fn.cast(result_exp, result_type)
|
|
7419
7833
|
case "typeof":
|
|
7420
7834
|
col_snowpark_typ = snowpark_typed_args[0].typ
|
|
7421
7835
|
spark_typ = map_snowpark_to_pyspark_types(col_snowpark_typ)
|
|
@@ -7685,14 +8099,10 @@ def map_unresolved_function(
|
|
|
7685
8099
|
result_exp = result_exp.when(snowpark_args[i], snowpark_args[i + 1])
|
|
7686
8100
|
result_type_indexes.append(i + 1)
|
|
7687
8101
|
name_components.append("END")
|
|
7688
|
-
|
|
7689
|
-
|
|
7690
|
-
lambda: [
|
|
7691
|
-
_find_common_type(
|
|
7692
|
-
[snowpark_typed_args[i].typ for i in result_type_indexes]
|
|
7693
|
-
)
|
|
7694
|
-
],
|
|
8102
|
+
result_type = _find_common_type(
|
|
8103
|
+
[snowpark_typed_args[i].typ for i in result_type_indexes]
|
|
7695
8104
|
)
|
|
8105
|
+
result_exp = snowpark_fn.cast(result_exp, result_type)
|
|
7696
8106
|
spark_function_name = " ".join(name_components)
|
|
7697
8107
|
case "width_bucket":
|
|
7698
8108
|
width_bucket_fn = snowpark_fn.function("width_bucket")
|
|
@@ -7966,6 +8376,18 @@ def map_unresolved_function(
|
|
|
7966
8376
|
return spark_col_names, typed_col
|
|
7967
8377
|
|
|
7968
8378
|
|
|
8379
|
+
def _cast_helper(column: Column, to: DataType) -> Column:
|
|
8380
|
+
if global_config.spark_sql_ansi_enabled:
|
|
8381
|
+
column_mediator = (
|
|
8382
|
+
snowpark_fn.cast(column, StringType())
|
|
8383
|
+
if isinstance(to, DecimalType)
|
|
8384
|
+
else column
|
|
8385
|
+
)
|
|
8386
|
+
return snowpark_fn.cast(column_mediator, to)
|
|
8387
|
+
else:
|
|
8388
|
+
return _try_cast_helper(column, to)
|
|
8389
|
+
|
|
8390
|
+
|
|
7969
8391
|
def _try_cast_helper(column: Column, to: DataType) -> Column:
|
|
7970
8392
|
"""
|
|
7971
8393
|
Attempts to cast a given column to a specified data type using the same behaviour as Spark.
|
|
@@ -9120,7 +9542,7 @@ def _get_type_precision(typ: DataType) -> tuple[int, int]:
|
|
|
9120
9542
|
case LongType():
|
|
9121
9543
|
return 20, 0 # -9223372036854775808 to 9223372036854775807
|
|
9122
9544
|
case NullType():
|
|
9123
|
-
return
|
|
9545
|
+
return 0, 0 # NULL
|
|
9124
9546
|
case _:
|
|
9125
9547
|
return 38, 0 # Default to maximum precision for other types
|
|
9126
9548
|
|
|
@@ -9163,58 +9585,70 @@ def _decimal_add_sub_result_type_helper(p1, s1, p2, s2):
|
|
|
9163
9585
|
return result_precision, min_scale, return_type_precision, return_type_scale
|
|
9164
9586
|
|
|
9165
9587
|
|
|
9166
|
-
def
|
|
9167
|
-
|
|
9168
|
-
|
|
9169
|
-
|
|
9170
|
-
|
|
9171
|
-
if
|
|
9172
|
-
|
|
9588
|
+
def _get_decimal_multiplication_result_exp(
|
|
9589
|
+
result_type: DecimalType | DataType,
|
|
9590
|
+
other_type: DataType,
|
|
9591
|
+
snowpark_args: list[Column],
|
|
9592
|
+
) -> Column:
|
|
9593
|
+
if global_config.spark_sql_ansi_enabled:
|
|
9594
|
+
result_exp = snowpark_args[0] * snowpark_args[1]
|
|
9173
9595
|
else:
|
|
9174
|
-
|
|
9175
|
-
|
|
9176
|
-
|
|
9177
|
-
while result_scale > min_scale:
|
|
9178
|
-
result_scale -= 1
|
|
9179
|
-
return_type_scale = result_scale
|
|
9180
|
-
result_precision = (p1 - s1) + (p2 - s2) + result_scale + 1
|
|
9181
|
-
if result_precision <= 38:
|
|
9182
|
-
return (
|
|
9183
|
-
result_precision,
|
|
9184
|
-
result_scale,
|
|
9185
|
-
return_type_precision,
|
|
9186
|
-
return_type_scale,
|
|
9596
|
+
if isinstance(other_type, _IntegralType):
|
|
9597
|
+
result_exp = snowpark_args[0].cast(result_type) * snowpark_args[1].cast(
|
|
9598
|
+
result_type
|
|
9187
9599
|
)
|
|
9188
|
-
|
|
9189
|
-
|
|
9190
|
-
|
|
9600
|
+
else:
|
|
9601
|
+
result_exp = snowpark_args[0].cast(DoubleType()) * snowpark_args[1].cast(
|
|
9602
|
+
DoubleType()
|
|
9603
|
+
)
|
|
9604
|
+
result_exp = _try_cast_helper(result_exp, result_type)
|
|
9605
|
+
return result_exp
|
|
9191
9606
|
|
|
9192
9607
|
|
|
9193
|
-
def
|
|
9194
|
-
|
|
9195
|
-
|
|
9196
|
-
|
|
9197
|
-
|
|
9198
|
-
|
|
9199
|
-
|
|
9608
|
+
def _get_decimal_multiplication_result_type(p1, s1, p2, s2) -> DecimalType:
|
|
9609
|
+
result_precision = p1 + p2 + 1
|
|
9610
|
+
result_scale = s1 + s2
|
|
9611
|
+
if result_precision > 38:
|
|
9612
|
+
if result_scale > 6:
|
|
9613
|
+
overflow = result_precision - 38
|
|
9614
|
+
result_scale = max(6, result_scale - overflow)
|
|
9615
|
+
result_precision = 38
|
|
9616
|
+
return DecimalType(result_precision, result_scale)
|
|
9617
|
+
|
|
9618
|
+
|
|
9619
|
+
def _get_decimal_division_result_exp(
|
|
9620
|
+
result_type: DecimalType | DataType,
|
|
9621
|
+
other_type: DataType,
|
|
9622
|
+
overflow_detected: bool,
|
|
9623
|
+
snowpark_args: list[Column],
|
|
9624
|
+
spark_function_name: str,
|
|
9625
|
+
) -> Column:
|
|
9626
|
+
if isinstance(other_type, DecimalType) and overflow_detected:
|
|
9627
|
+
if global_config.spark_sql_ansi_enabled:
|
|
9628
|
+
raise ArithmeticException(
|
|
9629
|
+
f'[NUMERIC_VALUE_OUT_OF_RANGE] {spark_function_name} cannot be represented as Decimal({result_type.precision}, {result_type.scale}). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9630
|
+
)
|
|
9631
|
+
else:
|
|
9632
|
+
result_exp = snowpark_fn.lit(None).cast(result_type)
|
|
9200
9633
|
else:
|
|
9201
|
-
|
|
9634
|
+
dividend = snowpark_args[0].cast(DoubleType())
|
|
9635
|
+
divisor = snowpark_args[1]
|
|
9636
|
+
result_exp = _divnull(dividend, divisor)
|
|
9637
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
9638
|
+
return result_exp
|
|
9202
9639
|
|
|
9203
|
-
min_scale = 6
|
|
9204
|
-
while result_scale > min_scale:
|
|
9205
|
-
result_scale -= 1
|
|
9206
|
-
return_type_scale = result_scale
|
|
9207
|
-
result_precision = p1 - s1 + s2 + result_scale
|
|
9208
|
-
if result_precision <= 38:
|
|
9209
|
-
return (
|
|
9210
|
-
result_precision,
|
|
9211
|
-
result_scale,
|
|
9212
|
-
return_type_precision,
|
|
9213
|
-
return_type_scale,
|
|
9214
|
-
)
|
|
9215
9640
|
|
|
9216
|
-
|
|
9217
|
-
|
|
9641
|
+
def _get_decimal_division_result_type(p1, s1, p2, s2) -> tuple[DecimalType, bool]:
|
|
9642
|
+
overflow_detected = False
|
|
9643
|
+
result_scale = max(6, s1 + p2 + 1)
|
|
9644
|
+
result_precision = p1 - s1 + s2 + result_scale
|
|
9645
|
+
if result_precision > 38:
|
|
9646
|
+
if result_precision > 40:
|
|
9647
|
+
overflow_detected = True
|
|
9648
|
+
overflow = result_precision - 38
|
|
9649
|
+
result_scale = max(6, result_scale - overflow)
|
|
9650
|
+
result_precision = 38
|
|
9651
|
+
return DecimalType(result_precision, result_scale), overflow_detected
|
|
9218
9652
|
|
|
9219
9653
|
|
|
9220
9654
|
def _try_arithmetic_helper(
|
|
@@ -9409,102 +9843,108 @@ def _try_arithmetic_helper(
|
|
|
9409
9843
|
return snowpark_fn.lit(None)
|
|
9410
9844
|
|
|
9411
9845
|
|
|
9412
|
-
def
|
|
9413
|
-
|
|
9414
|
-
|
|
9415
|
-
|
|
9416
|
-
|
|
9417
|
-
|
|
9418
|
-
|
|
9419
|
-
|
|
9420
|
-
|
|
9421
|
-
|
|
9422
|
-
|
|
9423
|
-
|
|
9424
|
-
|
|
9425
|
-
|
|
9426
|
-
|
|
9427
|
-
|
|
9428
|
-
|
|
9429
|
-
|
|
9430
|
-
|
|
9431
|
-
|
|
9432
|
-
|
|
9433
|
-
|
|
9434
|
-
|
|
9435
|
-
|
|
9436
|
-
|
|
9437
|
-
|
|
9438
|
-
|
|
9439
|
-
|
|
9440
|
-
|
|
9441
|
-
|
|
9846
|
+
def _get_add_sub_result_type(
|
|
9847
|
+
type1: DataType,
|
|
9848
|
+
type2: DataType,
|
|
9849
|
+
spark_function_name: str,
|
|
9850
|
+
) -> DataType:
|
|
9851
|
+
result_type = _find_common_type([type1, type2])
|
|
9852
|
+
match result_type:
|
|
9853
|
+
case DecimalType():
|
|
9854
|
+
p1, s1 = _get_type_precision(type1)
|
|
9855
|
+
p2, s2 = _get_type_precision(type2)
|
|
9856
|
+
result_scale = max(s1, s2)
|
|
9857
|
+
result_precision = max(p1 - s1, p2 - s2) + result_scale + 1
|
|
9858
|
+
if result_precision > 38:
|
|
9859
|
+
if result_scale > 6:
|
|
9860
|
+
overflow = result_precision - 38
|
|
9861
|
+
result_scale = max(6, result_scale - overflow)
|
|
9862
|
+
result_precision = 38
|
|
9863
|
+
result_type = DecimalType(result_precision, result_scale)
|
|
9864
|
+
case NullType():
|
|
9865
|
+
result_type = DoubleType()
|
|
9866
|
+
case StringType():
|
|
9867
|
+
match (type1, type2):
|
|
9868
|
+
case (_FractionalType(), _) | (_, _FractionalType()):
|
|
9869
|
+
result_type = DoubleType()
|
|
9870
|
+
case (_IntegralType(), _) | (_, _IntegralType()):
|
|
9871
|
+
result_type = (
|
|
9872
|
+
LongType()
|
|
9873
|
+
if global_config.spark_sql_ansi_enabled
|
|
9874
|
+
else DoubleType()
|
|
9875
|
+
)
|
|
9876
|
+
case _:
|
|
9877
|
+
if global_config.spark_sql_ansi_enabled:
|
|
9878
|
+
raise AnalysisException(
|
|
9879
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".',
|
|
9880
|
+
)
|
|
9881
|
+
else:
|
|
9882
|
+
result_type = DoubleType()
|
|
9883
|
+
case BooleanType():
|
|
9884
|
+
raise AnalysisException(
|
|
9885
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "BOOLEAN".',
|
|
9442
9886
|
)
|
|
9443
|
-
|
|
9887
|
+
return result_type
|
|
9444
9888
|
|
|
9445
|
-
left_operand, right_operand = snowpark_args[0], snowpark_args[1]
|
|
9446
9889
|
|
|
9447
|
-
|
|
9448
|
-
|
|
9449
|
-
|
|
9450
|
-
|
|
9451
|
-
|
|
9452
|
-
|
|
9453
|
-
|
|
9454
|
-
|
|
9455
|
-
|
|
9890
|
+
def _get_spark_function_name(
|
|
9891
|
+
col1: TypedColumn,
|
|
9892
|
+
col2: TypedColumn,
|
|
9893
|
+
snowpark_arg_names: list[str],
|
|
9894
|
+
exp: expressions_proto.Expression,
|
|
9895
|
+
default_spark_function_name: str,
|
|
9896
|
+
function_name: str,
|
|
9897
|
+
):
|
|
9898
|
+
operation_op = function_name
|
|
9899
|
+
match function_name:
|
|
9900
|
+
case "+":
|
|
9901
|
+
operation_func = "date_add"
|
|
9902
|
+
case "-":
|
|
9903
|
+
operation_func = "date_sub"
|
|
9904
|
+
case _:
|
|
9905
|
+
return default_spark_function_name
|
|
9906
|
+
match (col1.typ, col2.typ):
|
|
9907
|
+
case (DateType(), DateType()):
|
|
9908
|
+
date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
|
|
9909
|
+
date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
|
|
9910
|
+
return f"({date_param_name1} {operation_op} {date_param_name2})"
|
|
9911
|
+
case (StringType(), DateType()):
|
|
9912
|
+
date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
|
|
9913
|
+
if "INTERVAL" == col1.col._expr1.pretty_name:
|
|
9914
|
+
return f"{date_param_name2} {operation_op} {snowpark_arg_names[0]}"
|
|
9915
|
+
elif global_config.spark_sql_ansi_enabled and function_name == "+":
|
|
9916
|
+
return f"{operation_func}(cast({date_param_name2} as date), cast({snowpark_arg_names[0]} as double))"
|
|
9917
|
+
else:
|
|
9918
|
+
return f"({snowpark_arg_names[0]} {operation_op} {date_param_name2})"
|
|
9919
|
+
case (DateType(), StringType()):
|
|
9920
|
+
date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
|
|
9921
|
+
if (
|
|
9922
|
+
global_config.spark_sql_ansi_enabled
|
|
9923
|
+
or "INTERVAL" == col2.col._expr1.pretty_name
|
|
9924
|
+
):
|
|
9925
|
+
return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
|
|
9926
|
+
else:
|
|
9927
|
+
return f"{operation_func}(cast({date_param_name1} as date), cast({snowpark_arg_names[1]} as double))"
|
|
9928
|
+
case (DateType() as dt, _) | (_, DateType() as dt):
|
|
9929
|
+
date_param_index = 0 if dt == col1.typ else 1
|
|
9930
|
+
date_param_name = _get_literal_param_name(
|
|
9931
|
+
exp, date_param_index, snowpark_arg_names[date_param_index]
|
|
9932
|
+
)
|
|
9933
|
+
return f"{operation_func}({date_param_name}, {snowpark_arg_names[1 - date_param_index]})"
|
|
9934
|
+
case _:
|
|
9935
|
+
return default_spark_function_name
|
|
9456
9936
|
|
|
9457
9937
|
|
|
9458
|
-
def
|
|
9459
|
-
|
|
9460
|
-
|
|
9461
|
-
|
|
9462
|
-
|
|
9463
|
-
|
|
9464
|
-
|
|
9465
|
-
|
|
9466
|
-
|
|
9467
|
-
|
|
9468
|
-
return_type_scale,
|
|
9469
|
-
) = _decimal_multiply_result_type_helper(p1, s1, p2, s2)
|
|
9470
|
-
else: # division
|
|
9471
|
-
(
|
|
9472
|
-
new_precision,
|
|
9473
|
-
new_scale,
|
|
9474
|
-
return_type_precision,
|
|
9475
|
-
return_type_scale,
|
|
9476
|
-
) = _decimal_divide_result_type_helper(p1, s1, p2, s2)
|
|
9477
|
-
|
|
9478
|
-
if isinstance(typed_args[0].typ, DecimalType) and isinstance(
|
|
9479
|
-
typed_args[1].typ, DecimalType
|
|
9480
|
-
):
|
|
9481
|
-
# Overflow check for both decimal types
|
|
9482
|
-
if new_precision > 38:
|
|
9483
|
-
if global_config.spark_sql_ansi_enabled:
|
|
9484
|
-
raise ArithmeticException(
|
|
9485
|
-
f'[NUMERIC_VALUE_OUT_OF_RANGE] Precision {new_precision} exceeds maximum allowed precision of 38. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9486
|
-
)
|
|
9487
|
-
return snowpark_fn.lit(None), (return_type_precision, return_type_scale)
|
|
9488
|
-
|
|
9489
|
-
# Overflow check if one operand is an Integral
|
|
9490
|
-
if new_precision > 38:
|
|
9491
|
-
if global_config.spark_sql_ansi_enabled:
|
|
9492
|
-
raise ArithmeticException(
|
|
9493
|
-
f'[NUMERIC_VALUE_OUT_OF_RANGE] Precision {new_precision} exceeds maximum allowed precision of 38. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9494
|
-
)
|
|
9495
|
-
new_precision = 38
|
|
9496
|
-
if new_scale > new_precision:
|
|
9497
|
-
new_scale = new_precision
|
|
9498
|
-
|
|
9499
|
-
left_operand, right_operand = snowpark_args[0], snowpark_args[1]
|
|
9500
|
-
if operation_type == 0: # multiplication
|
|
9501
|
-
result = left_operand * right_operand
|
|
9502
|
-
else: # division
|
|
9503
|
-
result = _divnull(left_operand, right_operand)
|
|
9504
|
-
return snowpark_fn.cast(result, DecimalType(new_precision, new_scale)), (
|
|
9505
|
-
return_type_precision,
|
|
9506
|
-
return_type_scale,
|
|
9507
|
-
)
|
|
9938
|
+
def _get_literal_param_name(exp, arg_index: int, default_param_name: str):
|
|
9939
|
+
try:
|
|
9940
|
+
date_param_name = (
|
|
9941
|
+
exp.unresolved_function.arguments[arg_index]
|
|
9942
|
+
.unresolved_function.arguments[0]
|
|
9943
|
+
.literal.string
|
|
9944
|
+
)
|
|
9945
|
+
except (IndexError, AttributeError):
|
|
9946
|
+
date_param_name = default_param_name
|
|
9947
|
+
return date_param_name
|
|
9508
9948
|
|
|
9509
9949
|
|
|
9510
9950
|
def _raise_error_helper(return_type: DataType, error_class=None):
|
|
@@ -9819,3 +10259,34 @@ def _trim_helper(value: Column, trim_value: Column, trim_type: Column) -> Column
|
|
|
9819
10259
|
return value
|
|
9820
10260
|
|
|
9821
10261
|
return _binary_trim_udf(value, trim_value, trim_type)
|
|
10262
|
+
|
|
10263
|
+
|
|
10264
|
+
def _map_from_spark_tz(value: Column) -> Column:
|
|
10265
|
+
return (
|
|
10266
|
+
snowpark_fn.when(value == "ACT", snowpark_fn.lit("Australia/Darwin"))
|
|
10267
|
+
.when(value == "AET", snowpark_fn.lit("Australia/Sydney"))
|
|
10268
|
+
.when(value == "AGT", snowpark_fn.lit("America/Argentina/Buenos_Aires"))
|
|
10269
|
+
.when(value == "ART", snowpark_fn.lit("Africa/Cairo"))
|
|
10270
|
+
.when(value == "AST", snowpark_fn.lit("America/Anchorage"))
|
|
10271
|
+
.when(value == "BET", snowpark_fn.lit("America/Sao_Paulo"))
|
|
10272
|
+
.when(value == "BST", snowpark_fn.lit("Asia/Dhaka"))
|
|
10273
|
+
.when(value == "CAT", snowpark_fn.lit("Africa/Harare"))
|
|
10274
|
+
.when(value == "CNT", snowpark_fn.lit("America/St_Johns"))
|
|
10275
|
+
.when(value == "CST", snowpark_fn.lit("America/Chicago"))
|
|
10276
|
+
.when(value == "CTT", snowpark_fn.lit("Asia/Shanghai"))
|
|
10277
|
+
.when(value == "EAT", snowpark_fn.lit("Africa/Addis_Ababa"))
|
|
10278
|
+
.when(value == "ECT", snowpark_fn.lit("Europe/Paris"))
|
|
10279
|
+
.when(value == "IET", snowpark_fn.lit("America/Indiana/Indianapolis"))
|
|
10280
|
+
.when(value == "IST", snowpark_fn.lit("Asia/Kolkata"))
|
|
10281
|
+
.when(value == "JST", snowpark_fn.lit("Asia/Tokyo"))
|
|
10282
|
+
.when(value == "MIT", snowpark_fn.lit("Pacific/Apia"))
|
|
10283
|
+
.when(value == "NET", snowpark_fn.lit("Asia/Yerevan"))
|
|
10284
|
+
.when(value == "NST", snowpark_fn.lit("Pacific/Auckland"))
|
|
10285
|
+
.when(value == "PLT", snowpark_fn.lit("Asia/Karachi"))
|
|
10286
|
+
.when(value == "PNT", snowpark_fn.lit("America/Phoenix"))
|
|
10287
|
+
.when(value == "PRT", snowpark_fn.lit("America/Puerto_Rico"))
|
|
10288
|
+
.when(value == "PST", snowpark_fn.lit("America/Los_Angeles"))
|
|
10289
|
+
.when(value == "SST", snowpark_fn.lit("Pacific/Guadalcanal"))
|
|
10290
|
+
.when(value == "VST", snowpark_fn.lit("Asia/Ho_Chi_Minh"))
|
|
10291
|
+
.otherwise(value) # Return original timezone if no mapping found
|
|
10292
|
+
)
|