snowpark-connect 0.24.0__py3-none-any.whl → 0.26.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic; see the package's registry page for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +23 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +22 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/literal.py +13 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +88 -29
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +840 -367
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
- snowflake/snowpark_connect/relation/map_extension.py +52 -11
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_map_partitions.py +9 -4
- snowflake/snowpark_connect/relation/map_relation.py +12 -1
- snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +100 -46
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/utils.py +44 -0
- snowflake/snowpark_connect/relation/write/map_write.py +175 -75
- snowflake/snowpark_connect/resources_initializer.py +47 -6
- snowflake/snowpark_connect/server.py +26 -4
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
- snowflake/snowpark_connect/utils/scala_udf_utils.py +596 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +22 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +1 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.26.0.dist-info/RECORD +481 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.24.0.dist-info/RECORD +0 -898
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/top_level.txt +0 -0
|
@@ -73,11 +73,11 @@ from snowflake.snowpark_connect.column_name_handler import (
|
|
|
73
73
|
)
|
|
74
74
|
from snowflake.snowpark_connect.config import (
|
|
75
75
|
get_boolean_session_config_param,
|
|
76
|
+
get_timestamp_type,
|
|
76
77
|
global_config,
|
|
77
78
|
)
|
|
78
79
|
from snowflake.snowpark_connect.constants import (
|
|
79
80
|
DUPLICATE_KEY_FOUND_ERROR_TEMPLATE,
|
|
80
|
-
SPARK_TZ_ABBREVIATIONS_OVERRIDES,
|
|
81
81
|
STRUCTURED_TYPES_ENABLED,
|
|
82
82
|
)
|
|
83
83
|
from snowflake.snowpark_connect.expression.function_defaults import (
|
|
@@ -100,7 +100,6 @@ from snowflake.snowpark_connect.type_mapping import (
|
|
|
100
100
|
map_snowpark_to_pyspark_types,
|
|
101
101
|
map_spark_timestamp_format_expression,
|
|
102
102
|
map_type_string_to_snowpark_type,
|
|
103
|
-
parse_ddl_string,
|
|
104
103
|
)
|
|
105
104
|
from snowflake.snowpark_connect.typed_column import (
|
|
106
105
|
TypedColumn,
|
|
@@ -209,19 +208,6 @@ def _validate_numeric_args(
|
|
|
209
208
|
return modified_args
|
|
210
209
|
|
|
211
210
|
|
|
212
|
-
def get_timestamp_type():
|
|
213
|
-
match global_config["spark.sql.timestampType"]:
|
|
214
|
-
case "TIMESTAMP_LTZ":
|
|
215
|
-
timestamp_type = TimestampType(TimestampTimeZone.LTZ)
|
|
216
|
-
case "TIMESTAMP_NTZ":
|
|
217
|
-
timestamp_type = TimestampType(TimestampTimeZone.NTZ)
|
|
218
|
-
case "TIMESTAMP_TZ":
|
|
219
|
-
timestamp_type = TimestampType(TimestampTimeZone.TZ)
|
|
220
|
-
case _:
|
|
221
|
-
timestamp_type = TimestampType(TimestampTimeZone.DEFAULT)
|
|
222
|
-
return timestamp_type
|
|
223
|
-
|
|
224
|
-
|
|
225
211
|
def unwrap_literal(exp: expressions_proto.Expression):
|
|
226
212
|
"""Workaround for Snowpark functions generating invalid SQL when used with fn.lit (SNOW-1871954)"""
|
|
227
213
|
return get_literal_field_and_name(exp.literal)[0]
|
|
@@ -490,11 +476,8 @@ def map_unresolved_function(
|
|
|
490
476
|
return TypedColumn(result, lambda: expected_types)
|
|
491
477
|
|
|
492
478
|
match function_name:
|
|
493
|
-
case func_name if (
|
|
494
|
-
|
|
495
|
-
):
|
|
496
|
-
# TODO: In Spark, UDFs can override built-in functions in SQL,
|
|
497
|
-
# but not in DataFrame ops.
|
|
479
|
+
case func_name if func_name.lower() in session._udfs:
|
|
480
|
+
# In Spark, UDFs can override built-in functions
|
|
498
481
|
udf = session._udfs[func_name.lower()]
|
|
499
482
|
result_exp = snowpark_fn.call_udf(
|
|
500
483
|
udf.name,
|
|
@@ -522,146 +505,401 @@ def map_unresolved_function(
|
|
|
522
505
|
result_exp = snowpark_fn.when(
|
|
523
506
|
snowpark_args[1] == 0, snowpark_fn.lit(None)
|
|
524
507
|
).otherwise(snowpark_args[0] % snowpark_args[1])
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
530
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
531
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
532
|
-
and isinstance(snowpark_typed_args[1].typ, _IntegralType)
|
|
533
|
-
or isinstance(snowpark_typed_args[0].typ, _IntegralType)
|
|
534
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
535
|
-
):
|
|
536
|
-
result_exp, (
|
|
537
|
-
return_type_precision,
|
|
538
|
-
return_type_scale,
|
|
539
|
-
) = _mul_div_precision_helper(snowpark_typed_args, snowpark_args, 0)
|
|
540
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
541
|
-
else:
|
|
542
|
-
result_exp = snowpark_args[0] * snowpark_args[1]
|
|
543
|
-
result_type = _find_common_type(
|
|
544
|
-
[arg.typ for arg in snowpark_typed_args]
|
|
545
|
-
)
|
|
546
|
-
case "+":
|
|
547
|
-
if isinstance(snowpark_typed_args[0].typ, DateType) and not isinstance(
|
|
548
|
-
snowpark_typed_args[1].typ,
|
|
549
|
-
(_IntegralType, StringType),
|
|
550
|
-
):
|
|
551
|
-
raise AnalysisException(
|
|
552
|
-
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]} + {snowpark_arg_names[1]}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
|
|
553
|
-
)
|
|
554
|
-
elif (
|
|
555
|
-
isinstance(snowpark_typed_args[0].typ, BinaryType)
|
|
556
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
557
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
558
|
-
and isinstance(snowpark_typed_args[1].typ, BinaryType)
|
|
559
|
-
) or (
|
|
560
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
561
|
-
and isinstance(snowpark_typed_args[1].typ, TimestampType)
|
|
562
|
-
or isinstance(snowpark_typed_args[0].typ, TimestampType)
|
|
563
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
564
|
-
):
|
|
565
|
-
raise AnalysisException(
|
|
566
|
-
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "({snowpark_arg_names[0]} + {snowpark_arg_names[1]})" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
567
|
-
)
|
|
568
|
-
elif isinstance(snowpark_typed_args[1].typ, DateType) and not isinstance(
|
|
569
|
-
snowpark_typed_args[0].typ,
|
|
570
|
-
(_IntegralType, StringType),
|
|
571
|
-
):
|
|
572
|
-
raise AnalysisException(
|
|
573
|
-
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{snowpark_arg_names[0]} + {snowpark_arg_names[1]}" due to data type mismatch: Parameter 1 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
|
|
574
|
-
)
|
|
575
|
-
elif (
|
|
576
|
-
isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
577
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
578
|
-
or isinstance(snowpark_typed_args[0].typ, DecimalType)
|
|
579
|
-
and isinstance(snowpark_typed_args[1].typ, _IntegralType)
|
|
580
|
-
or isinstance(snowpark_typed_args[0].typ, _IntegralType)
|
|
581
|
-
and isinstance(snowpark_typed_args[1].typ, DecimalType)
|
|
582
|
-
):
|
|
583
|
-
result_exp, (
|
|
584
|
-
return_type_precision,
|
|
585
|
-
return_type_scale,
|
|
586
|
-
) = _add_sub_precision_helper(snowpark_typed_args, snowpark_args, 0)
|
|
587
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
588
|
-
else:
|
|
589
|
-
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
590
|
-
if any(isinstance(arg.typ, DateType) for arg in snowpark_typed_args):
|
|
591
|
-
# TODO SNOW-2034420: resolve return type
|
|
592
|
-
result_exp = _type_with_typer(result_exp)
|
|
593
|
-
else:
|
|
508
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
509
|
+
case (NullType(), NullType()):
|
|
510
|
+
result_type = DoubleType()
|
|
511
|
+
case _:
|
|
594
512
|
result_type = _find_common_type(
|
|
595
513
|
[arg.typ for arg in snowpark_typed_args]
|
|
596
514
|
)
|
|
597
|
-
case "
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
515
|
+
case "*":
|
|
516
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
517
|
+
case (DecimalType() as t, NullType()) | (
|
|
518
|
+
NullType(),
|
|
519
|
+
DecimalType() as t,
|
|
520
|
+
):
|
|
521
|
+
p1, s1 = _get_type_precision(t)
|
|
522
|
+
result_type = _get_decimal_multiplication_result_type(
|
|
523
|
+
p1, s1, p1, s1
|
|
524
|
+
)
|
|
525
|
+
result_exp = snowpark_fn.lit(None)
|
|
526
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
527
|
+
t, (DecimalType, _IntegralType)
|
|
528
|
+
):
|
|
529
|
+
p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
|
|
530
|
+
p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
|
|
531
|
+
result_type = _get_decimal_multiplication_result_type(
|
|
532
|
+
p1, s1, p2, s2
|
|
533
|
+
)
|
|
534
|
+
result_exp = _get_decimal_multiplication_result_exp(
|
|
535
|
+
result_type, t, snowpark_args
|
|
536
|
+
)
|
|
537
|
+
case (NullType(), NullType()):
|
|
538
|
+
result_type = DoubleType()
|
|
539
|
+
result_exp = snowpark_fn.lit(None)
|
|
540
|
+
case (StringType(), StringType()):
|
|
541
|
+
if spark_sql_ansi_enabled:
|
|
542
|
+
raise AnalysisException(
|
|
543
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
|
|
544
|
+
)
|
|
545
|
+
else:
|
|
546
|
+
result_type = DoubleType()
|
|
547
|
+
result_exp = snowpark_args[0].try_cast(
|
|
548
|
+
result_type
|
|
549
|
+
) * snowpark_args[1].try_cast(result_type)
|
|
550
|
+
case (StringType(), _IntegralType()):
|
|
551
|
+
if spark_sql_ansi_enabled:
|
|
552
|
+
result_type = LongType()
|
|
553
|
+
result_exp = (
|
|
554
|
+
snowpark_args[0].cast(result_type) * snowpark_args[1]
|
|
555
|
+
)
|
|
556
|
+
else:
|
|
557
|
+
result_type = DoubleType()
|
|
558
|
+
result_exp = (
|
|
559
|
+
snowpark_args[0].try_cast(result_type) * snowpark_args[1]
|
|
560
|
+
)
|
|
561
|
+
case (StringType(), _FractionalType()):
|
|
562
|
+
result_type = DoubleType()
|
|
563
|
+
if spark_sql_ansi_enabled:
|
|
564
|
+
result_exp = (
|
|
565
|
+
snowpark_args[0].cast(result_type) * snowpark_args[1]
|
|
566
|
+
)
|
|
567
|
+
else:
|
|
568
|
+
result_exp = (
|
|
569
|
+
snowpark_args[0].try_cast(result_type) * snowpark_args[1]
|
|
570
|
+
)
|
|
571
|
+
case (_IntegralType(), StringType()):
|
|
572
|
+
if spark_sql_ansi_enabled:
|
|
573
|
+
result_type = LongType()
|
|
574
|
+
result_exp = snowpark_args[0] * snowpark_args[1].cast(
|
|
575
|
+
result_type
|
|
576
|
+
)
|
|
577
|
+
else:
|
|
578
|
+
result_type = DoubleType()
|
|
579
|
+
result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
|
|
580
|
+
result_type
|
|
581
|
+
)
|
|
582
|
+
case (_FractionalType(), StringType()):
|
|
583
|
+
result_type = DoubleType()
|
|
584
|
+
if spark_sql_ansi_enabled:
|
|
585
|
+
result_exp = snowpark_args[0] * snowpark_args[1].cast(
|
|
586
|
+
result_type
|
|
587
|
+
)
|
|
588
|
+
else:
|
|
589
|
+
result_exp = snowpark_args[0] * snowpark_args[1].try_cast(
|
|
590
|
+
result_type
|
|
591
|
+
)
|
|
592
|
+
case (_NumericType() as t, NullType()) | (
|
|
593
|
+
NullType(),
|
|
594
|
+
_NumericType() as t,
|
|
595
|
+
):
|
|
596
|
+
result_type = t
|
|
597
|
+
result_exp = snowpark_fn.lit(None)
|
|
598
|
+
case (_NumericType(), _NumericType()):
|
|
645
599
|
result_type = _find_common_type(
|
|
646
600
|
[arg.typ for arg in snowpark_typed_args]
|
|
647
601
|
)
|
|
648
|
-
|
|
602
|
+
result_exp = snowpark_args[0].cast(result_type) * snowpark_args[
|
|
603
|
+
1
|
|
604
|
+
].cast(result_type)
|
|
605
|
+
case _:
|
|
606
|
+
raise AnalysisException(
|
|
607
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
608
|
+
)
|
|
609
|
+
case "+":
|
|
610
|
+
spark_function_name = _get_spark_function_name(
|
|
611
|
+
snowpark_typed_args[0],
|
|
612
|
+
snowpark_typed_args[1],
|
|
613
|
+
snowpark_arg_names,
|
|
614
|
+
exp,
|
|
615
|
+
spark_function_name,
|
|
616
|
+
"+",
|
|
617
|
+
)
|
|
649
618
|
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
619
|
+
case (NullType(), _) | (_, NullType()):
|
|
620
|
+
result_type = _get_add_sub_result_type(
|
|
621
|
+
snowpark_typed_args[0].typ,
|
|
622
|
+
snowpark_typed_args[1].typ,
|
|
623
|
+
spark_function_name,
|
|
624
|
+
)
|
|
625
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
626
|
+
result_exp = result_exp.cast(result_type)
|
|
627
|
+
case (DateType(), t) | (t, DateType()):
|
|
628
|
+
date_param_index = (
|
|
629
|
+
0 if isinstance(snowpark_typed_args[0].typ, DateType) else 1
|
|
630
|
+
)
|
|
631
|
+
t_param_index = 1 - date_param_index
|
|
632
|
+
if isinstance(t, (IntegerType, ShortType, ByteType)):
|
|
633
|
+
result_type = DateType()
|
|
634
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
635
|
+
elif (
|
|
636
|
+
"INTERVAL"
|
|
637
|
+
== snowpark_typed_args[t_param_index].col._expr1.pretty_name
|
|
638
|
+
):
|
|
639
|
+
result_type = TimestampType()
|
|
640
|
+
result_exp = (
|
|
641
|
+
snowpark_args[date_param_index]
|
|
642
|
+
+ snowpark_args[t_param_index]
|
|
643
|
+
)
|
|
644
|
+
else:
|
|
645
|
+
raise AnalysisException(
|
|
646
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[t_param_index]}" has the type "{t}".',
|
|
647
|
+
)
|
|
648
|
+
case (StringType(), StringType()):
|
|
649
|
+
if spark_sql_ansi_enabled:
|
|
650
|
+
raise AnalysisException(
|
|
651
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
|
|
652
|
+
)
|
|
653
|
+
else:
|
|
654
|
+
result_type = DoubleType()
|
|
655
|
+
result_exp = snowpark_fn.try_cast(
|
|
656
|
+
snowpark_args[0], result_type
|
|
657
|
+
) + snowpark_fn.try_cast(snowpark_args[1], result_type)
|
|
658
|
+
case (StringType(), _NumericType() as t):
|
|
659
|
+
if spark_sql_ansi_enabled:
|
|
660
|
+
result_type = (
|
|
661
|
+
DoubleType()
|
|
662
|
+
if isinstance(t, _FractionalType)
|
|
663
|
+
else LongType()
|
|
664
|
+
)
|
|
665
|
+
result_exp = (
|
|
666
|
+
snowpark_args[0].cast(result_type) + snowpark_args[1]
|
|
667
|
+
)
|
|
668
|
+
else:
|
|
669
|
+
result_type = DoubleType()
|
|
670
|
+
result_exp = (
|
|
671
|
+
snowpark_fn.try_cast(snowpark_args[0], result_type)
|
|
672
|
+
+ snowpark_args[1]
|
|
673
|
+
)
|
|
674
|
+
case (_NumericType() as t, StringType()):
|
|
675
|
+
if spark_sql_ansi_enabled:
|
|
676
|
+
result_type = (
|
|
677
|
+
DoubleType()
|
|
678
|
+
if isinstance(t, _FractionalType)
|
|
679
|
+
else LongType()
|
|
680
|
+
)
|
|
681
|
+
result_exp = snowpark_args[0] + snowpark_args[1].cast(
|
|
682
|
+
result_type
|
|
683
|
+
)
|
|
684
|
+
else:
|
|
685
|
+
result_type = DoubleType()
|
|
686
|
+
result_exp = snowpark_args[0] + snowpark_fn.try_cast(
|
|
687
|
+
snowpark_args[1], result_type
|
|
688
|
+
)
|
|
689
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
690
|
+
t, (BinaryType, TimestampType)
|
|
691
|
+
):
|
|
692
|
+
raise AnalysisException(
|
|
693
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
694
|
+
)
|
|
695
|
+
case _:
|
|
696
|
+
result_type = _get_add_sub_result_type(
|
|
697
|
+
snowpark_typed_args[0].typ,
|
|
698
|
+
snowpark_typed_args[1].typ,
|
|
699
|
+
spark_function_name,
|
|
700
|
+
)
|
|
701
|
+
result_exp = snowpark_args[0] + snowpark_args[1]
|
|
702
|
+
if isinstance(result_type, DecimalType):
|
|
703
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
704
|
+
case "-":
|
|
705
|
+
spark_function_name = _get_spark_function_name(
|
|
706
|
+
snowpark_typed_args[0],
|
|
707
|
+
snowpark_typed_args[1],
|
|
708
|
+
snowpark_arg_names,
|
|
709
|
+
exp,
|
|
710
|
+
spark_function_name,
|
|
711
|
+
"-",
|
|
712
|
+
)
|
|
713
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
714
|
+
case (NullType(), _) | (_, NullType()):
|
|
715
|
+
result_type = _get_add_sub_result_type(
|
|
716
|
+
snowpark_typed_args[0].typ,
|
|
717
|
+
snowpark_typed_args[1].typ,
|
|
718
|
+
spark_function_name,
|
|
719
|
+
)
|
|
720
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
721
|
+
result_exp = result_exp.cast(result_type)
|
|
722
|
+
case (DateType(), DateType()):
|
|
723
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
724
|
+
result_type = LongType()
|
|
725
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
726
|
+
case (DateType(), StringType()):
|
|
727
|
+
if "INTERVAL" == snowpark_typed_args[1].col._expr1.pretty_name:
|
|
728
|
+
result_type = TimestampType()
|
|
729
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
730
|
+
else:
|
|
731
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
732
|
+
result_type = LongType()
|
|
733
|
+
input_type = (
|
|
734
|
+
DateType() if spark_sql_ansi_enabled else DoubleType()
|
|
735
|
+
)
|
|
736
|
+
result_exp = snowpark_args[0] - snowpark_args[1].cast(
|
|
737
|
+
input_type
|
|
738
|
+
)
|
|
739
|
+
case (StringType(), DateType()):
|
|
740
|
+
# TODO SNOW-2034420: resolve return type (it should be INTERVAL DAY)
|
|
741
|
+
result_type = LongType()
|
|
742
|
+
input_type = DateType()
|
|
743
|
+
result_exp = snowpark_args[0].cast(input_type) - snowpark_args[1]
|
|
744
|
+
case (DateType(), (IntegerType() | ShortType() | ByteType())):
|
|
745
|
+
result_type = DateType()
|
|
746
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
747
|
+
case (DateType(), _):
|
|
748
|
+
raise AnalysisException(
|
|
749
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 2 requires the ("INT" or "SMALLINT" or "TINYINT") type, however "{snowpark_arg_names[1]}" has the type "{snowpark_typed_args[1].typ}".',
|
|
750
|
+
)
|
|
751
|
+
case (_, DateType()):
|
|
752
|
+
raise AnalysisException(
|
|
753
|
+
f'[DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: Parameter 1 requires the "DATE" type, however "{snowpark_arg_names[0]}" has the type "{snowpark_typed_args[0].typ}".',
|
|
754
|
+
)
|
|
755
|
+
case (StringType(), StringType()):
|
|
756
|
+
if spark_sql_ansi_enabled:
|
|
757
|
+
raise AnalysisException(
|
|
758
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".'
|
|
759
|
+
)
|
|
760
|
+
else:
|
|
761
|
+
result_type = DoubleType()
|
|
762
|
+
result_exp = snowpark_fn.try_cast(
|
|
763
|
+
snowpark_args[0], result_type
|
|
764
|
+
) - snowpark_fn.try_cast(snowpark_args[1], result_type)
|
|
765
|
+
case (StringType(), _NumericType() as t):
|
|
766
|
+
if spark_sql_ansi_enabled:
|
|
767
|
+
result_type = (
|
|
768
|
+
DoubleType()
|
|
769
|
+
if isinstance(t, _FractionalType)
|
|
770
|
+
else LongType()
|
|
771
|
+
)
|
|
772
|
+
result_exp = (
|
|
773
|
+
snowpark_args[0].cast(result_type) - snowpark_args[1]
|
|
774
|
+
)
|
|
775
|
+
else:
|
|
776
|
+
result_type = DoubleType()
|
|
777
|
+
result_exp = (
|
|
778
|
+
snowpark_fn.try_cast(snowpark_args[0], result_type)
|
|
779
|
+
- snowpark_args[1]
|
|
780
|
+
)
|
|
781
|
+
case (_NumericType() as t, StringType()):
|
|
782
|
+
if spark_sql_ansi_enabled:
|
|
783
|
+
result_type = (
|
|
784
|
+
DoubleType()
|
|
785
|
+
if isinstance(t, _FractionalType)
|
|
786
|
+
else LongType()
|
|
787
|
+
)
|
|
788
|
+
result_exp = snowpark_args[0] - snowpark_args[1].cast(
|
|
789
|
+
result_type
|
|
790
|
+
)
|
|
791
|
+
else:
|
|
792
|
+
result_type = DoubleType()
|
|
793
|
+
result_exp = snowpark_args[0] - snowpark_fn.try_cast(
|
|
794
|
+
snowpark_args[1], result_type
|
|
795
|
+
)
|
|
650
796
|
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
651
|
-
t,
|
|
652
|
-
) or isinstance(t, _IntegralType) or isinstance(
|
|
653
|
-
snowpark_typed_args[1].typ, NullType
|
|
797
|
+
t, (BinaryType, TimestampType)
|
|
654
798
|
):
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
) = _mul_div_precision_helper(snowpark_typed_args, snowpark_args, 1)
|
|
659
|
-
result_type = DecimalType(return_type_precision, return_type_scale)
|
|
799
|
+
raise AnalysisException(
|
|
800
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
801
|
+
)
|
|
660
802
|
case _:
|
|
803
|
+
result_type = _get_add_sub_result_type(
|
|
804
|
+
snowpark_typed_args[0].typ,
|
|
805
|
+
snowpark_typed_args[1].typ,
|
|
806
|
+
spark_function_name,
|
|
807
|
+
)
|
|
808
|
+
result_exp = snowpark_args[0] - snowpark_args[1]
|
|
809
|
+
if isinstance(result_type, DecimalType):
|
|
810
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
811
|
+
case "/":
|
|
812
|
+
match (snowpark_typed_args[0].typ, snowpark_typed_args[1].typ):
|
|
813
|
+
case (DecimalType() as t1, NullType()):
|
|
814
|
+
p1, s1 = _get_type_precision(t1)
|
|
815
|
+
result_type, _ = _get_decimal_division_result_type(p1, s1, p1, s1)
|
|
816
|
+
result_exp = snowpark_fn.lit(None).cast(result_type)
|
|
817
|
+
case (DecimalType(), t) | (t, DecimalType()) if isinstance(
|
|
818
|
+
t, (DecimalType, _IntegralType)
|
|
819
|
+
):
|
|
820
|
+
p1, s1 = _get_type_precision(snowpark_typed_args[0].typ)
|
|
821
|
+
p2, s2 = _get_type_precision(snowpark_typed_args[1].typ)
|
|
822
|
+
result_type, overflow_detected = _get_decimal_division_result_type(
|
|
823
|
+
p1, s1, p2, s2
|
|
824
|
+
)
|
|
825
|
+
result_exp = _get_decimal_division_result_exp(
|
|
826
|
+
result_type,
|
|
827
|
+
t,
|
|
828
|
+
overflow_detected,
|
|
829
|
+
snowpark_args,
|
|
830
|
+
spark_function_name,
|
|
831
|
+
)
|
|
832
|
+
case (NullType(), NullType()):
|
|
661
833
|
result_type = DoubleType()
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
834
|
+
result_exp = snowpark_fn.lit(None)
|
|
835
|
+
case (StringType(), StringType()):
|
|
836
|
+
if spark_sql_ansi_enabled:
|
|
837
|
+
raise AnalysisException(
|
|
838
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("DOUBLE" or "DECIMAL"), not "STRING".'
|
|
839
|
+
)
|
|
840
|
+
else:
|
|
841
|
+
result_type = DoubleType()
|
|
842
|
+
result_exp = _divnull(
|
|
843
|
+
snowpark_args[0].try_cast(result_type),
|
|
844
|
+
snowpark_args[1].try_cast(result_type),
|
|
845
|
+
)
|
|
846
|
+
case (StringType(), _IntegralType()):
|
|
847
|
+
result_type = DoubleType()
|
|
848
|
+
if spark_sql_ansi_enabled:
|
|
849
|
+
result_exp = _divnull(
|
|
850
|
+
snowpark_args[0].cast(LongType()),
|
|
851
|
+
snowpark_args[1].cast(result_type),
|
|
852
|
+
)
|
|
853
|
+
else:
|
|
854
|
+
result_exp = _divnull(
|
|
855
|
+
snowpark_args[0].try_cast(result_type), snowpark_args[1]
|
|
856
|
+
)
|
|
857
|
+
result_exp = result_exp.cast(result_type)
|
|
858
|
+
case (StringType(), _FractionalType()):
|
|
859
|
+
result_type = DoubleType()
|
|
860
|
+
if spark_sql_ansi_enabled:
|
|
861
|
+
result_exp = _divnull(
|
|
862
|
+
snowpark_args[0].cast(result_type), snowpark_args[1]
|
|
863
|
+
)
|
|
864
|
+
else:
|
|
865
|
+
result_exp = _divnull(
|
|
866
|
+
snowpark_args[0].try_cast(result_type), snowpark_args[1]
|
|
867
|
+
)
|
|
868
|
+
case (_IntegralType(), StringType()):
|
|
869
|
+
result_type = DoubleType()
|
|
870
|
+
if spark_sql_ansi_enabled:
|
|
871
|
+
result_exp = _divnull(
|
|
872
|
+
snowpark_args[0].cast(result_type),
|
|
873
|
+
snowpark_args[1].cast(LongType()),
|
|
874
|
+
)
|
|
875
|
+
else:
|
|
876
|
+
result_exp = _divnull(
|
|
877
|
+
snowpark_args[0], snowpark_args[1].try_cast(result_type)
|
|
878
|
+
)
|
|
879
|
+
result_exp = result_exp.cast(result_type)
|
|
880
|
+
case (_FractionalType(), StringType()):
|
|
881
|
+
result_type = DoubleType()
|
|
882
|
+
if spark_sql_ansi_enabled:
|
|
883
|
+
result_exp = _divnull(
|
|
884
|
+
snowpark_args[0], snowpark_args[1].cast(result_type)
|
|
885
|
+
)
|
|
886
|
+
else:
|
|
887
|
+
result_exp = _divnull(
|
|
888
|
+
snowpark_args[0], snowpark_args[1].try_cast(result_type)
|
|
889
|
+
)
|
|
890
|
+
case (_NumericType(), NullType()) | (NullType(), _NumericType()):
|
|
891
|
+
result_type = DoubleType()
|
|
892
|
+
result_exp = snowpark_fn.lit(None)
|
|
893
|
+
case (_NumericType(), _NumericType()):
|
|
894
|
+
result_type = DoubleType()
|
|
895
|
+
result_exp = _divnull(
|
|
896
|
+
snowpark_args[0].cast(result_type),
|
|
897
|
+
snowpark_args[1].cast(result_type),
|
|
898
|
+
)
|
|
899
|
+
case _:
|
|
900
|
+
raise AnalysisException(
|
|
901
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "{spark_function_name}" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("{snowpark_typed_args[0].typ}" and "{snowpark_typed_args[1].typ}").'
|
|
902
|
+
)
|
|
665
903
|
case "~":
|
|
666
904
|
result_exp = TypedColumn(
|
|
667
905
|
snowpark_fn.bitnot(snowpark_args[0]),
|
|
@@ -1765,9 +2003,11 @@ def map_unresolved_function(
|
|
|
1765
2003
|
snowpark_args[1], snowpark_args[2], snowpark_args[0]
|
|
1766
2004
|
)
|
|
1767
2005
|
else:
|
|
2006
|
+
spark_function_name = f"convert_timezone(current_timezone(), {', '.join(snowpark_arg_names)})"
|
|
1768
2007
|
result_exp = snowpark_fn.convert_timezone(*snowpark_args)
|
|
1769
2008
|
|
|
1770
|
-
|
|
2009
|
+
result_type = TimestampType(TimestampTimeZone.NTZ)
|
|
2010
|
+
result_exp = result_exp.cast(result_type)
|
|
1771
2011
|
|
|
1772
2012
|
case "corr":
|
|
1773
2013
|
col1_type = snowpark_typed_args[0].typ
|
|
@@ -2055,11 +2295,8 @@ def map_unresolved_function(
|
|
|
2055
2295
|
result_exp = TypedColumn(snowpark_fn.current_date(), lambda: [DateType()])
|
|
2056
2296
|
spark_function_name = "current_date()"
|
|
2057
2297
|
case "current_timestamp" | "now":
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
get_timestamp_type(),
|
|
2061
|
-
)
|
|
2062
|
-
result_exp = TypedColumn(result_exp, lambda: [get_timestamp_type()])
|
|
2298
|
+
result_type = TimestampType(TimestampTimeZone.LTZ)
|
|
2299
|
+
result_exp = snowpark_fn.to_timestamp_ltz(snowpark_fn.current_timestamp())
|
|
2063
2300
|
case "current_timezone":
|
|
2064
2301
|
result_exp = snowpark_fn.lit(global_config.spark_sql_session_timeZone)
|
|
2065
2302
|
result_type = StringType()
|
|
@@ -2650,7 +2887,7 @@ def map_unresolved_function(
|
|
|
2650
2887
|
return results
|
|
2651
2888
|
|
|
2652
2889
|
spark_function_name = f"from_csv({snowpark_arg_names[0]})"
|
|
2653
|
-
|
|
2890
|
+
result_type = map_type_string_to_snowpark_type(snowpark_arg_names[1])
|
|
2654
2891
|
|
|
2655
2892
|
if len(snowpark_arg_names) > 2 and snowpark_arg_names[2].startswith(
|
|
2656
2893
|
"named_struct"
|
|
@@ -2675,8 +2912,7 @@ def map_unresolved_function(
|
|
|
2675
2912
|
|
|
2676
2913
|
result_exp = snowpark_fn.when(
|
|
2677
2914
|
snowpark_args[0].is_null(), snowpark_fn.lit(None)
|
|
2678
|
-
).otherwise(snowpark_fn.cast(csv_result,
|
|
2679
|
-
result_type = ddl_schema
|
|
2915
|
+
).otherwise(snowpark_fn.cast(csv_result, result_type))
|
|
2680
2916
|
case "from_json":
|
|
2681
2917
|
# TODO: support options.
|
|
2682
2918
|
if len(snowpark_args) > 2:
|
|
@@ -2903,21 +3139,15 @@ def map_unresolved_function(
|
|
|
2903
3139
|
)
|
|
2904
3140
|
result_type = StringType()
|
|
2905
3141
|
case "from_utc_timestamp":
|
|
2906
|
-
|
|
2907
|
-
@cached_udf(
|
|
2908
|
-
input_types=[StringType()],
|
|
2909
|
-
return_type=StringType(),
|
|
2910
|
-
)
|
|
2911
|
-
def map_from_spark_tz(tz):
|
|
2912
|
-
return SPARK_TZ_ABBREVIATIONS_OVERRIDES.get(tz, tz)
|
|
2913
|
-
|
|
2914
|
-
target_tz = map_from_spark_tz(snowpark_args[1])
|
|
3142
|
+
target_tz = _map_from_spark_tz(snowpark_args[1])
|
|
2915
3143
|
result_exp = _try_to_cast(
|
|
2916
3144
|
"try_to_timestamp",
|
|
2917
|
-
snowpark_fn.from_utc_timestamp(snowpark_args[0], target_tz)
|
|
3145
|
+
snowpark_fn.from_utc_timestamp(snowpark_args[0], target_tz).cast(
|
|
3146
|
+
TimestampType()
|
|
3147
|
+
),
|
|
2918
3148
|
snowpark_args[0],
|
|
2919
3149
|
)
|
|
2920
|
-
result_type = TimestampType(
|
|
3150
|
+
result_type = TimestampType()
|
|
2921
3151
|
case "get":
|
|
2922
3152
|
if exp.unresolved_function.arguments[1].HasField("literal"):
|
|
2923
3153
|
index = unwrap_literal(exp.unresolved_function.arguments[1])
|
|
@@ -3823,8 +4053,10 @@ def map_unresolved_function(
|
|
|
3823
4053
|
).otherwise(snowpark_fn.ln(snowpark_args[0]))
|
|
3824
4054
|
result_type = DoubleType()
|
|
3825
4055
|
case "localtimestamp":
|
|
3826
|
-
|
|
3827
|
-
|
|
4056
|
+
result_type = TimestampType(TimestampTimeZone.NTZ)
|
|
4057
|
+
result_exp = snowpark_fn.to_timestamp_ntz(
|
|
4058
|
+
snowpark_fn.builtin("localtimestamp")()
|
|
4059
|
+
)
|
|
3828
4060
|
case "locate":
|
|
3829
4061
|
substr = unwrap_literal(exp.unresolved_function.arguments[0])
|
|
3830
4062
|
value = snowpark_args[1]
|
|
@@ -3999,7 +4231,7 @@ def map_unresolved_function(
|
|
|
3999
4231
|
match function_name:
|
|
4000
4232
|
case "make_timestamp":
|
|
4001
4233
|
make_function_name = "timestamp_tz_from_parts"
|
|
4002
|
-
result_type =
|
|
4234
|
+
result_type = get_timestamp_type()
|
|
4003
4235
|
case "make_timestamp_ltz":
|
|
4004
4236
|
make_function_name = "timestamp_ltz_from_parts"
|
|
4005
4237
|
result_type = TimestampType(TimestampTimeZone.LTZ)
|
|
@@ -6244,6 +6476,18 @@ def map_unresolved_function(
|
|
|
6244
6476
|
if pattern_value is None:
|
|
6245
6477
|
return snowpark_fn.lit(None)
|
|
6246
6478
|
|
|
6479
|
+
# Optimization: treat escaped regex that resolves to a pure literal delimiter
|
|
6480
|
+
# - Single char: "\\."
|
|
6481
|
+
# - Multi char: e.g., "\\.505\\."
|
|
6482
|
+
if re.fullmatch(r"(?:\\.)+", pattern_value):
|
|
6483
|
+
literal_delim = re.sub(r"\\(.)", r"\1", pattern_value)
|
|
6484
|
+
return snowpark_fn.when(
|
|
6485
|
+
limit <= 0,
|
|
6486
|
+
snowpark_fn.split(
|
|
6487
|
+
str_, snowpark_fn.lit(literal_delim)
|
|
6488
|
+
).cast(result_type),
|
|
6489
|
+
).otherwise(native_split)
|
|
6490
|
+
|
|
6247
6491
|
is_regexp = re.match(
|
|
6248
6492
|
".*[\\[\\.\\]\\*\\?\\+\\^\\$\\{\\}\\|\\(\\)\\\\].*",
|
|
6249
6493
|
pattern_value,
|
|
@@ -6552,15 +6796,22 @@ def map_unresolved_function(
|
|
|
6552
6796
|
case "timestamp_add":
|
|
6553
6797
|
# Added to DataFrame functions in 4.0.0 - but can be called from SQL in 3.5.3.
|
|
6554
6798
|
spark_function_name = f"timestampadd({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})"
|
|
6799
|
+
|
|
6800
|
+
typ = snowpark_typed_args[2].typ
|
|
6801
|
+
result_type = (
|
|
6802
|
+
typ
|
|
6803
|
+
if isinstance(typ, TimestampType)
|
|
6804
|
+
else TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6805
|
+
)
|
|
6806
|
+
|
|
6555
6807
|
result_exp = snowpark_fn.cast(
|
|
6556
6808
|
snowpark_fn.dateadd(
|
|
6557
6809
|
unwrap_literal(exp.unresolved_function.arguments[0]),
|
|
6558
6810
|
snowpark_args[1],
|
|
6559
6811
|
snowpark_args[2],
|
|
6560
6812
|
),
|
|
6561
|
-
|
|
6813
|
+
result_type,
|
|
6562
6814
|
)
|
|
6563
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.NTZ)
|
|
6564
6815
|
case "timestamp_diff":
|
|
6565
6816
|
# Added to DataFrame functions in 4.0.0 - but can be called from SQL in 3.5.3.
|
|
6566
6817
|
spark_function_name = f"timestampdiff({snowpark_arg_names[0]}, {snowpark_arg_names[1]}, {snowpark_arg_names[2]})"
|
|
@@ -6573,9 +6824,9 @@ def map_unresolved_function(
|
|
|
6573
6824
|
case "timestamp_micros":
|
|
6574
6825
|
result_exp = snowpark_fn.cast(
|
|
6575
6826
|
snowpark_fn.to_timestamp(snowpark_args[0], 6),
|
|
6576
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6827
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6577
6828
|
)
|
|
6578
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6829
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6579
6830
|
case "timestamp_millis":
|
|
6580
6831
|
if not isinstance(snowpark_typed_args[0].typ, _IntegralType):
|
|
6581
6832
|
raise AnalysisException(
|
|
@@ -6583,9 +6834,9 @@ def map_unresolved_function(
|
|
|
6583
6834
|
)
|
|
6584
6835
|
result_exp = snowpark_fn.cast(
|
|
6585
6836
|
snowpark_fn.to_timestamp(snowpark_args[0] * 1_000, 6),
|
|
6586
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6837
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6587
6838
|
)
|
|
6588
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6839
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6589
6840
|
case "timestamp_seconds":
|
|
6590
6841
|
# Spark allows seconds to be fractional. Snowflake does not allow that
|
|
6591
6842
|
# even though the documentation explicitly says that it does.
|
|
@@ -6598,9 +6849,9 @@ def map_unresolved_function(
|
|
|
6598
6849
|
snowpark_fn.to_timestamp(
|
|
6599
6850
|
snowpark_fn.cast(snowpark_args[0] * 1_000_000, LongType()), 6
|
|
6600
6851
|
),
|
|
6601
|
-
TimestampType(snowpark.types.TimestampTimeZone.
|
|
6852
|
+
TimestampType(snowpark.types.TimestampTimeZone.LTZ),
|
|
6602
6853
|
)
|
|
6603
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.
|
|
6854
|
+
result_type = TimestampType(snowpark.types.TimestampTimeZone.LTZ)
|
|
6604
6855
|
case "to_char" | "to_varchar":
|
|
6605
6856
|
# The structure of the Spark format string must match: [MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]
|
|
6606
6857
|
# Note the grammar above was retrieved from an error message from PySpark, but it is not entirely accurate.
|
|
@@ -6746,20 +6997,29 @@ def map_unresolved_function(
|
|
|
6746
6997
|
for typed_arg in snowpark_typed_args
|
|
6747
6998
|
]
|
|
6748
6999
|
|
|
7000
|
+
timezone_conf = global_config.get("spark.sql.session.timeZone")
|
|
7001
|
+
|
|
7002
|
+
# Objects do not preserve keys order in Snowflake, so we need to pass them in the array
|
|
7003
|
+
# Not all the types are preserved in Snowflake Object, timestamps and dates are converted to strings
|
|
7004
|
+
# to properly format them types have to be passed as argument
|
|
6749
7005
|
@cached_udf(
|
|
6750
|
-
input_types=[VariantType(), VariantType()],
|
|
7006
|
+
input_types=[VariantType(), ArrayType(), ArrayType(), VariantType()],
|
|
6751
7007
|
return_type=StringType(),
|
|
7008
|
+
packages=["jpype1"],
|
|
6752
7009
|
)
|
|
6753
|
-
def _to_csv(
|
|
6754
|
-
|
|
7010
|
+
def _to_csv(
|
|
7011
|
+
col: dict, keys: list, types: list, options: Optional[dict]
|
|
7012
|
+
) -> str:
|
|
7013
|
+
import datetime
|
|
7014
|
+
|
|
7015
|
+
import jpype
|
|
7016
|
+
|
|
6755
7017
|
if options is not None:
|
|
6756
7018
|
if not isinstance(options, dict):
|
|
6757
7019
|
raise TypeError(
|
|
6758
7020
|
"[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
|
|
6759
7021
|
)
|
|
6760
7022
|
|
|
6761
|
-
sep = options.get("sep") or sep
|
|
6762
|
-
|
|
6763
7023
|
python_to_snowflake_type = {
|
|
6764
7024
|
"str": "STRING",
|
|
6765
7025
|
"bool": "BOOLEAN",
|
|
@@ -6779,22 +7039,166 @@ def map_unresolved_function(
|
|
|
6779
7039
|
f'[INVALID_OPTIONS.NON_STRING_TYPE] Invalid options: A type of keys and values in `map()` must be string, but got "MAP<{k_type}, {v_type}>".'
|
|
6780
7040
|
)
|
|
6781
7041
|
|
|
7042
|
+
options = options or {}
|
|
7043
|
+
lowercased_options = {
|
|
7044
|
+
key.lower(): value for key, value in options.items()
|
|
7045
|
+
}
|
|
7046
|
+
|
|
7047
|
+
sep = lowercased_options.get("sep") or (
|
|
7048
|
+
lowercased_options.get("delimiter") or ","
|
|
7049
|
+
)
|
|
7050
|
+
quote = lowercased_options.get("quote") or '"'
|
|
7051
|
+
quote_all = lowercased_options.get("quoteall", "false")
|
|
7052
|
+
escape = lowercased_options.get("escape") or "\\"
|
|
7053
|
+
|
|
7054
|
+
ignore_leading_white_space = lowercased_options.get(
|
|
7055
|
+
"ignoreleadingwhitespace", "true"
|
|
7056
|
+
)
|
|
7057
|
+
ignore_trailing_white_space = lowercased_options.get(
|
|
7058
|
+
"ignoretrailingwhitespace", "true"
|
|
7059
|
+
)
|
|
7060
|
+
null_value = lowercased_options.get("nullvalue") or ""
|
|
7061
|
+
empty_value = lowercased_options.get("emptyvalue") or '""'
|
|
7062
|
+
char_to_escape_quote_escaping = (
|
|
7063
|
+
lowercased_options.get("chartoescapequoteescaping") or escape
|
|
7064
|
+
)
|
|
7065
|
+
|
|
7066
|
+
date_format = lowercased_options.get("dateformat") or "yyyy-MM-dd"
|
|
7067
|
+
timestamp_format = (
|
|
7068
|
+
lowercased_options.get("timestampformat")
|
|
7069
|
+
or "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"
|
|
7070
|
+
)
|
|
7071
|
+
timestamp_NTZ_format = (
|
|
7072
|
+
lowercased_options.get("timestampntzformat")
|
|
7073
|
+
or "yyyy-MM-dd'T'HH:mm:ss[.SSS]"
|
|
7074
|
+
)
|
|
7075
|
+
|
|
7076
|
+
def to_boolean(value: str) -> bool:
|
|
7077
|
+
return value.lower() == "true"
|
|
7078
|
+
|
|
7079
|
+
quote_all = to_boolean(quote_all)
|
|
7080
|
+
ignore_leading_white_space = to_boolean(ignore_leading_white_space)
|
|
7081
|
+
ignore_trailing_white_space = to_boolean(ignore_trailing_white_space)
|
|
7082
|
+
|
|
7083
|
+
def escape_str(value: str) -> str:
|
|
7084
|
+
escape_quote = escape + quote if escape != quote else escape
|
|
7085
|
+
return (
|
|
7086
|
+
value.replace(escape, char_to_escape_quote_escaping + escape)
|
|
7087
|
+
.replace(quote, escape_quote)
|
|
7088
|
+
.replace("\r", "\\r")
|
|
7089
|
+
)
|
|
7090
|
+
|
|
7091
|
+
def escape_and_quote_string(value) -> str:
|
|
7092
|
+
if quote_all:
|
|
7093
|
+
return f"{quote}{escape_str(str(value))}{quote}"
|
|
7094
|
+
return str(value)
|
|
7095
|
+
|
|
7096
|
+
time_types = ("date", "timestamp", "timestamp_ntz")
|
|
7097
|
+
maps_timestamps = any(
|
|
7098
|
+
python_type in time_types for python_type in types
|
|
7099
|
+
)
|
|
7100
|
+
|
|
7101
|
+
# Multiple execution of the UDF are done within the same process, that's why we need to check if the JVM was not already started
|
|
7102
|
+
if maps_timestamps and not jpype.isJVMStarted():
|
|
7103
|
+
jpype.startJVM()
|
|
7104
|
+
|
|
7105
|
+
if maps_timestamps:
|
|
7106
|
+
ZonedDateTime = jpype.JClass("java.time.ZonedDateTime")
|
|
7107
|
+
ZoneId = jpype.JClass("java.time.ZoneId")
|
|
7108
|
+
DateTimeFormatter = jpype.JClass(
|
|
7109
|
+
"java.time.format.DateTimeFormatter"
|
|
7110
|
+
)
|
|
7111
|
+
Instant = jpype.JClass("java.time.Instant")
|
|
7112
|
+
LocalDate = jpype.JClass("java.time.LocalDate")
|
|
7113
|
+
LocalDateTime = jpype.JClass("java.time.LocalDateTime")
|
|
7114
|
+
timestamp_formatter = DateTimeFormatter.ofPattern(timestamp_format)
|
|
7115
|
+
timestamp_ntz_formatter = DateTimeFormatter.ofPattern(
|
|
7116
|
+
timestamp_NTZ_format
|
|
7117
|
+
)
|
|
7118
|
+
date_formatter = DateTimeFormatter.ofPattern(date_format)
|
|
7119
|
+
|
|
6782
7120
|
result = []
|
|
6783
|
-
for
|
|
7121
|
+
for key, python_type in zip(keys, types):
|
|
7122
|
+
value = col.get(key)
|
|
6784
7123
|
if value is None:
|
|
6785
|
-
result.append(
|
|
7124
|
+
result.append(escape_and_quote_string(null_value))
|
|
7125
|
+
elif python_type in ("date", "timestamp", "timestamp_ntz"):
|
|
7126
|
+
match python_type:
|
|
7127
|
+
case "date":
|
|
7128
|
+
value = datetime.datetime.strptime(value, "%Y-%m-%d")
|
|
7129
|
+
local_date = LocalDate.of(
|
|
7130
|
+
value.year, value.month, value.day
|
|
7131
|
+
)
|
|
7132
|
+
formatted_date = date_formatter.format(local_date)
|
|
7133
|
+
result.append(escape_and_quote_string(formatted_date))
|
|
7134
|
+
case "timestamp":
|
|
7135
|
+
try:
|
|
7136
|
+
value = datetime.datetime.strptime(
|
|
7137
|
+
value, "%Y-%m-%d %H:%M:%S.%f %z"
|
|
7138
|
+
)
|
|
7139
|
+
except ValueError:
|
|
7140
|
+
# Fallback to the format without microseconds
|
|
7141
|
+
value = datetime.datetime.strptime(
|
|
7142
|
+
value, "%Y-%m-%d %H:%M:%S %z"
|
|
7143
|
+
)
|
|
7144
|
+
instant = Instant.ofEpochMilli(
|
|
7145
|
+
int(value.timestamp() * 1000)
|
|
7146
|
+
)
|
|
7147
|
+
zdt = ZonedDateTime.ofInstant(
|
|
7148
|
+
instant, ZoneId.of(timezone_conf)
|
|
7149
|
+
)
|
|
7150
|
+
str_value = timestamp_formatter.format(zdt)
|
|
7151
|
+
result.append(escape_and_quote_string(str_value))
|
|
7152
|
+
case "timestamp_ntz":
|
|
7153
|
+
try:
|
|
7154
|
+
value = datetime.datetime.strptime(
|
|
7155
|
+
value, "%Y-%m-%d %H:%M:%S.%f"
|
|
7156
|
+
)
|
|
7157
|
+
except ValueError:
|
|
7158
|
+
# Fallback to the format without microseconds
|
|
7159
|
+
value = datetime.datetime.strptime(
|
|
7160
|
+
value, "%Y-%m-%d %H:%M:%S"
|
|
7161
|
+
)
|
|
7162
|
+
timestamp_ntz = LocalDateTime.of(
|
|
7163
|
+
value.year,
|
|
7164
|
+
value.month,
|
|
7165
|
+
value.day,
|
|
7166
|
+
value.hour,
|
|
7167
|
+
value.minute,
|
|
7168
|
+
value.second,
|
|
7169
|
+
value.microsecond * 1000,
|
|
7170
|
+
)
|
|
7171
|
+
str_value = timestamp_ntz_formatter.format(
|
|
7172
|
+
timestamp_ntz
|
|
7173
|
+
)
|
|
7174
|
+
result.append(escape_and_quote_string(str_value))
|
|
7175
|
+
case _:
|
|
7176
|
+
raise ValueError(
|
|
7177
|
+
f"Unable to determine type for value: {python_type}"
|
|
7178
|
+
)
|
|
6786
7179
|
elif isinstance(value, str):
|
|
6787
|
-
|
|
6788
|
-
|
|
7180
|
+
strip_value = (
|
|
7181
|
+
value.lstrip() if ignore_leading_white_space else value
|
|
7182
|
+
)
|
|
7183
|
+
strip_value = (
|
|
7184
|
+
strip_value.rstrip()
|
|
7185
|
+
if ignore_trailing_white_space
|
|
7186
|
+
else strip_value
|
|
7187
|
+
)
|
|
7188
|
+
if strip_value == "":
|
|
7189
|
+
result.append(escape_and_quote_string(empty_value))
|
|
7190
|
+
elif (
|
|
7191
|
+
any(c in value for c in (sep, "\r", "\n", quote))
|
|
7192
|
+
or quote_all
|
|
6789
7193
|
):
|
|
6790
|
-
|
|
6791
|
-
result.append(
|
|
7194
|
+
strip_value = escape_str(strip_value)
|
|
7195
|
+
result.append(quote + strip_value + quote)
|
|
6792
7196
|
else:
|
|
6793
|
-
result.append(
|
|
7197
|
+
result.append(escape_and_quote_string(strip_value))
|
|
6794
7198
|
elif isinstance(value, bool):
|
|
6795
|
-
result.append(str(value).lower())
|
|
7199
|
+
result.append(escape_and_quote_string(str(value).lower()))
|
|
6796
7200
|
else:
|
|
6797
|
-
result.append(str(value))
|
|
7201
|
+
result.append(escape_and_quote_string(str(value)))
|
|
6798
7202
|
|
|
6799
7203
|
return sep.join(result)
|
|
6800
7204
|
|
|
@@ -6807,11 +7211,36 @@ def map_unresolved_function(
|
|
|
6807
7211
|
"[INVALID_OPTIONS.NON_MAP_FUNCTION] Invalid options: Must use the `map()` function for options."
|
|
6808
7212
|
)
|
|
6809
7213
|
|
|
7214
|
+
def get_snowpark_type_name(snowpark_type: DataType) -> str:
|
|
7215
|
+
return (
|
|
7216
|
+
(
|
|
7217
|
+
"timestamp"
|
|
7218
|
+
if not snowpark_type.tz == snowpark.types.TimestampTimeZone.NTZ
|
|
7219
|
+
else "timestamp_ntz"
|
|
7220
|
+
)
|
|
7221
|
+
if snowpark_type == TimestampType()
|
|
7222
|
+
else snowpark_type.type_name().lower()
|
|
7223
|
+
)
|
|
7224
|
+
|
|
7225
|
+
field_names = snowpark_fn.array_construct(
|
|
7226
|
+
*[
|
|
7227
|
+
snowpark_fn.lit(value)
|
|
7228
|
+
for value in snowpark_typed_args[0].typ.fieldNames
|
|
7229
|
+
]
|
|
7230
|
+
)
|
|
7231
|
+
field_types = snowpark_fn.array_construct(
|
|
7232
|
+
*[
|
|
7233
|
+
snowpark_fn.lit(get_snowpark_type_name(value.datatype))
|
|
7234
|
+
for value in snowpark_typed_args[0].typ.fields
|
|
7235
|
+
]
|
|
7236
|
+
)
|
|
6810
7237
|
match snowpark_args:
|
|
6811
7238
|
case [csv_data]:
|
|
6812
|
-
result_exp = _to_csv(
|
|
7239
|
+
result_exp = _to_csv(
|
|
7240
|
+
csv_data, field_names, field_types, snowpark_fn.lit(None)
|
|
7241
|
+
)
|
|
6813
7242
|
case [csv_data, options]:
|
|
6814
|
-
result_exp = _to_csv(csv_data, options)
|
|
7243
|
+
result_exp = _to_csv(csv_data, field_names, field_types, options)
|
|
6815
7244
|
case _:
|
|
6816
7245
|
raise ValueError("Unrecognized from_csv parameters")
|
|
6817
7246
|
result_type = StringType()
|
|
@@ -6912,10 +7341,8 @@ def map_unresolved_function(
|
|
|
6912
7341
|
)
|
|
6913
7342
|
case _:
|
|
6914
7343
|
raise ValueError(f"Invalid number of arguments to {function_name}")
|
|
6915
|
-
result_exp = snowpark_fn.cast(
|
|
6916
|
-
|
|
6917
|
-
)
|
|
6918
|
-
result_type = TimestampType()
|
|
7344
|
+
result_exp = snowpark_fn.cast(result_exp, get_timestamp_type())
|
|
7345
|
+
result_type = get_timestamp_type()
|
|
6919
7346
|
|
|
6920
7347
|
case "to_timestamp_ltz":
|
|
6921
7348
|
match (snowpark_typed_args, exp.unresolved_function.arguments):
|
|
@@ -6941,7 +7368,12 @@ def map_unresolved_function(
|
|
|
6941
7368
|
match (snowpark_typed_args, exp.unresolved_function.arguments):
|
|
6942
7369
|
case ([e], _):
|
|
6943
7370
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(e.col)
|
|
6944
|
-
case ([e, _], _) if
|
|
7371
|
+
case ([e, _], _) if isinstance(e.typ, DateType):
|
|
7372
|
+
result_exp = snowpark_fn.convert_timezone(
|
|
7373
|
+
snowpark_fn.lit("UTC"),
|
|
7374
|
+
snowpark_fn.builtin("to_timestamp_ntz")(e.col),
|
|
7375
|
+
)
|
|
7376
|
+
case ([e, _], _) if isinstance(e.typ, TimestampType):
|
|
6945
7377
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(e.col)
|
|
6946
7378
|
case ([e, _], [_, fmt]):
|
|
6947
7379
|
result_exp = snowpark_fn.builtin("to_timestamp_ntz")(
|
|
@@ -7002,25 +7434,17 @@ def map_unresolved_function(
|
|
|
7002
7434
|
result_type = LongType()
|
|
7003
7435
|
|
|
7004
7436
|
case "to_utc_timestamp":
|
|
7005
|
-
|
|
7006
|
-
@cached_udf(
|
|
7007
|
-
input_types=[StringType()],
|
|
7008
|
-
return_type=StringType(),
|
|
7009
|
-
)
|
|
7010
|
-
def map_timezone(short_tz: str) -> str:
|
|
7011
|
-
return SPARK_TZ_ABBREVIATIONS_OVERRIDES.get(short_tz, short_tz)
|
|
7012
|
-
|
|
7437
|
+
result_type = TimestampType()
|
|
7013
7438
|
result_exp = _try_to_cast(
|
|
7014
7439
|
"try_to_timestamp",
|
|
7015
7440
|
snowpark_fn.cast(
|
|
7016
7441
|
snowpark_fn.to_utc_timestamp(
|
|
7017
|
-
snowpark_args[0],
|
|
7442
|
+
snowpark_args[0], _map_from_spark_tz(snowpark_args[1])
|
|
7018
7443
|
),
|
|
7019
|
-
|
|
7444
|
+
result_type,
|
|
7020
7445
|
),
|
|
7021
7446
|
snowpark_args[0],
|
|
7022
7447
|
)
|
|
7023
|
-
result_type = TimestampType(snowpark.types.TimestampTimeZone.NTZ)
|
|
7024
7448
|
case "translate":
|
|
7025
7449
|
src_alphabet = unwrap_literal(exp.unresolved_function.arguments[1])
|
|
7026
7450
|
target_alphabet = unwrap_literal(exp.unresolved_function.arguments[2])
|
|
@@ -7413,8 +7837,8 @@ def map_unresolved_function(
|
|
|
7413
7837
|
)
|
|
7414
7838
|
case _:
|
|
7415
7839
|
raise ValueError(f"Invalid number of arguments to {function_name}")
|
|
7416
|
-
|
|
7417
|
-
|
|
7840
|
+
result_type = get_timestamp_type()
|
|
7841
|
+
result_exp = snowpark_fn.cast(result_exp, result_type)
|
|
7418
7842
|
case "typeof":
|
|
7419
7843
|
col_snowpark_typ = snowpark_typed_args[0].typ
|
|
7420
7844
|
spark_typ = map_snowpark_to_pyspark_types(col_snowpark_typ)
|
|
@@ -7684,14 +8108,10 @@ def map_unresolved_function(
|
|
|
7684
8108
|
result_exp = result_exp.when(snowpark_args[i], snowpark_args[i + 1])
|
|
7685
8109
|
result_type_indexes.append(i + 1)
|
|
7686
8110
|
name_components.append("END")
|
|
7687
|
-
|
|
7688
|
-
|
|
7689
|
-
lambda: [
|
|
7690
|
-
_find_common_type(
|
|
7691
|
-
[snowpark_typed_args[i].typ for i in result_type_indexes]
|
|
7692
|
-
)
|
|
7693
|
-
],
|
|
8111
|
+
result_type = _find_common_type(
|
|
8112
|
+
[snowpark_typed_args[i].typ for i in result_type_indexes]
|
|
7694
8113
|
)
|
|
8114
|
+
result_exp = snowpark_fn.cast(result_exp, result_type)
|
|
7695
8115
|
spark_function_name = " ".join(name_components)
|
|
7696
8116
|
case "width_bucket":
|
|
7697
8117
|
width_bucket_fn = snowpark_fn.function("width_bucket")
|
|
@@ -7874,15 +8294,6 @@ def map_unresolved_function(
|
|
|
7874
8294
|
),
|
|
7875
8295
|
)
|
|
7876
8296
|
result_type = BinaryType()
|
|
7877
|
-
case udf_name if udf_name.lower() in session._udfs:
|
|
7878
|
-
# TODO: In Spark, UDFs can override built-in functions in SQL,
|
|
7879
|
-
# but not in DataFrame ops.
|
|
7880
|
-
udf = session._udfs[udf_name.lower()]
|
|
7881
|
-
result_exp = snowpark_fn.call_udf(
|
|
7882
|
-
udf.name,
|
|
7883
|
-
*(snowpark_fn.cast(arg, VariantType()) for arg in snowpark_args),
|
|
7884
|
-
)
|
|
7885
|
-
result_type = udf.return_type
|
|
7886
8297
|
case udtf_name if udtf_name.lower() in session._udtfs:
|
|
7887
8298
|
udtf, spark_col_names = session._udtfs[udtf_name.lower()]
|
|
7888
8299
|
result_exp = snowpark_fn.call_table_function(
|
|
@@ -7965,6 +8376,18 @@ def map_unresolved_function(
|
|
|
7965
8376
|
return spark_col_names, typed_col
|
|
7966
8377
|
|
|
7967
8378
|
|
|
8379
|
+
def _cast_helper(column: Column, to: DataType) -> Column:
|
|
8380
|
+
if global_config.spark_sql_ansi_enabled:
|
|
8381
|
+
column_mediator = (
|
|
8382
|
+
snowpark_fn.cast(column, StringType())
|
|
8383
|
+
if isinstance(to, DecimalType)
|
|
8384
|
+
else column
|
|
8385
|
+
)
|
|
8386
|
+
return snowpark_fn.cast(column_mediator, to)
|
|
8387
|
+
else:
|
|
8388
|
+
return _try_cast_helper(column, to)
|
|
8389
|
+
|
|
8390
|
+
|
|
7968
8391
|
def _try_cast_helper(column: Column, to: DataType) -> Column:
|
|
7969
8392
|
"""
|
|
7970
8393
|
Attempts to cast a given column to a specified data type using the same behaviour as Spark.
|
|
@@ -9119,7 +9542,7 @@ def _get_type_precision(typ: DataType) -> tuple[int, int]:
|
|
|
9119
9542
|
case LongType():
|
|
9120
9543
|
return 20, 0 # -9223372036854775808 to 9223372036854775807
|
|
9121
9544
|
case NullType():
|
|
9122
|
-
return
|
|
9545
|
+
return 0, 0 # NULL
|
|
9123
9546
|
case _:
|
|
9124
9547
|
return 38, 0 # Default to maximum precision for other types
|
|
9125
9548
|
|
|
@@ -9162,58 +9585,71 @@ def _decimal_add_sub_result_type_helper(p1, s1, p2, s2):
|
|
|
9162
9585
|
return result_precision, min_scale, return_type_precision, return_type_scale
|
|
9163
9586
|
|
|
9164
9587
|
|
|
9165
|
-
def
|
|
9166
|
-
|
|
9167
|
-
|
|
9168
|
-
|
|
9169
|
-
|
|
9170
|
-
if
|
|
9171
|
-
|
|
9588
|
+
def _get_decimal_multiplication_result_exp(
|
|
9589
|
+
result_type: DecimalType | DataType,
|
|
9590
|
+
other_type: DataType,
|
|
9591
|
+
snowpark_args: list[Column],
|
|
9592
|
+
) -> Column:
|
|
9593
|
+
if global_config.spark_sql_ansi_enabled:
|
|
9594
|
+
result_exp = snowpark_args[0] * snowpark_args[1]
|
|
9172
9595
|
else:
|
|
9173
|
-
|
|
9174
|
-
|
|
9175
|
-
|
|
9176
|
-
while result_scale > min_scale:
|
|
9177
|
-
result_scale -= 1
|
|
9178
|
-
return_type_scale = result_scale
|
|
9179
|
-
result_precision = (p1 - s1) + (p2 - s2) + result_scale + 1
|
|
9180
|
-
if result_precision <= 38:
|
|
9181
|
-
return (
|
|
9182
|
-
result_precision,
|
|
9183
|
-
result_scale,
|
|
9184
|
-
return_type_precision,
|
|
9185
|
-
return_type_scale,
|
|
9596
|
+
if isinstance(other_type, _IntegralType):
|
|
9597
|
+
result_exp = snowpark_args[0].cast(result_type) * snowpark_args[1].cast(
|
|
9598
|
+
result_type
|
|
9186
9599
|
)
|
|
9600
|
+
else:
|
|
9601
|
+
result_exp = snowpark_args[0].cast(DoubleType()) * snowpark_args[1].cast(
|
|
9602
|
+
DoubleType()
|
|
9603
|
+
)
|
|
9604
|
+
result_exp = _try_cast_helper(result_exp, result_type)
|
|
9605
|
+
return result_exp
|
|
9187
9606
|
|
|
9188
|
-
result_precision = (p1 - s1) + (p2 - s2) + min_scale + 1
|
|
9189
|
-
return result_precision, min_scale, return_type_precision, return_type_scale
|
|
9190
|
-
|
|
9191
|
-
|
|
9192
|
-
def _decimal_divide_result_type_helper(p1, s1, p2, s2):
|
|
9193
|
-
result_scale = max(6, s1 + p2 + 1)
|
|
9194
|
-
result_precision = p1 - s1 + s2 + result_scale
|
|
9195
|
-
return_type_precision, return_type_scale = result_precision, result_scale
|
|
9196
9607
|
|
|
9197
|
-
|
|
9198
|
-
|
|
9608
|
+
def _get_decimal_multiplication_result_type(p1, s1, p2, s2) -> DecimalType:
|
|
9609
|
+
result_precision = p1 + p2 + 1
|
|
9610
|
+
result_scale = s1 + s2
|
|
9611
|
+
if result_precision > 38:
|
|
9612
|
+
if result_scale > 6:
|
|
9613
|
+
overflow = result_precision - 38
|
|
9614
|
+
result_scale = max(6, result_scale - overflow)
|
|
9615
|
+
result_precision = 38
|
|
9616
|
+
return DecimalType(result_precision, result_scale)
|
|
9617
|
+
|
|
9618
|
+
|
|
9619
|
+
def _get_decimal_division_result_exp(
|
|
9620
|
+
result_type: DecimalType | DataType,
|
|
9621
|
+
other_type: DataType,
|
|
9622
|
+
overflow_detected: bool,
|
|
9623
|
+
snowpark_args: list[Column],
|
|
9624
|
+
spark_function_name: str,
|
|
9625
|
+
) -> Column:
|
|
9626
|
+
if (
|
|
9627
|
+
isinstance(other_type, DecimalType)
|
|
9628
|
+
and overflow_detected
|
|
9629
|
+
and global_config.spark_sql_ansi_enabled
|
|
9630
|
+
):
|
|
9631
|
+
raise ArithmeticException(
|
|
9632
|
+
f'[NUMERIC_VALUE_OUT_OF_RANGE] {spark_function_name} cannot be represented as Decimal({result_type.precision}, {result_type.scale}). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9633
|
+
)
|
|
9199
9634
|
else:
|
|
9200
|
-
|
|
9635
|
+
dividend = snowpark_args[0].cast(DoubleType())
|
|
9636
|
+
divisor = snowpark_args[1]
|
|
9637
|
+
result_exp = _divnull(dividend, divisor)
|
|
9638
|
+
result_exp = _cast_helper(result_exp, result_type)
|
|
9639
|
+
return result_exp
|
|
9201
9640
|
|
|
9202
|
-
min_scale = 6
|
|
9203
|
-
while result_scale > min_scale:
|
|
9204
|
-
result_scale -= 1
|
|
9205
|
-
return_type_scale = result_scale
|
|
9206
|
-
result_precision = p1 - s1 + s2 + result_scale
|
|
9207
|
-
if result_precision <= 38:
|
|
9208
|
-
return (
|
|
9209
|
-
result_precision,
|
|
9210
|
-
result_scale,
|
|
9211
|
-
return_type_precision,
|
|
9212
|
-
return_type_scale,
|
|
9213
|
-
)
|
|
9214
9641
|
|
|
9215
|
-
|
|
9216
|
-
|
|
9642
|
+
def _get_decimal_division_result_type(p1, s1, p2, s2) -> tuple[DecimalType, bool]:
|
|
9643
|
+
overflow_detected = False
|
|
9644
|
+
result_scale = max(6, s1 + p2 + 1)
|
|
9645
|
+
result_precision = p1 - s1 + s2 + result_scale
|
|
9646
|
+
if result_precision > 38:
|
|
9647
|
+
if result_precision > 40:
|
|
9648
|
+
overflow_detected = True
|
|
9649
|
+
overflow = result_precision - 38
|
|
9650
|
+
result_scale = max(6, result_scale - overflow)
|
|
9651
|
+
result_precision = 38
|
|
9652
|
+
return DecimalType(result_precision, result_scale), overflow_detected
|
|
9217
9653
|
|
|
9218
9654
|
|
|
9219
9655
|
def _try_arithmetic_helper(
|
|
@@ -9408,102 +9844,108 @@ def _try_arithmetic_helper(
|
|
|
9408
9844
|
return snowpark_fn.lit(None)
|
|
9409
9845
|
|
|
9410
9846
|
|
|
9411
|
-
def
|
|
9412
|
-
|
|
9413
|
-
|
|
9414
|
-
|
|
9415
|
-
|
|
9416
|
-
|
|
9417
|
-
|
|
9418
|
-
|
|
9419
|
-
|
|
9420
|
-
|
|
9421
|
-
|
|
9422
|
-
|
|
9423
|
-
|
|
9424
|
-
|
|
9425
|
-
|
|
9426
|
-
|
|
9427
|
-
|
|
9428
|
-
|
|
9429
|
-
|
|
9430
|
-
|
|
9431
|
-
|
|
9432
|
-
|
|
9433
|
-
|
|
9434
|
-
|
|
9435
|
-
|
|
9436
|
-
|
|
9437
|
-
|
|
9438
|
-
|
|
9439
|
-
|
|
9440
|
-
|
|
9847
|
+
def _get_add_sub_result_type(
|
|
9848
|
+
type1: DataType,
|
|
9849
|
+
type2: DataType,
|
|
9850
|
+
spark_function_name: str,
|
|
9851
|
+
) -> DataType:
|
|
9852
|
+
result_type = _find_common_type([type1, type2])
|
|
9853
|
+
match result_type:
|
|
9854
|
+
case DecimalType():
|
|
9855
|
+
p1, s1 = _get_type_precision(type1)
|
|
9856
|
+
p2, s2 = _get_type_precision(type2)
|
|
9857
|
+
result_scale = max(s1, s2)
|
|
9858
|
+
result_precision = max(p1 - s1, p2 - s2) + result_scale + 1
|
|
9859
|
+
if result_precision > 38:
|
|
9860
|
+
if result_scale > 6:
|
|
9861
|
+
overflow = result_precision - 38
|
|
9862
|
+
result_scale = max(6, result_scale - overflow)
|
|
9863
|
+
result_precision = 38
|
|
9864
|
+
result_type = DecimalType(result_precision, result_scale)
|
|
9865
|
+
case NullType():
|
|
9866
|
+
result_type = DoubleType()
|
|
9867
|
+
case StringType():
|
|
9868
|
+
match (type1, type2):
|
|
9869
|
+
case (_FractionalType(), _) | (_, _FractionalType()):
|
|
9870
|
+
result_type = DoubleType()
|
|
9871
|
+
case (_IntegralType(), _) | (_, _IntegralType()):
|
|
9872
|
+
result_type = (
|
|
9873
|
+
LongType()
|
|
9874
|
+
if global_config.spark_sql_ansi_enabled
|
|
9875
|
+
else DoubleType()
|
|
9876
|
+
)
|
|
9877
|
+
case _:
|
|
9878
|
+
if global_config.spark_sql_ansi_enabled:
|
|
9879
|
+
raise AnalysisException(
|
|
9880
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "STRING".',
|
|
9881
|
+
)
|
|
9882
|
+
else:
|
|
9883
|
+
result_type = DoubleType()
|
|
9884
|
+
case BooleanType():
|
|
9885
|
+
raise AnalysisException(
|
|
9886
|
+
f'[DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE] Cannot resolve "{spark_function_name}" due to data type mismatch: the binary operator requires the input type ("NUMERIC" or "INTERVAL DAY TO SECOND" or "INTERVAL YEAR TO MONTH" or "INTERVAL"), not "BOOLEAN".',
|
|
9441
9887
|
)
|
|
9442
|
-
|
|
9888
|
+
return result_type
|
|
9443
9889
|
|
|
9444
|
-
left_operand, right_operand = snowpark_args[0], snowpark_args[1]
|
|
9445
9890
|
|
|
9446
|
-
|
|
9447
|
-
|
|
9448
|
-
|
|
9449
|
-
|
|
9450
|
-
|
|
9451
|
-
|
|
9452
|
-
|
|
9453
|
-
|
|
9454
|
-
|
|
9891
|
+
def _get_spark_function_name(
|
|
9892
|
+
col1: TypedColumn,
|
|
9893
|
+
col2: TypedColumn,
|
|
9894
|
+
snowpark_arg_names: list[str],
|
|
9895
|
+
exp: expressions_proto.Expression,
|
|
9896
|
+
default_spark_function_name: str,
|
|
9897
|
+
function_name: str,
|
|
9898
|
+
):
|
|
9899
|
+
operation_op = function_name
|
|
9900
|
+
match function_name:
|
|
9901
|
+
case "+":
|
|
9902
|
+
operation_func = "date_add"
|
|
9903
|
+
case "-":
|
|
9904
|
+
operation_func = "date_sub"
|
|
9905
|
+
case _:
|
|
9906
|
+
return default_spark_function_name
|
|
9907
|
+
match (col1.typ, col2.typ):
|
|
9908
|
+
case (DateType(), DateType()):
|
|
9909
|
+
date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
|
|
9910
|
+
date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
|
|
9911
|
+
return f"({date_param_name1} {operation_op} {date_param_name2})"
|
|
9912
|
+
case (StringType(), DateType()):
|
|
9913
|
+
date_param_name2 = _get_literal_param_name(exp, 1, snowpark_arg_names[1])
|
|
9914
|
+
if "INTERVAL" == col1.col._expr1.pretty_name:
|
|
9915
|
+
return f"{date_param_name2} {operation_op} {snowpark_arg_names[0]}"
|
|
9916
|
+
elif global_config.spark_sql_ansi_enabled and function_name == "+":
|
|
9917
|
+
return f"{operation_func}(cast({date_param_name2} as date), cast({snowpark_arg_names[0]} as double))"
|
|
9918
|
+
else:
|
|
9919
|
+
return f"({snowpark_arg_names[0]} {operation_op} {date_param_name2})"
|
|
9920
|
+
case (DateType(), StringType()):
|
|
9921
|
+
date_param_name1 = _get_literal_param_name(exp, 0, snowpark_arg_names[0])
|
|
9922
|
+
if (
|
|
9923
|
+
global_config.spark_sql_ansi_enabled
|
|
9924
|
+
or "INTERVAL" == col2.col._expr1.pretty_name
|
|
9925
|
+
):
|
|
9926
|
+
return f"{date_param_name1} {operation_op} {snowpark_arg_names[1]}"
|
|
9927
|
+
else:
|
|
9928
|
+
return f"{operation_func}(cast({date_param_name1} as date), cast({snowpark_arg_names[1]} as double))"
|
|
9929
|
+
case (DateType() as dt, _) | (_, DateType() as dt):
|
|
9930
|
+
date_param_index = 0 if dt == col1.typ else 1
|
|
9931
|
+
date_param_name = _get_literal_param_name(
|
|
9932
|
+
exp, date_param_index, snowpark_arg_names[date_param_index]
|
|
9933
|
+
)
|
|
9934
|
+
return f"{operation_func}({date_param_name}, {snowpark_arg_names[1 - date_param_index]})"
|
|
9935
|
+
case _:
|
|
9936
|
+
return default_spark_function_name
|
|
9455
9937
|
|
|
9456
9938
|
|
|
9457
|
-
def
|
|
9458
|
-
|
|
9459
|
-
|
|
9460
|
-
|
|
9461
|
-
|
|
9462
|
-
|
|
9463
|
-
|
|
9464
|
-
|
|
9465
|
-
|
|
9466
|
-
|
|
9467
|
-
return_type_scale,
|
|
9468
|
-
) = _decimal_multiply_result_type_helper(p1, s1, p2, s2)
|
|
9469
|
-
else: # division
|
|
9470
|
-
(
|
|
9471
|
-
new_precision,
|
|
9472
|
-
new_scale,
|
|
9473
|
-
return_type_precision,
|
|
9474
|
-
return_type_scale,
|
|
9475
|
-
) = _decimal_divide_result_type_helper(p1, s1, p2, s2)
|
|
9476
|
-
|
|
9477
|
-
if isinstance(typed_args[0].typ, DecimalType) and isinstance(
|
|
9478
|
-
typed_args[1].typ, DecimalType
|
|
9479
|
-
):
|
|
9480
|
-
# Overflow check for both decimal types
|
|
9481
|
-
if new_precision > 38:
|
|
9482
|
-
if global_config.spark_sql_ansi_enabled:
|
|
9483
|
-
raise ArithmeticException(
|
|
9484
|
-
f'[NUMERIC_VALUE_OUT_OF_RANGE] Precision {new_precision} exceeds maximum allowed precision of 38. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9485
|
-
)
|
|
9486
|
-
return snowpark_fn.lit(None), (return_type_precision, return_type_scale)
|
|
9487
|
-
|
|
9488
|
-
# Overflow check if one operand is an Integral
|
|
9489
|
-
if new_precision > 38:
|
|
9490
|
-
if global_config.spark_sql_ansi_enabled:
|
|
9491
|
-
raise ArithmeticException(
|
|
9492
|
-
f'[NUMERIC_VALUE_OUT_OF_RANGE] Precision {new_precision} exceeds maximum allowed precision of 38. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead.'
|
|
9493
|
-
)
|
|
9494
|
-
new_precision = 38
|
|
9495
|
-
if new_scale > new_precision:
|
|
9496
|
-
new_scale = new_precision
|
|
9497
|
-
|
|
9498
|
-
left_operand, right_operand = snowpark_args[0], snowpark_args[1]
|
|
9499
|
-
if operation_type == 0: # multiplication
|
|
9500
|
-
result = left_operand * right_operand
|
|
9501
|
-
else: # division
|
|
9502
|
-
result = _divnull(left_operand, right_operand)
|
|
9503
|
-
return snowpark_fn.cast(result, DecimalType(new_precision, new_scale)), (
|
|
9504
|
-
return_type_precision,
|
|
9505
|
-
return_type_scale,
|
|
9506
|
-
)
|
|
9939
|
+
def _get_literal_param_name(exp, arg_index: int, default_param_name: str):
|
|
9940
|
+
try:
|
|
9941
|
+
date_param_name = (
|
|
9942
|
+
exp.unresolved_function.arguments[arg_index]
|
|
9943
|
+
.unresolved_function.arguments[0]
|
|
9944
|
+
.literal.string
|
|
9945
|
+
)
|
|
9946
|
+
except (IndexError, AttributeError):
|
|
9947
|
+
date_param_name = default_param_name
|
|
9948
|
+
return date_param_name
|
|
9507
9949
|
|
|
9508
9950
|
|
|
9509
9951
|
def _raise_error_helper(return_type: DataType, error_class=None):
|
|
@@ -9818,3 +10260,34 @@ def _trim_helper(value: Column, trim_value: Column, trim_type: Column) -> Column
|
|
|
9818
10260
|
return value
|
|
9819
10261
|
|
|
9820
10262
|
return _binary_trim_udf(value, trim_value, trim_type)
|
|
10263
|
+
|
|
10264
|
+
|
|
10265
|
+
def _map_from_spark_tz(value: Column) -> Column:
|
|
10266
|
+
return (
|
|
10267
|
+
snowpark_fn.when(value == "ACT", snowpark_fn.lit("Australia/Darwin"))
|
|
10268
|
+
.when(value == "AET", snowpark_fn.lit("Australia/Sydney"))
|
|
10269
|
+
.when(value == "AGT", snowpark_fn.lit("America/Argentina/Buenos_Aires"))
|
|
10270
|
+
.when(value == "ART", snowpark_fn.lit("Africa/Cairo"))
|
|
10271
|
+
.when(value == "AST", snowpark_fn.lit("America/Anchorage"))
|
|
10272
|
+
.when(value == "BET", snowpark_fn.lit("America/Sao_Paulo"))
|
|
10273
|
+
.when(value == "BST", snowpark_fn.lit("Asia/Dhaka"))
|
|
10274
|
+
.when(value == "CAT", snowpark_fn.lit("Africa/Harare"))
|
|
10275
|
+
.when(value == "CNT", snowpark_fn.lit("America/St_Johns"))
|
|
10276
|
+
.when(value == "CST", snowpark_fn.lit("America/Chicago"))
|
|
10277
|
+
.when(value == "CTT", snowpark_fn.lit("Asia/Shanghai"))
|
|
10278
|
+
.when(value == "EAT", snowpark_fn.lit("Africa/Addis_Ababa"))
|
|
10279
|
+
.when(value == "ECT", snowpark_fn.lit("Europe/Paris"))
|
|
10280
|
+
.when(value == "IET", snowpark_fn.lit("America/Indiana/Indianapolis"))
|
|
10281
|
+
.when(value == "IST", snowpark_fn.lit("Asia/Kolkata"))
|
|
10282
|
+
.when(value == "JST", snowpark_fn.lit("Asia/Tokyo"))
|
|
10283
|
+
.when(value == "MIT", snowpark_fn.lit("Pacific/Apia"))
|
|
10284
|
+
.when(value == "NET", snowpark_fn.lit("Asia/Yerevan"))
|
|
10285
|
+
.when(value == "NST", snowpark_fn.lit("Pacific/Auckland"))
|
|
10286
|
+
.when(value == "PLT", snowpark_fn.lit("Asia/Karachi"))
|
|
10287
|
+
.when(value == "PNT", snowpark_fn.lit("America/Phoenix"))
|
|
10288
|
+
.when(value == "PRT", snowpark_fn.lit("America/Puerto_Rico"))
|
|
10289
|
+
.when(value == "PST", snowpark_fn.lit("America/Los_Angeles"))
|
|
10290
|
+
.when(value == "SST", snowpark_fn.lit("Pacific/Guadalcanal"))
|
|
10291
|
+
.when(value == "VST", snowpark_fn.lit("Asia/Ho_Chi_Minh"))
|
|
10292
|
+
.otherwise(value) # Return original timezone if no mapping found
|
|
10293
|
+
)
|