snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +13 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +6 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/function_defaults.py +207 -0
- snowflake/snowpark_connect/expression/literal.py +18 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_expression.py +10 -1
- snowflake/snowpark_connect/expression/map_extension.py +12 -2
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +26 -8
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
- snowflake/snowpark_connect/relation/map_extension.py +56 -15
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +4 -2
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/read/utils.py +6 -7
- snowflake/snowpark_connect/relation/utils.py +1 -170
- snowflake/snowpark_connect/relation/write/map_write.py +62 -53
- snowflake/snowpark_connect/resources_initializer.py +29 -1
- snowflake/snowpark_connect/server.py +18 -3
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +38 -7
- snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.25.0.dist-info/RECORD +477 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.23.0.dist-info/RECORD +0 -893
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
from collections import Counter
|
|
5
5
|
from functools import reduce
|
|
6
6
|
|
|
7
7
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
8
|
+
from pyspark.errors.exceptions.base import AnalysisException
|
|
8
9
|
|
|
9
10
|
import snowflake.snowpark.functions as snowpark_fn
|
|
10
11
|
from snowflake import snowpark
|
|
12
|
+
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
|
|
13
|
+
quote_name_without_upper_casing,
|
|
14
|
+
unquote_if_quoted,
|
|
15
|
+
)
|
|
11
16
|
from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
|
|
12
17
|
from snowflake.snowpark_connect.config import global_config
|
|
13
18
|
from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
|
|
@@ -17,6 +22,7 @@ from snowflake.snowpark_connect.expression.map_expression import (
|
|
|
17
22
|
map_single_column_expression,
|
|
18
23
|
)
|
|
19
24
|
from snowflake.snowpark_connect.expression.typer import JoinExpressionTyper
|
|
25
|
+
from snowflake.snowpark_connect.hidden_column import HiddenColumn
|
|
20
26
|
from snowflake.snowpark_connect.relation.map_relation import (
|
|
21
27
|
NATURAL_JOIN_TYPE_BASE,
|
|
22
28
|
map_relation,
|
|
@@ -24,7 +30,6 @@ from snowflake.snowpark_connect.relation.map_relation import (
|
|
|
24
30
|
from snowflake.snowpark_connect.utils.context import (
|
|
25
31
|
push_evaluating_join_condition,
|
|
26
32
|
push_sql_scope,
|
|
27
|
-
set_plan_id_map,
|
|
28
33
|
set_sql_plan_name,
|
|
29
34
|
)
|
|
30
35
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
@@ -33,6 +38,9 @@ from snowflake.snowpark_connect.utils.telemetry import (
|
|
|
33
38
|
|
|
34
39
|
USING_COLUMN_NOT_FOUND_ERROR = "[UNRESOLVED_USING_COLUMN_FOR_JOIN] USING column `{0}` not found on the {1} side of the join. The {1}-side columns: {2}"
|
|
35
40
|
|
|
41
|
+
DUPLICATED_JOIN_COL_LSUFFIX = "_left"
|
|
42
|
+
DUPLICATED_JOIN_COL_RSUFFIX = "_right"
|
|
43
|
+
|
|
36
44
|
|
|
37
45
|
def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
38
46
|
left_container: DataFrameContainer = map_relation(rel.join.left)
|
|
@@ -74,6 +82,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
74
82
|
|
|
75
83
|
# This handles case sensitivity for using_columns
|
|
76
84
|
case_corrected_right_columns: list[str] = []
|
|
85
|
+
hidden_columns = set()
|
|
86
|
+
# Propagate the hidden columns from left/right inputs to the result in case of chained joins
|
|
87
|
+
if left_container.column_map.hidden_columns:
|
|
88
|
+
hidden_columns.update(left_container.column_map.hidden_columns)
|
|
89
|
+
|
|
90
|
+
if right_container.column_map.hidden_columns:
|
|
91
|
+
hidden_columns.update(right_container.column_map.hidden_columns)
|
|
77
92
|
|
|
78
93
|
if rel.join.HasField("join_condition"):
|
|
79
94
|
assert not using_columns
|
|
@@ -105,8 +120,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
105
120
|
right=right_input,
|
|
106
121
|
on=join_expression.col,
|
|
107
122
|
how=join_type,
|
|
108
|
-
lsuffix=
|
|
109
|
-
rsuffix=
|
|
123
|
+
lsuffix=DUPLICATED_JOIN_COL_LSUFFIX,
|
|
124
|
+
rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
|
|
110
125
|
)
|
|
111
126
|
elif using_columns:
|
|
112
127
|
if any(
|
|
@@ -156,12 +171,24 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
156
171
|
)
|
|
157
172
|
)
|
|
158
173
|
|
|
174
|
+
using_columns_snowpark_names = (
|
|
175
|
+
left_container.column_map.get_snowpark_column_names_from_spark_column_names(
|
|
176
|
+
list(using_columns), return_first=True
|
|
177
|
+
)
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
using_columns_snowpark_types = [
|
|
181
|
+
left_container.dataframe.schema.fields[idx].datatype
|
|
182
|
+
for idx, col in enumerate(left_container.column_map.get_snowpark_columns())
|
|
183
|
+
if col in using_columns_snowpark_names
|
|
184
|
+
]
|
|
185
|
+
|
|
159
186
|
# Round trip the using columns through the column map to get the correct names
|
|
160
187
|
# in order to support case sensitivity.
|
|
161
188
|
# TODO: case_corrected_left_columns / case_corrected_right_columns may no longer be required as Snowpark dataframe preserves the column casing now.
|
|
162
|
-
case_corrected_left_columns =
|
|
163
|
-
left_container.column_map.
|
|
164
|
-
|
|
189
|
+
case_corrected_left_columns = (
|
|
190
|
+
left_container.column_map.get_spark_column_names_from_snowpark_column_names(
|
|
191
|
+
using_columns_snowpark_names
|
|
165
192
|
)
|
|
166
193
|
)
|
|
167
194
|
case_corrected_right_columns = right_container.column_map.get_spark_column_names_from_snowpark_column_names(
|
|
@@ -195,28 +222,141 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
195
222
|
(left == right for left, right in snowpark_using_columns),
|
|
196
223
|
),
|
|
197
224
|
how=join_type,
|
|
225
|
+
rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
|
|
198
226
|
)
|
|
227
|
+
# If we disambiguated the snowpark_using_columns during the join, we need to update 'snowpark_using_columns' to
|
|
228
|
+
# use the disambiguated names.
|
|
229
|
+
disambiguated_snowpark_using_columns = []
|
|
230
|
+
|
|
231
|
+
# Ignore disambiguation for LEFT SEMI JOIN and LEFT ANTI JOIN because they drop the right columns, so it'll never disambiguate.
|
|
232
|
+
if join_type in ["leftsemi", "leftanti"]:
|
|
233
|
+
disambiguated_snowpark_using_columns = snowpark_using_columns
|
|
234
|
+
else:
|
|
235
|
+
normalized_joined_columns = [
|
|
236
|
+
unquote_if_quoted(col) for col in joined_df.columns
|
|
237
|
+
]
|
|
238
|
+
# snowpark_using_columns is a list of tuples of snowpark columns, joined_df.columns is a list of strings of column names
|
|
239
|
+
for (left, right) in snowpark_using_columns:
|
|
240
|
+
normalized_left_name = unquote_if_quoted(left.getName())
|
|
241
|
+
normalized_right_name = unquote_if_quoted(right.getName())
|
|
242
|
+
|
|
243
|
+
# are both left and right in joined_df? if not, it's been disambiguated
|
|
244
|
+
if (
|
|
245
|
+
normalized_left_name in normalized_joined_columns
|
|
246
|
+
and normalized_right_name in normalized_joined_columns
|
|
247
|
+
):
|
|
248
|
+
# we want to just add this
|
|
249
|
+
disambiguated_snowpark_using_columns.append((left, right))
|
|
250
|
+
else:
|
|
251
|
+
# we need to figure out the disambiguated names and add those - it only disambiguates if left == right
|
|
252
|
+
disambiguated_left: snowpark.Column | None = None
|
|
253
|
+
disambiguated_right: snowpark.Column | None = None
|
|
254
|
+
|
|
255
|
+
for col in normalized_joined_columns:
|
|
256
|
+
quoted_col = f'"{col}"'
|
|
257
|
+
# get the column name and cross check it to see if it ends with the og name
|
|
258
|
+
if col.endswith(normalized_left_name) and col.startswith("l_"):
|
|
259
|
+
disambiguated_left = joined_df[quoted_col]
|
|
260
|
+
elif col.endswith(normalized_right_name) and col.startswith(
|
|
261
|
+
"r_"
|
|
262
|
+
):
|
|
263
|
+
disambiguated_right = joined_df[quoted_col]
|
|
264
|
+
|
|
265
|
+
# If we have both disambiguated columns, we can break out of the loop to save processing time
|
|
266
|
+
if (
|
|
267
|
+
disambiguated_left is not None
|
|
268
|
+
and disambiguated_right is not None
|
|
269
|
+
):
|
|
270
|
+
break
|
|
271
|
+
if disambiguated_left is None or disambiguated_right is None:
|
|
272
|
+
raise AnalysisException(
|
|
273
|
+
f"Disambiguated columns not found for {normalized_left_name} and {normalized_right_name}."
|
|
274
|
+
)
|
|
275
|
+
disambiguated_snowpark_using_columns.append(
|
|
276
|
+
(disambiguated_left, disambiguated_right)
|
|
277
|
+
)
|
|
278
|
+
|
|
199
279
|
# For outer joins, we need to preserve join keys from both sides using COALESCE
|
|
280
|
+
"""
|
|
281
|
+
CHANGES:
|
|
282
|
+
- IF CASE
|
|
283
|
+
- Need to drop the using columns
|
|
284
|
+
- Need to create the hidden_columns DF with the using columns from right and left
|
|
285
|
+
- ELSE CASE
|
|
286
|
+
- Need to drop the right side using columns
|
|
287
|
+
- Need to create the hidden_columns DF with the using columns from right
|
|
288
|
+
"""
|
|
200
289
|
if join_type == "full_outer":
|
|
201
290
|
coalesced_columns = []
|
|
202
|
-
|
|
203
|
-
for i, (left_col, right_col) in enumerate(snowpark_using_columns):
|
|
291
|
+
for i, (left_col, _right_col) in enumerate(snowpark_using_columns):
|
|
204
292
|
# Use the original user-specified column name to preserve case sensitivity
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
293
|
+
# Use the disambiguated columns for coalescing
|
|
294
|
+
disambiguated_left_col = disambiguated_snowpark_using_columns[i][0]
|
|
295
|
+
disambiguated_right_col = disambiguated_snowpark_using_columns[i][1]
|
|
296
|
+
|
|
297
|
+
coalesced_col = snowpark_fn.coalesce(
|
|
298
|
+
disambiguated_left_col, disambiguated_right_col
|
|
299
|
+
).alias(left_col.get_name())
|
|
209
300
|
coalesced_columns.append(coalesced_col)
|
|
210
|
-
columns_to_drop.extend([left_col, right_col])
|
|
211
301
|
|
|
302
|
+
# Create HiddenColumn objects for each hidden column
|
|
303
|
+
hidden_left = HiddenColumn(
|
|
304
|
+
hidden_snowpark_name=disambiguated_left_col.getName(),
|
|
305
|
+
spark_name=case_corrected_left_columns[i],
|
|
306
|
+
visible_snowpark_name=left_col.get_name(),
|
|
307
|
+
qualifiers=left_container.column_map.get_qualifier_for_spark_column(
|
|
308
|
+
case_corrected_left_columns[i]
|
|
309
|
+
),
|
|
310
|
+
original_position=left_container.column_map.get_spark_columns().index(
|
|
311
|
+
case_corrected_left_columns[i]
|
|
312
|
+
),
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
hidden_right = HiddenColumn(
|
|
316
|
+
hidden_snowpark_name=disambiguated_right_col.getName(),
|
|
317
|
+
spark_name=case_corrected_right_columns[i],
|
|
318
|
+
visible_snowpark_name=left_col.get_name(),
|
|
319
|
+
qualifiers=right_container.column_map.get_qualifier_for_spark_column(
|
|
320
|
+
case_corrected_right_columns[i]
|
|
321
|
+
),
|
|
322
|
+
original_position=right_container.column_map.get_spark_columns().index(
|
|
323
|
+
case_corrected_right_columns[i]
|
|
324
|
+
),
|
|
325
|
+
)
|
|
326
|
+
hidden_columns.update(
|
|
327
|
+
[
|
|
328
|
+
hidden_left,
|
|
329
|
+
hidden_right,
|
|
330
|
+
]
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
# All non-hidden columns (not including the coalesced columns)
|
|
212
334
|
other_columns = [
|
|
213
335
|
snowpark_fn.col(col_name)
|
|
214
336
|
for col_name in joined_df.columns
|
|
215
|
-
if col_name not in [col.
|
|
337
|
+
if col_name not in [col.hidden_snowpark_name for col in hidden_columns]
|
|
216
338
|
]
|
|
217
339
|
result = joined_df.select(coalesced_columns + other_columns)
|
|
340
|
+
|
|
218
341
|
else:
|
|
219
342
|
result = joined_df.drop(*(right for _, right in snowpark_using_columns))
|
|
343
|
+
# We never run into the disambiguation case unless it's a full outer join.
|
|
344
|
+
for i, (left_col, right_col) in enumerate(
|
|
345
|
+
disambiguated_snowpark_using_columns
|
|
346
|
+
):
|
|
347
|
+
# Only right side columns are hidden
|
|
348
|
+
hidden_col = HiddenColumn(
|
|
349
|
+
hidden_snowpark_name=right_col.getName(),
|
|
350
|
+
spark_name=case_corrected_right_columns[i],
|
|
351
|
+
visible_snowpark_name=left_col.getName(),
|
|
352
|
+
qualifiers=right_container.column_map.get_qualifier_for_spark_column(
|
|
353
|
+
case_corrected_right_columns[i]
|
|
354
|
+
),
|
|
355
|
+
original_position=right_container.column_map.get_spark_columns().index(
|
|
356
|
+
case_corrected_right_columns[i]
|
|
357
|
+
),
|
|
358
|
+
)
|
|
359
|
+
hidden_columns.add(hidden_col)
|
|
220
360
|
else:
|
|
221
361
|
if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
|
|
222
362
|
raise SparkException.implicit_cartesian_product("inner")
|
|
@@ -230,35 +370,110 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
230
370
|
# - LEFT SEMI JOIN: Returns left rows that have matches in right table (no right columns)
|
|
231
371
|
# - LEFT ANTI JOIN: Returns left rows that have NO matches in right table (no right columns)
|
|
232
372
|
# Both preserve only the columns from the left DataFrame without adding any columns from the right.
|
|
233
|
-
spark_cols_after_join
|
|
373
|
+
spark_cols_after_join = left_container.column_map.get_spark_columns()
|
|
374
|
+
snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
|
|
375
|
+
snowpark_col_types = [
|
|
376
|
+
f.datatype for f in left_container.dataframe.schema.fields
|
|
377
|
+
]
|
|
234
378
|
qualifiers = left_container.column_map.get_qualifiers()
|
|
379
|
+
elif join_type == "full_outer" and using_columns:
|
|
380
|
+
# We want the coalesced columns to be first, followed by all the left and right columns (excluding using columns)
|
|
381
|
+
spark_cols_after_join: list[str] = []
|
|
382
|
+
snowpark_cols_after_join: list[str] = []
|
|
383
|
+
snowpark_col_types: list[str] = []
|
|
384
|
+
|
|
385
|
+
left_container_snowpark_columns = (
|
|
386
|
+
left_container.column_map.get_snowpark_columns()
|
|
387
|
+
)
|
|
388
|
+
right_container_snowpark_columns = (
|
|
389
|
+
right_container.column_map.get_snowpark_columns()
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
qualifiers = []
|
|
393
|
+
for i in range(len(case_corrected_left_columns)):
|
|
394
|
+
spark_cols_after_join.append(case_corrected_left_columns[i])
|
|
395
|
+
snowpark_cols_after_join.append(using_columns_snowpark_names[i])
|
|
396
|
+
snowpark_col_types.append(using_columns_snowpark_types[i])
|
|
397
|
+
qualifiers.append([])
|
|
398
|
+
|
|
399
|
+
# Handle adding left and right columns, excluding the using columns
|
|
400
|
+
for i, spark_col in enumerate(left_container.column_map.get_spark_columns()):
|
|
401
|
+
if (
|
|
402
|
+
spark_col not in case_corrected_left_columns
|
|
403
|
+
or spark_col in left_container.column_map.get_spark_columns()[:i]
|
|
404
|
+
):
|
|
405
|
+
spark_cols_after_join.append(spark_col)
|
|
406
|
+
snowpark_cols_after_join.append(left_container_snowpark_columns[i])
|
|
407
|
+
qualifiers.append(
|
|
408
|
+
left_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
snowpark_col_types.append(
|
|
412
|
+
left_container.dataframe.schema.fields[i].datatype
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
|
|
416
|
+
if (
|
|
417
|
+
spark_col not in case_corrected_right_columns
|
|
418
|
+
or spark_col in right_container.column_map.get_spark_columns()[:i]
|
|
419
|
+
):
|
|
420
|
+
spark_cols_after_join.append(spark_col)
|
|
421
|
+
snowpark_cols_after_join.append(right_container_snowpark_columns[i])
|
|
422
|
+
qualifiers.append(
|
|
423
|
+
right_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
snowpark_col_types.append(
|
|
427
|
+
right_container.dataframe.schema.fields[i].datatype
|
|
428
|
+
)
|
|
429
|
+
|
|
235
430
|
else:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
spark_col
|
|
241
|
-
for i, spark_col in enumerate(
|
|
242
|
-
right_container.column_map.get_spark_columns()
|
|
243
|
-
)
|
|
244
|
-
if spark_col not in case_corrected_right_columns
|
|
245
|
-
or spark_col
|
|
246
|
-
in right_container.column_map.get_spark_columns()[
|
|
247
|
-
:i
|
|
248
|
-
] # this is to make sure we only remove the column once
|
|
431
|
+
spark_cols_after_join = left_container.column_map.get_spark_columns()
|
|
432
|
+
snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
|
|
433
|
+
snowpark_col_types = [
|
|
434
|
+
f.datatype for f in left_container.dataframe.schema.fields
|
|
249
435
|
]
|
|
250
436
|
|
|
251
|
-
qualifiers =
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
437
|
+
qualifiers = left_container.column_map.get_qualifiers()
|
|
438
|
+
|
|
439
|
+
right_df_snowpark_columns = right_container.column_map.get_snowpark_columns()
|
|
440
|
+
|
|
441
|
+
for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
|
|
442
|
+
if (
|
|
443
|
+
spark_col not in case_corrected_right_columns
|
|
444
|
+
or spark_col in right_container.column_map.get_spark_columns()[:i]
|
|
445
|
+
):
|
|
446
|
+
spark_cols_after_join.append(spark_col)
|
|
447
|
+
snowpark_cols_after_join.append(right_df_snowpark_columns[i])
|
|
448
|
+
snowpark_col_types.append(
|
|
449
|
+
right_container.dataframe.schema.fields[i].datatype
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
qualifiers.append(
|
|
453
|
+
right_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
snowpark_cols_after_join_deduplicated = []
|
|
457
|
+
snowpark_cols_after_join_counter = Counter(snowpark_cols_after_join)
|
|
458
|
+
seen_duplicated_columns = set()
|
|
459
|
+
|
|
460
|
+
for col in snowpark_cols_after_join:
|
|
461
|
+
if snowpark_cols_after_join_counter[col] == 2:
|
|
462
|
+
# This means that the same column exists twice in the joined df, likely due to a self-join and
|
|
463
|
+
# we need to lsuffix and rsuffix to the names of both columns, similar to what Snowpark did under the hood.
|
|
464
|
+
|
|
465
|
+
suffix = (
|
|
466
|
+
DUPLICATED_JOIN_COL_RSUFFIX
|
|
467
|
+
if col in seen_duplicated_columns
|
|
468
|
+
else DUPLICATED_JOIN_COL_LSUFFIX
|
|
255
469
|
)
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
470
|
+
unquoted_col = unquote_if_quoted(col)
|
|
471
|
+
quoted = quote_name_without_upper_casing(unquoted_col + suffix)
|
|
472
|
+
snowpark_cols_after_join_deduplicated.append(quoted)
|
|
473
|
+
|
|
474
|
+
seen_duplicated_columns.add(col)
|
|
475
|
+
else:
|
|
476
|
+
snowpark_cols_after_join_deduplicated.append(col)
|
|
262
477
|
|
|
263
478
|
column_metadata = {}
|
|
264
479
|
if left_container.column_map.column_metadata:
|
|
@@ -287,33 +502,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
287
502
|
result_container = DataFrameContainer.create_with_column_mapping(
|
|
288
503
|
dataframe=result,
|
|
289
504
|
spark_column_names=spark_cols_after_join,
|
|
290
|
-
snowpark_column_names=
|
|
505
|
+
snowpark_column_names=snowpark_cols_after_join_deduplicated,
|
|
291
506
|
column_metadata=column_metadata,
|
|
292
507
|
column_qualifiers=qualifiers,
|
|
508
|
+
hidden_columns=hidden_columns,
|
|
509
|
+
snowpark_column_types=snowpark_col_types,
|
|
293
510
|
)
|
|
294
511
|
|
|
295
|
-
# Fix for USING join column references with different plan IDs
|
|
296
|
-
# After a USING join, references to the right dataframe's columns should resolve
|
|
297
|
-
# to the result dataframe that contains the merged columns
|
|
298
|
-
if (
|
|
299
|
-
using_columns
|
|
300
|
-
and rel.join.right.HasField("common")
|
|
301
|
-
and rel.join.right.common.HasField("plan_id")
|
|
302
|
-
):
|
|
303
|
-
right_plan_id = rel.join.right.common.plan_id
|
|
304
|
-
set_plan_id_map(right_plan_id, result_container)
|
|
305
|
-
|
|
306
|
-
# For FULL OUTER joins, we also need to map the left dataframe's plan_id
|
|
307
|
-
# since both columns are replaced with a coalesced column
|
|
308
|
-
if (
|
|
309
|
-
using_columns
|
|
310
|
-
and join_type == "full_outer"
|
|
311
|
-
and rel.join.left.HasField("common")
|
|
312
|
-
and rel.join.left.common.HasField("plan_id")
|
|
313
|
-
):
|
|
314
|
-
left_plan_id = rel.join.left.common.plan_id
|
|
315
|
-
set_plan_id_map(left_plan_id, result_container)
|
|
316
|
-
|
|
317
512
|
if rel.join.using_columns:
|
|
318
513
|
# When join 'using_columns', the 'join columns' should go first in result DF.
|
|
319
514
|
idxs_to_shift = [
|
|
@@ -345,6 +540,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
345
540
|
cached_schema_getter=lambda: snowpark.types.StructType(
|
|
346
541
|
reorder(original_df.schema.fields)
|
|
347
542
|
),
|
|
543
|
+
hidden_columns=hidden_columns,
|
|
348
544
|
)
|
|
349
545
|
|
|
350
546
|
return result_container
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
|
|
6
6
|
import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
|
|
7
7
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
@@ -9,7 +9,6 @@ from pyspark.errors.exceptions.base import AnalysisException, IllegalArgumentExc
|
|
|
9
9
|
|
|
10
10
|
import snowflake.snowpark_connect.relation.utils as utils
|
|
11
11
|
from snowflake import snowpark
|
|
12
|
-
from snowflake.snowpark._internal.analyzer.binary_expression import And
|
|
13
12
|
from snowflake.snowpark.functions import col, expr as snowpark_expr
|
|
14
13
|
from snowflake.snowpark.types import (
|
|
15
14
|
BooleanType,
|
|
@@ -31,7 +30,6 @@ from snowflake.snowpark_connect.expression.map_expression import (
|
|
|
31
30
|
)
|
|
32
31
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
33
32
|
from snowflake.snowpark_connect.relation.map_relation import map_relation
|
|
34
|
-
from snowflake.snowpark_connect.relation.utils import can_filter_be_flattened
|
|
35
33
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
36
34
|
SnowparkConnectNotImplementedError,
|
|
37
35
|
)
|
|
@@ -555,32 +553,7 @@ def map_filter(
|
|
|
555
553
|
rel.filter.condition, input_container.column_map, typer
|
|
556
554
|
)
|
|
557
555
|
|
|
558
|
-
|
|
559
|
-
condition_exp = condition.col._expression
|
|
560
|
-
if (
|
|
561
|
-
can_filter_be_flattened(select_statement, condition_exp)
|
|
562
|
-
and input_df._ops_after_agg is None
|
|
563
|
-
):
|
|
564
|
-
new = copy(select_statement)
|
|
565
|
-
new.from_ = select_statement.from_.to_subqueryable()
|
|
566
|
-
new.pre_actions = new.from_.pre_actions
|
|
567
|
-
new.post_actions = new.from_.post_actions
|
|
568
|
-
new.column_states = select_statement.column_states
|
|
569
|
-
new.where = (
|
|
570
|
-
And(select_statement.where, condition_exp)
|
|
571
|
-
if select_statement.where is not None
|
|
572
|
-
else condition_exp
|
|
573
|
-
)
|
|
574
|
-
new._merge_projection_complexity_with_subquery = False
|
|
575
|
-
new.df_ast_ids = (
|
|
576
|
-
select_statement.df_ast_ids.copy()
|
|
577
|
-
if select_statement.df_ast_ids is not None
|
|
578
|
-
else None
|
|
579
|
-
)
|
|
580
|
-
new.attributes = select_statement.attributes
|
|
581
|
-
result = input_df._with_plan(new)
|
|
582
|
-
else:
|
|
583
|
-
result = input_df.filter(condition.col)
|
|
556
|
+
result = input_df.filter(condition.col)
|
|
584
557
|
|
|
585
558
|
return DataFrameContainer(
|
|
586
559
|
result,
|
|
@@ -77,6 +77,7 @@ from ..expression.map_sql_expression import (
|
|
|
77
77
|
from ..utils.identifiers import spark_to_sf_single_id
|
|
78
78
|
|
|
79
79
|
_ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})
|
|
80
|
+
_cte_definitions = ContextVar[dict[str, any]]("_cte_definitions", default={})
|
|
80
81
|
_having_condition = ContextVar[expressions_proto.Expression | None](
|
|
81
82
|
"_having_condition", default=None
|
|
82
83
|
)
|
|
@@ -117,12 +118,15 @@ def _push_cte_scope():
|
|
|
117
118
|
"""
|
|
118
119
|
Creates a new CTE scope when evaluating nested WITH clauses.
|
|
119
120
|
"""
|
|
120
|
-
|
|
121
|
-
|
|
121
|
+
cur_ctes = _ctes.get()
|
|
122
|
+
cur_definitions = _cte_definitions.get()
|
|
123
|
+
cte_token = _ctes.set(cur_ctes.copy())
|
|
124
|
+
def_token = _cte_definitions.set(cur_definitions.copy())
|
|
122
125
|
try:
|
|
123
126
|
yield
|
|
124
127
|
finally:
|
|
125
|
-
_ctes.reset(
|
|
128
|
+
_ctes.reset(cte_token)
|
|
129
|
+
_cte_definitions.reset(def_token)
|
|
126
130
|
|
|
127
131
|
|
|
128
132
|
@contextmanager
|
|
@@ -405,15 +409,21 @@ def map_sql_to_pandas_df(
|
|
|
405
409
|
else:
|
|
406
410
|
# TODO: Unset the schema
|
|
407
411
|
pass
|
|
408
|
-
case "CreateTable":
|
|
412
|
+
case "CreateTable" | "ReplaceTable":
|
|
413
|
+
if class_name == "ReplaceTable":
|
|
414
|
+
replace_table = " OR REPLACE "
|
|
415
|
+
if_not_exists = ""
|
|
416
|
+
else:
|
|
417
|
+
replace_table = ""
|
|
418
|
+
if_not_exists = (
|
|
419
|
+
"IF NOT EXISTS " if logical_plan.ignoreIfExists() else ""
|
|
420
|
+
)
|
|
421
|
+
|
|
409
422
|
name = get_relation_identifier_name(logical_plan.name())
|
|
410
423
|
columns = ", ".join(
|
|
411
424
|
_spark_field_to_sql(f, True)
|
|
412
425
|
for f in logical_plan.tableSchema().fields()
|
|
413
426
|
)
|
|
414
|
-
if_not_exists = (
|
|
415
|
-
"IF NOT EXISTS " if logical_plan.ignoreIfExists() else ""
|
|
416
|
-
)
|
|
417
427
|
comment_opt = logical_plan.tableSpec().comment()
|
|
418
428
|
comment = (
|
|
419
429
|
f"COMMENT = '{_escape_sql_comment(str(comment_opt.get()))}'"
|
|
@@ -422,7 +432,7 @@ def map_sql_to_pandas_df(
|
|
|
422
432
|
)
|
|
423
433
|
# NOTE: We are intentionally ignoring any FORMAT=... parameters here.
|
|
424
434
|
session.sql(
|
|
425
|
-
f"CREATE TABLE {if_not_exists}{name} ({columns}) {comment}"
|
|
435
|
+
f"CREATE {replace_table} TABLE {if_not_exists}{name} ({columns}) {comment}"
|
|
426
436
|
).collect()
|
|
427
437
|
case "CreateTableAsSelect":
|
|
428
438
|
mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
|
|
@@ -1351,8 +1361,28 @@ def map_logical_plan_relation(
|
|
|
1351
1361
|
value_column_names = [e for e in as_java_list(rel.valueColumnNames())]
|
|
1352
1362
|
variable_column_name = rel.variableColumnName()
|
|
1353
1363
|
|
|
1364
|
+
# Check for multi-column UNPIVOT which Snowflake doesn't support
|
|
1365
|
+
if len(value_column_names) > 1:
|
|
1366
|
+
raise UnsupportedOperationException(
|
|
1367
|
+
f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
|
|
1368
|
+
f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
|
|
1369
|
+
f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
|
|
1370
|
+
f"or restructure your query to unpivot columns individually."
|
|
1371
|
+
)
|
|
1372
|
+
|
|
1354
1373
|
values = []
|
|
1355
|
-
|
|
1374
|
+
values_groups = as_java_list(rel.values().get())
|
|
1375
|
+
|
|
1376
|
+
# Check if we have multi-column groups in the IN clause
|
|
1377
|
+
if values_groups and len(as_java_list(values_groups[0])) > 1:
|
|
1378
|
+
group_sizes = [len(as_java_list(group)) for group in values_groups]
|
|
1379
|
+
raise UnsupportedOperationException(
|
|
1380
|
+
f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
|
|
1381
|
+
f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
|
|
1382
|
+
f"Workaround: Unpivot each column separately and then join/union the results as needed."
|
|
1383
|
+
)
|
|
1384
|
+
|
|
1385
|
+
for e1 in values_groups:
|
|
1356
1386
|
for e in as_java_list(e1):
|
|
1357
1387
|
values.append(map_logical_plan_expression(e))
|
|
1358
1388
|
|
|
@@ -1468,7 +1498,50 @@ def map_logical_plan_relation(
|
|
|
1468
1498
|
# The name corresponds to a `WITH` alias rather than a table.
|
|
1469
1499
|
# TODO: We currently evaluate the query each time its alias is used;
|
|
1470
1500
|
# we should eventually start using `WITH` in Snowflake SQL.
|
|
1471
|
-
|
|
1501
|
+
# Each CTE reference should get completely fresh evaluation to prevent ambiguity
|
|
1502
|
+
# when the same CTE is joined multiple times. Instead of reusing the same cte_proto,
|
|
1503
|
+
# re-evaluate the CTE definition to get fresh column identifiers.
|
|
1504
|
+
|
|
1505
|
+
# Re-evaluate the CTE definition to get fresh column identifiers
|
|
1506
|
+
cte_definition = _cte_definitions.get().get(name)
|
|
1507
|
+
if cte_definition is not None:
|
|
1508
|
+
# Get the original column names for consistency across CTE references
|
|
1509
|
+
original_container = map_relation(cte_proto)
|
|
1510
|
+
original_spark_columns = (
|
|
1511
|
+
original_container.column_map.get_spark_columns()
|
|
1512
|
+
)
|
|
1513
|
+
|
|
1514
|
+
# Re-evaluate the CTE definition with a fresh plan_id
|
|
1515
|
+
fresh_plan_id = gen_sql_plan_id()
|
|
1516
|
+
fresh_cte_proto = map_logical_plan_relation(
|
|
1517
|
+
cte_definition, fresh_plan_id
|
|
1518
|
+
)
|
|
1519
|
+
|
|
1520
|
+
# Use SubqueryColumnAliases to ensure consistent column names across CTE references
|
|
1521
|
+
# This is crucial for CTEs that reference other CTEs
|
|
1522
|
+
any_proto = Any()
|
|
1523
|
+
any_proto.Pack(
|
|
1524
|
+
snowflake_proto.Extension(
|
|
1525
|
+
subquery_column_aliases=snowflake_proto.SubqueryColumnAliases(
|
|
1526
|
+
input=fresh_cte_proto,
|
|
1527
|
+
aliases=original_spark_columns,
|
|
1528
|
+
)
|
|
1529
|
+
)
|
|
1530
|
+
)
|
|
1531
|
+
column_aliased_proto = relation_proto.Relation(extension=any_proto)
|
|
1532
|
+
column_aliased_proto.common.plan_id = gen_sql_plan_id()
|
|
1533
|
+
|
|
1534
|
+
# Wrap in SubqueryAlias with the CTE name
|
|
1535
|
+
proto = relation_proto.Relation(
|
|
1536
|
+
subquery_alias=relation_proto.SubqueryAlias(
|
|
1537
|
+
input=column_aliased_proto,
|
|
1538
|
+
alias=name,
|
|
1539
|
+
)
|
|
1540
|
+
)
|
|
1541
|
+
proto.common.plan_id = gen_sql_plan_id()
|
|
1542
|
+
else:
|
|
1543
|
+
# Fall back to the stored CTE if the definition is not found
|
|
1544
|
+
proto = cte_proto
|
|
1472
1545
|
else:
|
|
1473
1546
|
tmp_views = _get_current_temp_objects()
|
|
1474
1547
|
current_schema = session.connection.schema
|
|
@@ -1600,7 +1673,11 @@ def map_logical_plan_relation(
|
|
|
1600
1673
|
with _push_cte_scope():
|
|
1601
1674
|
for cte in as_java_list(rel.cteRelations()):
|
|
1602
1675
|
name = str(cte._1())
|
|
1603
|
-
|
|
1676
|
+
# Store the original CTE definition for re-evaluation
|
|
1677
|
+
_cte_definitions.get()[name] = cte._2()
|
|
1678
|
+
# Process CTE definition with a unique plan_id to ensure proper column naming
|
|
1679
|
+
cte_plan_id = gen_sql_plan_id()
|
|
1680
|
+
cte_proto = map_logical_plan_relation(cte._2(), cte_plan_id)
|
|
1604
1681
|
_ctes.get()[name] = cte_proto
|
|
1605
1682
|
|
|
1606
1683
|
proto = map_logical_plan_relation(rel.child())
|
|
@@ -27,7 +27,7 @@ from snowflake.snowpark_connect.expression.map_expression import (
|
|
|
27
27
|
)
|
|
28
28
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
29
29
|
from snowflake.snowpark_connect.type_mapping import (
|
|
30
|
-
|
|
30
|
+
map_type_string_to_proto,
|
|
31
31
|
proto_to_snowpark_type,
|
|
32
32
|
)
|
|
33
33
|
from snowflake.snowpark_connect.utils.context import push_udtf_context
|
|
@@ -125,7 +125,9 @@ def process_return_type(
|
|
|
125
125
|
) -> tuple[list[tuple[str, Any]], DataType, StructType, list[str]]:
|
|
126
126
|
try:
|
|
127
127
|
if return_type.HasField("unparsed"):
|
|
128
|
-
parsed_return =
|
|
128
|
+
parsed_return = map_type_string_to_proto(
|
|
129
|
+
return_type.unparsed.data_type_string
|
|
130
|
+
)
|
|
129
131
|
else:
|
|
130
132
|
parsed_return = return_type
|
|
131
133
|
except ValueError as e:
|