snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +13 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +6 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/literal.py +13 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +26 -8
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
- snowflake/snowpark_connect/relation/map_extension.py +52 -11
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +4 -2
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/write/map_write.py +62 -53
- snowflake/snowpark_connect/resources_initializer.py +29 -1
- snowflake/snowpark_connect/server.py +18 -3
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +38 -7
- snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.25.0.dist-info/RECORD +477 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.24.0.dist-info/RECORD +0 -898
- {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -34,6 +34,7 @@ def map_unresolved_star(
|
|
|
34
34
|
column_mapping: ColumnNameMap,
|
|
35
35
|
typer: ExpressionTyper,
|
|
36
36
|
) -> tuple[list[str], TypedColumn]:
|
|
37
|
+
|
|
37
38
|
if exp.unresolved_star.HasField("unparsed_target"):
|
|
38
39
|
unparsed_target = exp.unresolved_star.unparsed_target
|
|
39
40
|
name_parts = split_fully_qualified_spark_name(unparsed_target)
|
|
@@ -102,7 +103,7 @@ def map_unresolved_star(
|
|
|
102
103
|
prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
|
|
103
104
|
prefix_candidate = (
|
|
104
105
|
column_mapping.get_snowpark_column_name_from_spark_column_name(
|
|
105
|
-
prefix_candidate_str, allow_non_exists=True
|
|
106
|
+
prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
|
|
106
107
|
)
|
|
107
108
|
)
|
|
108
109
|
if prefix_candidate is None:
|
|
@@ -180,7 +181,7 @@ def map_unresolved_star_struct(
|
|
|
180
181
|
prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
|
|
181
182
|
prefix_candidate = (
|
|
182
183
|
column_mapping.get_snowpark_column_name_from_spark_column_name(
|
|
183
|
-
prefix_candidate_str, allow_non_exists=True
|
|
184
|
+
prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
|
|
184
185
|
)
|
|
185
186
|
)
|
|
186
187
|
if prefix_candidate is None:
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class HiddenColumn:
|
|
7
|
+
"""
|
|
8
|
+
Represents a hidden column in a Snowflake table.
|
|
9
|
+
|
|
10
|
+
Hidden columns are not visible in standard queries but can be accessed
|
|
11
|
+
directly if needed. This class provides a way to reference such columns
|
|
12
|
+
in Snowpark operations
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(
|
|
16
|
+
self,
|
|
17
|
+
hidden_snowpark_name: str,
|
|
18
|
+
spark_name: str,
|
|
19
|
+
visible_snowpark_name: str,
|
|
20
|
+
qualifiers: list[str] | None = None,
|
|
21
|
+
original_position: int | None = None,
|
|
22
|
+
) -> None:
|
|
23
|
+
"""
|
|
24
|
+
Initializes a HiddenColumn instance.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
name (str): The name of the hidden column.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
# The Snowpark internal name for the hidden column
|
|
31
|
+
self.hidden_snowpark_name = hidden_snowpark_name
|
|
32
|
+
# The Spark name for the hidden column
|
|
33
|
+
self.spark_name = spark_name
|
|
34
|
+
# The left side visible Snowpark name for the dropped right side column
|
|
35
|
+
self.visible_snowpark_name = visible_snowpark_name
|
|
36
|
+
# Qualifiers for the hidden column (e.g., table or schema names)
|
|
37
|
+
self.qualifiers = qualifiers if qualifiers is not None else []
|
|
38
|
+
# The position of the hidden column in the original schema
|
|
39
|
+
self.original_position = original_position
|
|
Binary file
|
|
@@ -735,10 +735,17 @@ def map_with_columns(
|
|
|
735
735
|
"""
|
|
736
736
|
input_container = map_relation(rel.with_columns.input)
|
|
737
737
|
input_df = input_container.dataframe
|
|
738
|
-
with_columns = [
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
738
|
+
with_columns = []
|
|
739
|
+
for alias in rel.with_columns.aliases:
|
|
740
|
+
spark_names, typed_alias = map_alias(
|
|
741
|
+
alias, input_container.column_map, ExpressionTyper(input_df)
|
|
742
|
+
)
|
|
743
|
+
register_lca_alias(spark_names[0], typed_alias)
|
|
744
|
+
with_columns.append((spark_names, typed_alias))
|
|
745
|
+
|
|
746
|
+
# we don't need lateral aliases anymore
|
|
747
|
+
clear_lca_alias_map()
|
|
748
|
+
|
|
742
749
|
# TODO: This list needs to contain all unique column names, but the code below doesn't
|
|
743
750
|
# guarantee that.
|
|
744
751
|
with_columns_names = []
|
|
@@ -806,6 +813,12 @@ def map_with_columns(
|
|
|
806
813
|
with_columns_names_deduped, with_columns_exprs_deduped
|
|
807
814
|
).select(*new_snowpark_columns)
|
|
808
815
|
|
|
816
|
+
# SNOW-2306644: the next projection after a withColumn call can completely remove the added column
|
|
817
|
+
# df.withColumn("new").select("foo").filter("new") will fail with a missing column error
|
|
818
|
+
# the column will be preserved if flattening is disabled
|
|
819
|
+
if hasattr(result, "_select_statement"):
|
|
820
|
+
result._select_statement.flatten_disabled = True
|
|
821
|
+
|
|
809
822
|
snowpark_name_to_type = dict(
|
|
810
823
|
[(f.name, f.datatype) for f in input_df.schema.fields]
|
|
811
824
|
+ list(zip(with_columns_names, with_columns_types))
|
|
@@ -350,15 +350,51 @@ def map_aggregate(
|
|
|
350
350
|
if not is_group_by_all:
|
|
351
351
|
raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
|
|
352
352
|
|
|
353
|
-
#
|
|
354
|
-
|
|
355
|
-
|
|
353
|
+
# Determine grouping columns for context
|
|
354
|
+
# For GROUPING SETS, we need to extract the columns from the sets
|
|
355
|
+
grouping_columns_for_context = []
|
|
356
|
+
if aggregate.group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
|
|
357
|
+
# Use a list to preserve order, avoiding duplicates
|
|
358
|
+
for grouping_set in aggregate.grouping_sets:
|
|
359
|
+
for exp in grouping_set.grouping_set:
|
|
360
|
+
spark_names, _ = map_expression(exp, input_container.column_map, typer)
|
|
361
|
+
# map_expression always returns a list, get the first element
|
|
362
|
+
col_name = spark_names[0]
|
|
363
|
+
if col_name not in grouping_columns_for_context:
|
|
364
|
+
grouping_columns_for_context.append(col_name)
|
|
365
|
+
else:
|
|
366
|
+
grouping_columns_for_context = [spark_name for spark_name, _ in raw_groupings]
|
|
367
|
+
|
|
368
|
+
# Set grouping columns context for processing aggregate expressions
|
|
369
|
+
# This context is needed for resolving grouping__id references
|
|
370
|
+
# TODO: This should properly handle nested queries with GROUP BY using push/pop
|
|
371
|
+
# Currently, nested queries may interfere with parent queries
|
|
372
|
+
set_current_grouping_columns(grouping_columns_for_context)
|
|
373
|
+
|
|
374
|
+
# LCA Support for aggregate expressions: Use the LCA alias map
|
|
375
|
+
# Note: We don't clear the map here to preserve any parent context aliases
|
|
376
|
+
from snowflake.snowpark_connect.utils.context import register_lca_alias
|
|
356
377
|
|
|
357
378
|
agg_count = get_sql_aggregate_function_count()
|
|
358
379
|
for exp in aggregate.aggregate_expressions:
|
|
359
380
|
col = _map_column(exp)
|
|
360
381
|
raw_aggregations.append(col)
|
|
361
382
|
|
|
383
|
+
# If this is an alias, register it in the LCA map for subsequent expressions
|
|
384
|
+
if (
|
|
385
|
+
exp.WhichOneof("expr_type") == "alias"
|
|
386
|
+
and exp.alias.name
|
|
387
|
+
and len(exp.alias.name) > 0
|
|
388
|
+
):
|
|
389
|
+
alias_name = exp.alias.name[0]
|
|
390
|
+
spark_name, snowpark_column = col
|
|
391
|
+
|
|
392
|
+
# Register the alias pointing to the result of its expression
|
|
393
|
+
# This handles both simple aliases (k as lca) and complex ones (lca + 1 as col)
|
|
394
|
+
# The snowpark_column already contains the computed expression with its alias wrapper,
|
|
395
|
+
# which is fine - when referenced later, the column's value is what gets used
|
|
396
|
+
register_lca_alias(alias_name, snowpark_column)
|
|
397
|
+
|
|
362
398
|
if is_group_by_all:
|
|
363
399
|
new_agg_count = get_sql_aggregate_function_count()
|
|
364
400
|
if new_agg_count == agg_count:
|
|
@@ -404,15 +440,20 @@ def map_aggregate(
|
|
|
404
440
|
case snowflake_proto.Aggregate.GROUP_TYPE_CUBE:
|
|
405
441
|
result = input_df.cube(groupings)
|
|
406
442
|
case snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
|
|
407
|
-
#
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
443
|
+
# Map each grouping set to columns
|
|
444
|
+
sets_mapped = []
|
|
445
|
+
for grouping_set in aggregate.grouping_sets:
|
|
446
|
+
set_cols = []
|
|
447
|
+
for exp in grouping_set.grouping_set:
|
|
448
|
+
_, typed_col = map_expression(
|
|
449
|
+
exp, input_container.column_map, typer
|
|
450
|
+
)
|
|
451
|
+
set_cols.append(typed_col.col)
|
|
452
|
+
sets_mapped.append(set_cols)
|
|
453
|
+
|
|
454
|
+
result = input_df.group_by_grouping_sets(
|
|
455
|
+
snowpark.GroupingSets(*sets_mapped)
|
|
414
456
|
)
|
|
415
|
-
result = input_df.group_by_grouping_sets(snowpark.GroupingSets(*sets))
|
|
416
457
|
case other:
|
|
417
458
|
raise SnowparkConnectNotImplementedError(
|
|
418
459
|
f"Unsupported GROUP BY type: {other}"
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
from collections import Counter
|
|
5
5
|
from functools import reduce
|
|
6
6
|
|
|
7
7
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
8
|
+
from pyspark.errors.exceptions.base import AnalysisException
|
|
8
9
|
|
|
9
10
|
import snowflake.snowpark.functions as snowpark_fn
|
|
10
11
|
from snowflake import snowpark
|
|
12
|
+
from snowflake.snowpark._internal.analyzer.analyzer_utils import (
|
|
13
|
+
quote_name_without_upper_casing,
|
|
14
|
+
unquote_if_quoted,
|
|
15
|
+
)
|
|
11
16
|
from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
|
|
12
17
|
from snowflake.snowpark_connect.config import global_config
|
|
13
18
|
from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
|
|
@@ -17,6 +22,7 @@ from snowflake.snowpark_connect.expression.map_expression import (
|
|
|
17
22
|
map_single_column_expression,
|
|
18
23
|
)
|
|
19
24
|
from snowflake.snowpark_connect.expression.typer import JoinExpressionTyper
|
|
25
|
+
from snowflake.snowpark_connect.hidden_column import HiddenColumn
|
|
20
26
|
from snowflake.snowpark_connect.relation.map_relation import (
|
|
21
27
|
NATURAL_JOIN_TYPE_BASE,
|
|
22
28
|
map_relation,
|
|
@@ -24,7 +30,6 @@ from snowflake.snowpark_connect.relation.map_relation import (
|
|
|
24
30
|
from snowflake.snowpark_connect.utils.context import (
|
|
25
31
|
push_evaluating_join_condition,
|
|
26
32
|
push_sql_scope,
|
|
27
|
-
set_plan_id_map,
|
|
28
33
|
set_sql_plan_name,
|
|
29
34
|
)
|
|
30
35
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
@@ -33,6 +38,9 @@ from snowflake.snowpark_connect.utils.telemetry import (
|
|
|
33
38
|
|
|
34
39
|
USING_COLUMN_NOT_FOUND_ERROR = "[UNRESOLVED_USING_COLUMN_FOR_JOIN] USING column `{0}` not found on the {1} side of the join. The {1}-side columns: {2}"
|
|
35
40
|
|
|
41
|
+
DUPLICATED_JOIN_COL_LSUFFIX = "_left"
|
|
42
|
+
DUPLICATED_JOIN_COL_RSUFFIX = "_right"
|
|
43
|
+
|
|
36
44
|
|
|
37
45
|
def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
38
46
|
left_container: DataFrameContainer = map_relation(rel.join.left)
|
|
@@ -74,6 +82,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
74
82
|
|
|
75
83
|
# This handles case sensitivity for using_columns
|
|
76
84
|
case_corrected_right_columns: list[str] = []
|
|
85
|
+
hidden_columns = set()
|
|
86
|
+
# Propagate the hidden columns from left/right inputs to the result in case of chained joins
|
|
87
|
+
if left_container.column_map.hidden_columns:
|
|
88
|
+
hidden_columns.update(left_container.column_map.hidden_columns)
|
|
89
|
+
|
|
90
|
+
if right_container.column_map.hidden_columns:
|
|
91
|
+
hidden_columns.update(right_container.column_map.hidden_columns)
|
|
77
92
|
|
|
78
93
|
if rel.join.HasField("join_condition"):
|
|
79
94
|
assert not using_columns
|
|
@@ -105,8 +120,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
105
120
|
right=right_input,
|
|
106
121
|
on=join_expression.col,
|
|
107
122
|
how=join_type,
|
|
108
|
-
lsuffix=
|
|
109
|
-
rsuffix=
|
|
123
|
+
lsuffix=DUPLICATED_JOIN_COL_LSUFFIX,
|
|
124
|
+
rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
|
|
110
125
|
)
|
|
111
126
|
elif using_columns:
|
|
112
127
|
if any(
|
|
@@ -156,12 +171,24 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
156
171
|
)
|
|
157
172
|
)
|
|
158
173
|
|
|
174
|
+
using_columns_snowpark_names = (
|
|
175
|
+
left_container.column_map.get_snowpark_column_names_from_spark_column_names(
|
|
176
|
+
list(using_columns), return_first=True
|
|
177
|
+
)
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
using_columns_snowpark_types = [
|
|
181
|
+
left_container.dataframe.schema.fields[idx].datatype
|
|
182
|
+
for idx, col in enumerate(left_container.column_map.get_snowpark_columns())
|
|
183
|
+
if col in using_columns_snowpark_names
|
|
184
|
+
]
|
|
185
|
+
|
|
159
186
|
# Round trip the using columns through the column map to get the correct names
|
|
160
187
|
# in order to support case sensitivity.
|
|
161
188
|
# TODO: case_corrected_left_columns / case_corrected_right_columns may no longer be required as Snowpark dataframe preserves the column casing now.
|
|
162
|
-
case_corrected_left_columns =
|
|
163
|
-
left_container.column_map.
|
|
164
|
-
|
|
189
|
+
case_corrected_left_columns = (
|
|
190
|
+
left_container.column_map.get_spark_column_names_from_snowpark_column_names(
|
|
191
|
+
using_columns_snowpark_names
|
|
165
192
|
)
|
|
166
193
|
)
|
|
167
194
|
case_corrected_right_columns = right_container.column_map.get_spark_column_names_from_snowpark_column_names(
|
|
@@ -195,28 +222,141 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
195
222
|
(left == right for left, right in snowpark_using_columns),
|
|
196
223
|
),
|
|
197
224
|
how=join_type,
|
|
225
|
+
rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
|
|
198
226
|
)
|
|
227
|
+
# If we disambiguated the snowpark_using_columns during the join, we need to update 'snowpark_using_columns' to
|
|
228
|
+
# use the disambiguated names.
|
|
229
|
+
disambiguated_snowpark_using_columns = []
|
|
230
|
+
|
|
231
|
+
# Ignore disambiguation for LEFT SEMI JOIN and LEFT ANTI JOIN because they drop the right columns, so it'll never disambiguate.
|
|
232
|
+
if join_type in ["leftsemi", "leftanti"]:
|
|
233
|
+
disambiguated_snowpark_using_columns = snowpark_using_columns
|
|
234
|
+
else:
|
|
235
|
+
normalized_joined_columns = [
|
|
236
|
+
unquote_if_quoted(col) for col in joined_df.columns
|
|
237
|
+
]
|
|
238
|
+
# snowpark_using_columns is a list of tuples of snowpark columns, joined_df.columns is a list of strings of column names
|
|
239
|
+
for (left, right) in snowpark_using_columns:
|
|
240
|
+
normalized_left_name = unquote_if_quoted(left.getName())
|
|
241
|
+
normalized_right_name = unquote_if_quoted(right.getName())
|
|
242
|
+
|
|
243
|
+
# are both left and right in joined_df? if not, it's been disambiguated
|
|
244
|
+
if (
|
|
245
|
+
normalized_left_name in normalized_joined_columns
|
|
246
|
+
and normalized_right_name in normalized_joined_columns
|
|
247
|
+
):
|
|
248
|
+
# we want to just add this
|
|
249
|
+
disambiguated_snowpark_using_columns.append((left, right))
|
|
250
|
+
else:
|
|
251
|
+
# we need to figure out the disambiguated names and add those - it only disambiguates if left == right
|
|
252
|
+
disambiguated_left: snowpark.Column | None = None
|
|
253
|
+
disambiguated_right: snowpark.Column | None = None
|
|
254
|
+
|
|
255
|
+
for col in normalized_joined_columns:
|
|
256
|
+
quoted_col = f'"{col}"'
|
|
257
|
+
# get the column name and cross check it to see if it ends with the og name
|
|
258
|
+
if col.endswith(normalized_left_name) and col.startswith("l_"):
|
|
259
|
+
disambiguated_left = joined_df[quoted_col]
|
|
260
|
+
elif col.endswith(normalized_right_name) and col.startswith(
|
|
261
|
+
"r_"
|
|
262
|
+
):
|
|
263
|
+
disambiguated_right = joined_df[quoted_col]
|
|
264
|
+
|
|
265
|
+
# If we have both disambiguated columns, we can break out of the loop to save processing time
|
|
266
|
+
if (
|
|
267
|
+
disambiguated_left is not None
|
|
268
|
+
and disambiguated_right is not None
|
|
269
|
+
):
|
|
270
|
+
break
|
|
271
|
+
if disambiguated_left is None or disambiguated_right is None:
|
|
272
|
+
raise AnalysisException(
|
|
273
|
+
f"Disambiguated columns not found for {normalized_left_name} and {normalized_right_name}."
|
|
274
|
+
)
|
|
275
|
+
disambiguated_snowpark_using_columns.append(
|
|
276
|
+
(disambiguated_left, disambiguated_right)
|
|
277
|
+
)
|
|
278
|
+
|
|
199
279
|
# For outer joins, we need to preserve join keys from both sides using COALESCE
|
|
280
|
+
"""
|
|
281
|
+
CHANGES:
|
|
282
|
+
- IF CASE
|
|
283
|
+
- Need to drop the using columns
|
|
284
|
+
- Need to create the hidden_columns DF with the using columns from right and left
|
|
285
|
+
- ELSE CASE
|
|
286
|
+
- Need to drop the right side using columns
|
|
287
|
+
- Need to create the hidden_columns DF with the using columns from right
|
|
288
|
+
"""
|
|
200
289
|
if join_type == "full_outer":
|
|
201
290
|
coalesced_columns = []
|
|
202
|
-
|
|
203
|
-
for i, (left_col, right_col) in enumerate(snowpark_using_columns):
|
|
291
|
+
for i, (left_col, _right_col) in enumerate(snowpark_using_columns):
|
|
204
292
|
# Use the original user-specified column name to preserve case sensitivity
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
293
|
+
# Use the disambiguated columns for coalescing
|
|
294
|
+
disambiguated_left_col = disambiguated_snowpark_using_columns[i][0]
|
|
295
|
+
disambiguated_right_col = disambiguated_snowpark_using_columns[i][1]
|
|
296
|
+
|
|
297
|
+
coalesced_col = snowpark_fn.coalesce(
|
|
298
|
+
disambiguated_left_col, disambiguated_right_col
|
|
299
|
+
).alias(left_col.get_name())
|
|
209
300
|
coalesced_columns.append(coalesced_col)
|
|
210
|
-
columns_to_drop.extend([left_col, right_col])
|
|
211
301
|
|
|
302
|
+
# Create HiddenColumn objects for each hidden column
|
|
303
|
+
hidden_left = HiddenColumn(
|
|
304
|
+
hidden_snowpark_name=disambiguated_left_col.getName(),
|
|
305
|
+
spark_name=case_corrected_left_columns[i],
|
|
306
|
+
visible_snowpark_name=left_col.get_name(),
|
|
307
|
+
qualifiers=left_container.column_map.get_qualifier_for_spark_column(
|
|
308
|
+
case_corrected_left_columns[i]
|
|
309
|
+
),
|
|
310
|
+
original_position=left_container.column_map.get_spark_columns().index(
|
|
311
|
+
case_corrected_left_columns[i]
|
|
312
|
+
),
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
hidden_right = HiddenColumn(
|
|
316
|
+
hidden_snowpark_name=disambiguated_right_col.getName(),
|
|
317
|
+
spark_name=case_corrected_right_columns[i],
|
|
318
|
+
visible_snowpark_name=left_col.get_name(),
|
|
319
|
+
qualifiers=right_container.column_map.get_qualifier_for_spark_column(
|
|
320
|
+
case_corrected_right_columns[i]
|
|
321
|
+
),
|
|
322
|
+
original_position=right_container.column_map.get_spark_columns().index(
|
|
323
|
+
case_corrected_right_columns[i]
|
|
324
|
+
),
|
|
325
|
+
)
|
|
326
|
+
hidden_columns.update(
|
|
327
|
+
[
|
|
328
|
+
hidden_left,
|
|
329
|
+
hidden_right,
|
|
330
|
+
]
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
# All non-hidden columns (not including the coalesced columns)
|
|
212
334
|
other_columns = [
|
|
213
335
|
snowpark_fn.col(col_name)
|
|
214
336
|
for col_name in joined_df.columns
|
|
215
|
-
if col_name not in [col.
|
|
337
|
+
if col_name not in [col.hidden_snowpark_name for col in hidden_columns]
|
|
216
338
|
]
|
|
217
339
|
result = joined_df.select(coalesced_columns + other_columns)
|
|
340
|
+
|
|
218
341
|
else:
|
|
219
342
|
result = joined_df.drop(*(right for _, right in snowpark_using_columns))
|
|
343
|
+
# We never run into the disambiguation case unless it's a full outer join.
|
|
344
|
+
for i, (left_col, right_col) in enumerate(
|
|
345
|
+
disambiguated_snowpark_using_columns
|
|
346
|
+
):
|
|
347
|
+
# Only right side columns are hidden
|
|
348
|
+
hidden_col = HiddenColumn(
|
|
349
|
+
hidden_snowpark_name=right_col.getName(),
|
|
350
|
+
spark_name=case_corrected_right_columns[i],
|
|
351
|
+
visible_snowpark_name=left_col.getName(),
|
|
352
|
+
qualifiers=right_container.column_map.get_qualifier_for_spark_column(
|
|
353
|
+
case_corrected_right_columns[i]
|
|
354
|
+
),
|
|
355
|
+
original_position=right_container.column_map.get_spark_columns().index(
|
|
356
|
+
case_corrected_right_columns[i]
|
|
357
|
+
),
|
|
358
|
+
)
|
|
359
|
+
hidden_columns.add(hidden_col)
|
|
220
360
|
else:
|
|
221
361
|
if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
|
|
222
362
|
raise SparkException.implicit_cartesian_product("inner")
|
|
@@ -230,35 +370,110 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
230
370
|
# - LEFT SEMI JOIN: Returns left rows that have matches in right table (no right columns)
|
|
231
371
|
# - LEFT ANTI JOIN: Returns left rows that have NO matches in right table (no right columns)
|
|
232
372
|
# Both preserve only the columns from the left DataFrame without adding any columns from the right.
|
|
233
|
-
spark_cols_after_join
|
|
373
|
+
spark_cols_after_join = left_container.column_map.get_spark_columns()
|
|
374
|
+
snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
|
|
375
|
+
snowpark_col_types = [
|
|
376
|
+
f.datatype for f in left_container.dataframe.schema.fields
|
|
377
|
+
]
|
|
234
378
|
qualifiers = left_container.column_map.get_qualifiers()
|
|
379
|
+
elif join_type == "full_outer" and using_columns:
|
|
380
|
+
# We want the coalesced columns to be first, followed by all the left and right columns (excluding using columns)
|
|
381
|
+
spark_cols_after_join: list[str] = []
|
|
382
|
+
snowpark_cols_after_join: list[str] = []
|
|
383
|
+
snowpark_col_types: list[str] = []
|
|
384
|
+
|
|
385
|
+
left_container_snowpark_columns = (
|
|
386
|
+
left_container.column_map.get_snowpark_columns()
|
|
387
|
+
)
|
|
388
|
+
right_container_snowpark_columns = (
|
|
389
|
+
right_container.column_map.get_snowpark_columns()
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
qualifiers = []
|
|
393
|
+
for i in range(len(case_corrected_left_columns)):
|
|
394
|
+
spark_cols_after_join.append(case_corrected_left_columns[i])
|
|
395
|
+
snowpark_cols_after_join.append(using_columns_snowpark_names[i])
|
|
396
|
+
snowpark_col_types.append(using_columns_snowpark_types[i])
|
|
397
|
+
qualifiers.append([])
|
|
398
|
+
|
|
399
|
+
# Handle adding left and right columns, excluding the using columns
|
|
400
|
+
for i, spark_col in enumerate(left_container.column_map.get_spark_columns()):
|
|
401
|
+
if (
|
|
402
|
+
spark_col not in case_corrected_left_columns
|
|
403
|
+
or spark_col in left_container.column_map.get_spark_columns()[:i]
|
|
404
|
+
):
|
|
405
|
+
spark_cols_after_join.append(spark_col)
|
|
406
|
+
snowpark_cols_after_join.append(left_container_snowpark_columns[i])
|
|
407
|
+
qualifiers.append(
|
|
408
|
+
left_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
snowpark_col_types.append(
|
|
412
|
+
left_container.dataframe.schema.fields[i].datatype
|
|
413
|
+
)
|
|
414
|
+
|
|
415
|
+
for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
|
|
416
|
+
if (
|
|
417
|
+
spark_col not in case_corrected_right_columns
|
|
418
|
+
or spark_col in right_container.column_map.get_spark_columns()[:i]
|
|
419
|
+
):
|
|
420
|
+
spark_cols_after_join.append(spark_col)
|
|
421
|
+
snowpark_cols_after_join.append(right_container_snowpark_columns[i])
|
|
422
|
+
qualifiers.append(
|
|
423
|
+
right_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
snowpark_col_types.append(
|
|
427
|
+
right_container.dataframe.schema.fields[i].datatype
|
|
428
|
+
)
|
|
429
|
+
|
|
235
430
|
else:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
spark_col
|
|
241
|
-
for i, spark_col in enumerate(
|
|
242
|
-
right_container.column_map.get_spark_columns()
|
|
243
|
-
)
|
|
244
|
-
if spark_col not in case_corrected_right_columns
|
|
245
|
-
or spark_col
|
|
246
|
-
in right_container.column_map.get_spark_columns()[
|
|
247
|
-
:i
|
|
248
|
-
] # this is to make sure we only remove the column once
|
|
431
|
+
spark_cols_after_join = left_container.column_map.get_spark_columns()
|
|
432
|
+
snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
|
|
433
|
+
snowpark_col_types = [
|
|
434
|
+
f.datatype for f in left_container.dataframe.schema.fields
|
|
249
435
|
]
|
|
250
436
|
|
|
251
|
-
qualifiers =
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
437
|
+
qualifiers = left_container.column_map.get_qualifiers()
|
|
438
|
+
|
|
439
|
+
right_df_snowpark_columns = right_container.column_map.get_snowpark_columns()
|
|
440
|
+
|
|
441
|
+
for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
|
|
442
|
+
if (
|
|
443
|
+
spark_col not in case_corrected_right_columns
|
|
444
|
+
or spark_col in right_container.column_map.get_spark_columns()[:i]
|
|
445
|
+
):
|
|
446
|
+
spark_cols_after_join.append(spark_col)
|
|
447
|
+
snowpark_cols_after_join.append(right_df_snowpark_columns[i])
|
|
448
|
+
snowpark_col_types.append(
|
|
449
|
+
right_container.dataframe.schema.fields[i].datatype
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
qualifiers.append(
|
|
453
|
+
right_container.column_map.get_qualifier_for_spark_column(spark_col)
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
snowpark_cols_after_join_deduplicated = []
|
|
457
|
+
snowpark_cols_after_join_counter = Counter(snowpark_cols_after_join)
|
|
458
|
+
seen_duplicated_columns = set()
|
|
459
|
+
|
|
460
|
+
for col in snowpark_cols_after_join:
|
|
461
|
+
if snowpark_cols_after_join_counter[col] == 2:
|
|
462
|
+
# This means that the same column exists twice in the joined df, likely due to a self-join and
|
|
463
|
+
# we need to add lsuffix and rsuffix to the names of both columns, similar to what Snowpark did under the hood.
|
|
464
|
+
|
|
465
|
+
suffix = (
|
|
466
|
+
DUPLICATED_JOIN_COL_RSUFFIX
|
|
467
|
+
if col in seen_duplicated_columns
|
|
468
|
+
else DUPLICATED_JOIN_COL_LSUFFIX
|
|
255
469
|
)
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
470
|
+
unquoted_col = unquote_if_quoted(col)
|
|
471
|
+
quoted = quote_name_without_upper_casing(unquoted_col + suffix)
|
|
472
|
+
snowpark_cols_after_join_deduplicated.append(quoted)
|
|
473
|
+
|
|
474
|
+
seen_duplicated_columns.add(col)
|
|
475
|
+
else:
|
|
476
|
+
snowpark_cols_after_join_deduplicated.append(col)
|
|
262
477
|
|
|
263
478
|
column_metadata = {}
|
|
264
479
|
if left_container.column_map.column_metadata:
|
|
@@ -287,33 +502,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
287
502
|
result_container = DataFrameContainer.create_with_column_mapping(
|
|
288
503
|
dataframe=result,
|
|
289
504
|
spark_column_names=spark_cols_after_join,
|
|
290
|
-
snowpark_column_names=
|
|
505
|
+
snowpark_column_names=snowpark_cols_after_join_deduplicated,
|
|
291
506
|
column_metadata=column_metadata,
|
|
292
507
|
column_qualifiers=qualifiers,
|
|
508
|
+
hidden_columns=hidden_columns,
|
|
509
|
+
snowpark_column_types=snowpark_col_types,
|
|
293
510
|
)
|
|
294
511
|
|
|
295
|
-
# Fix for USING join column references with different plan IDs
|
|
296
|
-
# After a USING join, references to the right dataframe's columns should resolve
|
|
297
|
-
# to the result dataframe that contains the merged columns
|
|
298
|
-
if (
|
|
299
|
-
using_columns
|
|
300
|
-
and rel.join.right.HasField("common")
|
|
301
|
-
and rel.join.right.common.HasField("plan_id")
|
|
302
|
-
):
|
|
303
|
-
right_plan_id = rel.join.right.common.plan_id
|
|
304
|
-
set_plan_id_map(right_plan_id, result_container)
|
|
305
|
-
|
|
306
|
-
# For FULL OUTER joins, we also need to map the left dataframe's plan_id
|
|
307
|
-
# since both columns are replaced with a coalesced column
|
|
308
|
-
if (
|
|
309
|
-
using_columns
|
|
310
|
-
and join_type == "full_outer"
|
|
311
|
-
and rel.join.left.HasField("common")
|
|
312
|
-
and rel.join.left.common.HasField("plan_id")
|
|
313
|
-
):
|
|
314
|
-
left_plan_id = rel.join.left.common.plan_id
|
|
315
|
-
set_plan_id_map(left_plan_id, result_container)
|
|
316
|
-
|
|
317
512
|
if rel.join.using_columns:
|
|
318
513
|
# When join 'using_columns', the 'join columns' should go first in result DF.
|
|
319
514
|
idxs_to_shift = [
|
|
@@ -345,6 +540,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
|
|
|
345
540
|
cached_schema_getter=lambda: snowpark.types.StructType(
|
|
346
541
|
reorder(original_df.schema.fields)
|
|
347
542
|
),
|
|
543
|
+
hidden_columns=hidden_columns,
|
|
348
544
|
)
|
|
349
545
|
|
|
350
546
|
return result_container
|