snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +13 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +6 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/function_defaults.py +207 -0
- snowflake/snowpark_connect/expression/literal.py +18 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_expression.py +10 -1
- snowflake/snowpark_connect/expression/map_extension.py +12 -2
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +26 -8
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
- snowflake/snowpark_connect/relation/map_extension.py +56 -15
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +4 -2
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/read/utils.py +6 -7
- snowflake/snowpark_connect/relation/utils.py +1 -170
- snowflake/snowpark_connect/relation/write/map_write.py +62 -53
- snowflake/snowpark_connect/resources_initializer.py +29 -1
- snowflake/snowpark_connect/server.py +18 -3
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +38 -7
- snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.25.0.dist-info/RECORD +477 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.23.0.dist-info/RECORD +0 -893
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -65,7 +65,7 @@ def map_read(
|
|
|
65
65
|
read_format = "parquet"
|
|
66
66
|
|
|
67
67
|
if read_format.lower() == "iceberg":
|
|
68
|
-
telemetry.report_io_read("iceberg"
|
|
68
|
+
telemetry.report_io_read("iceberg")
|
|
69
69
|
return map_read_table(rel)
|
|
70
70
|
|
|
71
71
|
if rel.read.data_source.schema == "":
|
|
@@ -90,7 +90,7 @@ def map_read(
|
|
|
90
90
|
)
|
|
91
91
|
schema = map_json_schema_to_snowpark(parsed_schema)
|
|
92
92
|
options = dict(rel.read.data_source.options)
|
|
93
|
-
telemetry.report_io_read(read_format
|
|
93
|
+
telemetry.report_io_read(read_format)
|
|
94
94
|
session: snowpark.Session = get_or_create_snowpark_session()
|
|
95
95
|
if len(rel.read.data_source.paths) > 0:
|
|
96
96
|
# Normalize paths to ensure consistent behavior
|
|
@@ -175,7 +175,7 @@ def map_read_table_or_file(rel):
|
|
|
175
175
|
rel.read.named_table.unparsed_identifier
|
|
176
176
|
)
|
|
177
177
|
options = {}
|
|
178
|
-
telemetry.report_io_read(read_format
|
|
178
|
+
telemetry.report_io_read(read_format)
|
|
179
179
|
session: snowpark.Session = get_or_create_snowpark_session()
|
|
180
180
|
|
|
181
181
|
clean_source_paths = [
|
|
@@ -94,7 +94,7 @@ def map_read_jdbc(
|
|
|
94
94
|
num_partitions=num_partitions,
|
|
95
95
|
predicates=predicates,
|
|
96
96
|
)
|
|
97
|
-
true_names = list(map(lambda x: unquote_if_quoted(x)
|
|
97
|
+
true_names = list(map(lambda x: unquote_if_quoted(x), df.columns))
|
|
98
98
|
renamed_df, snowpark_cols = rename_columns_as_snowflake_standard(
|
|
99
99
|
df, rel.common.plan_id
|
|
100
100
|
)
|
|
@@ -34,6 +34,7 @@ from snowflake.snowpark_connect.type_mapping import (
|
|
|
34
34
|
cast_to_match_snowpark_type,
|
|
35
35
|
map_simple_types,
|
|
36
36
|
)
|
|
37
|
+
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
37
38
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
38
39
|
SnowparkConnectNotImplementedError,
|
|
39
40
|
)
|
|
@@ -66,6 +67,7 @@ def map_read_json(
|
|
|
66
67
|
|
|
67
68
|
rows_to_infer_schema = snowpark_options.pop("rowstoinferschema", 1000)
|
|
68
69
|
dropFieldIfAllNull = snowpark_options.pop("dropfieldifallnull", False)
|
|
70
|
+
batch_size = snowpark_options.pop("batchsize", 1000)
|
|
69
71
|
|
|
70
72
|
reader = session.read.options(snowpark_options)
|
|
71
73
|
|
|
@@ -99,7 +101,7 @@ def map_read_json(
|
|
|
99
101
|
]
|
|
100
102
|
|
|
101
103
|
df = construct_dataframe_by_schema(
|
|
102
|
-
schema, df.to_local_iterator(), session, snowpark_options
|
|
104
|
+
schema, df.to_local_iterator(), session, snowpark_options, batch_size
|
|
103
105
|
)
|
|
104
106
|
|
|
105
107
|
spark_column_names = get_spark_column_names_from_snowpark_columns(df.columns)
|
|
@@ -277,9 +279,11 @@ def construct_dataframe_by_schema(
|
|
|
277
279
|
result = None
|
|
278
280
|
|
|
279
281
|
current_data = []
|
|
282
|
+
progress = 0
|
|
280
283
|
for row in rows:
|
|
281
284
|
current_data.append(construct_row_by_schema(row, schema, snowpark_options))
|
|
282
285
|
if len(current_data) >= batch_size:
|
|
286
|
+
progress += len(current_data)
|
|
283
287
|
result = union_data_into_df(
|
|
284
288
|
result,
|
|
285
289
|
current_data,
|
|
@@ -287,9 +291,11 @@ def construct_dataframe_by_schema(
|
|
|
287
291
|
session,
|
|
288
292
|
)
|
|
289
293
|
|
|
294
|
+
logger.info(f"JSON reader: finished processing {progress} rows")
|
|
290
295
|
current_data = []
|
|
291
296
|
|
|
292
297
|
if len(current_data) > 0:
|
|
298
|
+
progress += len(current_data)
|
|
293
299
|
result = union_data_into_df(
|
|
294
300
|
result,
|
|
295
301
|
current_data,
|
|
@@ -297,6 +303,7 @@ def construct_dataframe_by_schema(
|
|
|
297
303
|
session,
|
|
298
304
|
)
|
|
299
305
|
|
|
306
|
+
logger.info(f"JSON reader: finished processing {progress} rows")
|
|
300
307
|
current_data = []
|
|
301
308
|
|
|
302
309
|
if result is None:
|
|
@@ -11,7 +11,6 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
|
|
|
11
11
|
unquote_if_quoted,
|
|
12
12
|
)
|
|
13
13
|
from snowflake.snowpark.exceptions import SnowparkSQLException
|
|
14
|
-
from snowflake.snowpark_connect.column_name_handler import ALREADY_QUOTED
|
|
15
14
|
from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
|
|
16
15
|
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
|
|
17
16
|
from snowflake.snowpark_connect.relation.read.utils import (
|
|
@@ -29,15 +28,8 @@ from snowflake.snowpark_connect.utils.telemetry import (
|
|
|
29
28
|
def post_process_df(
|
|
30
29
|
df: snowpark.DataFrame, plan_id: int, source_table_name: str = None
|
|
31
30
|
) -> DataFrameContainer:
|
|
32
|
-
def _lower_or_unquote(string):
|
|
33
|
-
return (
|
|
34
|
-
string[1:-1].replace('""', '"')
|
|
35
|
-
if ALREADY_QUOTED.match(string)
|
|
36
|
-
else string.lower()
|
|
37
|
-
)
|
|
38
|
-
|
|
39
31
|
try:
|
|
40
|
-
true_names = list(map(lambda x:
|
|
32
|
+
true_names = list(map(lambda x: unquote_if_quoted(x), df.columns))
|
|
41
33
|
renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
|
|
42
34
|
df, plan_id
|
|
43
35
|
)
|
|
@@ -317,6 +317,7 @@ class JsonReaderConfig(ReaderWriterConfig):
|
|
|
317
317
|
# TODO: modifiedAfter: Union[bool, str, None] = None,
|
|
318
318
|
# TODO: allowNonNumericNumbers: Union[bool, str, None] = None,
|
|
319
319
|
"rowsToInferSchema": 1000,
|
|
320
|
+
"batchSize": 1000,
|
|
320
321
|
},
|
|
321
322
|
supported_options={
|
|
322
323
|
"schema",
|
|
@@ -347,12 +348,13 @@ class JsonReaderConfig(ReaderWriterConfig):
|
|
|
347
348
|
# "ignoreNullFields",
|
|
348
349
|
"rowsToInferSchema",
|
|
349
350
|
# "inferTimestamp",
|
|
351
|
+
"batchSize",
|
|
350
352
|
},
|
|
351
353
|
boolean_config_list=[
|
|
352
354
|
"multiLine",
|
|
353
355
|
"dropFieldIfAllNull",
|
|
354
356
|
],
|
|
355
|
-
int_config_list=["rowsToInferSchema"],
|
|
357
|
+
int_config_list=["rowsToInferSchema", "batchSize"],
|
|
356
358
|
float_config_list=["samplingRatio"],
|
|
357
359
|
),
|
|
358
360
|
options,
|
|
@@ -73,13 +73,12 @@ def rename_columns_as_snowflake_standard(
|
|
|
73
73
|
return df, []
|
|
74
74
|
|
|
75
75
|
new_columns = make_column_names_snowpark_compatible(df.columns, plan_id)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
return (result, new_columns)
|
|
76
|
+
return (
|
|
77
|
+
df.select(
|
|
78
|
+
*(df.col(orig).alias(alias) for orig, alias in zip(df.columns, new_columns))
|
|
79
|
+
),
|
|
80
|
+
new_columns,
|
|
81
|
+
)
|
|
83
82
|
|
|
84
83
|
|
|
85
84
|
class Connection(Protocol):
|
|
@@ -6,28 +6,12 @@ import random
|
|
|
6
6
|
import re
|
|
7
7
|
import string
|
|
8
8
|
import time
|
|
9
|
-
from
|
|
10
|
-
from typing import AbstractSet, List, Optional, Sequence
|
|
9
|
+
from typing import Sequence
|
|
11
10
|
|
|
12
11
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
13
12
|
|
|
14
13
|
import snowflake.snowpark.functions as snowpark_fn
|
|
15
14
|
from snowflake import snowpark
|
|
16
|
-
from snowflake.snowpark._internal.analyzer.expression import (
|
|
17
|
-
COLUMN_DEPENDENCY_ALL,
|
|
18
|
-
COLUMN_DEPENDENCY_DOLLAR,
|
|
19
|
-
Expression,
|
|
20
|
-
FunctionExpression,
|
|
21
|
-
derive_dependent_columns,
|
|
22
|
-
)
|
|
23
|
-
from snowflake.snowpark._internal.analyzer.select_statement import (
|
|
24
|
-
SEQUENCE_DEPENDENT_DATA_GENERATION,
|
|
25
|
-
ColumnChangeState,
|
|
26
|
-
ColumnStateDict,
|
|
27
|
-
SelectStatement,
|
|
28
|
-
)
|
|
29
|
-
from snowflake.snowpark._internal.analyzer.unary_expression import Alias
|
|
30
|
-
from snowflake.snowpark._internal.analyzer.window_expression import WindowExpression
|
|
31
15
|
from snowflake.snowpark.types import (
|
|
32
16
|
BinaryType,
|
|
33
17
|
BooleanType,
|
|
@@ -239,156 +223,3 @@ def snowpark_functions_col(name: str, column_map: ColumnNameMap) -> snowpark.Col
|
|
|
239
223
|
"""
|
|
240
224
|
is_qualified_name = name not in column_map.get_snowpark_columns()
|
|
241
225
|
return snowpark_fn.col(name, _is_qualified_name=is_qualified_name)
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
def can_sort_be_flattened(
|
|
245
|
-
select_statement: Optional[SelectStatement], *sort_expressions: Optional[Expression]
|
|
246
|
-
) -> bool:
|
|
247
|
-
"""
|
|
248
|
-
Checks if the given SelectStatement can be "flattened" when sorting with regard to the given sort expressions.
|
|
249
|
-
Flattening means that the given SelectStatement can be enhanced and reused instead of being treated
|
|
250
|
-
as a subquery in the FROM clause after a "sort" or "filter" operation. Flattening allows accessing dropped columns
|
|
251
|
-
for sort and filter expressions.
|
|
252
|
-
"""
|
|
253
|
-
if not select_statement or select_statement.flatten_disabled:
|
|
254
|
-
return False
|
|
255
|
-
|
|
256
|
-
# In some cases, flattening sort can lead to leaving the "order by" clause in a subquery,
|
|
257
|
-
# which can cause incorrect ordering. We want to avoid flattening sort when all its dependent columns
|
|
258
|
-
# are available in the current projection.
|
|
259
|
-
dependent_columns_in_sort = derive_dependent_columns(*sort_expressions)
|
|
260
|
-
columns_in_projection = _get_columns_in_projection(select_statement.projection)
|
|
261
|
-
if len(dependent_columns_in_sort - columns_in_projection) == 0:
|
|
262
|
-
return False
|
|
263
|
-
|
|
264
|
-
return _can_clause_dependent_columns_flatten(
|
|
265
|
-
dependent_columns_in_sort, select_statement.column_states
|
|
266
|
-
) and not _has_data_generator_exp(select_statement.projection)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def can_filter_be_flattened(
|
|
270
|
-
select_statement: Optional[SelectStatement], condition: Expression
|
|
271
|
-
) -> bool:
|
|
272
|
-
"""
|
|
273
|
-
Checks if the given SelectStatement can be "flattened" when filtering with regard to the given condition.
|
|
274
|
-
Flattening means that the given SelectStatement can be enhanced and reused instead of being treated
|
|
275
|
-
as a subquery in the FROM clause after a "sort" or "filter" operation. Flattening allows accessing dropped columns
|
|
276
|
-
for sort and filter expressions.
|
|
277
|
-
"""
|
|
278
|
-
if not select_statement or select_statement.flatten_disabled:
|
|
279
|
-
return False
|
|
280
|
-
|
|
281
|
-
return all(
|
|
282
|
-
[
|
|
283
|
-
_can_clause_dependent_columns_flatten(
|
|
284
|
-
derive_dependent_columns(condition), select_statement.column_states
|
|
285
|
-
),
|
|
286
|
-
not _has_data_generator_or_window_exp(select_statement.projection),
|
|
287
|
-
select_statement.order_by is None,
|
|
288
|
-
select_statement.limit_ is None,
|
|
289
|
-
]
|
|
290
|
-
)
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
def _get_columns_in_projection(
|
|
294
|
-
projection: Optional[List[Expression]],
|
|
295
|
-
) -> AbstractSet[str]:
|
|
296
|
-
if projection is None:
|
|
297
|
-
return set()
|
|
298
|
-
|
|
299
|
-
columns = set()
|
|
300
|
-
for expression in projection:
|
|
301
|
-
if hasattr(expression, "name") and expression.name:
|
|
302
|
-
columns.add(expression.name)
|
|
303
|
-
elif hasattr(expression, "children"):
|
|
304
|
-
columns.update(_get_columns_in_projection(expression.children))
|
|
305
|
-
|
|
306
|
-
return columns
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def _is_self_alias(expression):
|
|
310
|
-
"""
|
|
311
|
-
Check if the expression is a self-alias, meaning it has an alias that is the same as its name.
|
|
312
|
-
A self-alias can be flattened, even if Snowpark treats it as a CHANGED_EXP.
|
|
313
|
-
"""
|
|
314
|
-
if not isinstance(expression, Alias):
|
|
315
|
-
return False
|
|
316
|
-
|
|
317
|
-
first_child_with_name = expression.child
|
|
318
|
-
while (
|
|
319
|
-
first_child_with_name
|
|
320
|
-
and hasattr(first_child_with_name, "child")
|
|
321
|
-
and not hasattr(first_child_with_name, "name")
|
|
322
|
-
):
|
|
323
|
-
first_child_with_name = first_child_with_name.child
|
|
324
|
-
|
|
325
|
-
return (
|
|
326
|
-
first_child_with_name
|
|
327
|
-
and (first_child_with_name, "name")
|
|
328
|
-
and first_child_with_name.name == expression.name
|
|
329
|
-
)
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
def _can_clause_dependent_columns_flatten(
|
|
333
|
-
dependent_columns: Optional[AbstractSet[str]],
|
|
334
|
-
subquery_column_states: ColumnStateDict,
|
|
335
|
-
) -> bool:
|
|
336
|
-
if dependent_columns == COLUMN_DEPENDENCY_DOLLAR:
|
|
337
|
-
return False
|
|
338
|
-
elif (
|
|
339
|
-
subquery_column_states.has_changed_columns
|
|
340
|
-
or subquery_column_states.has_new_columns
|
|
341
|
-
):
|
|
342
|
-
if dependent_columns == COLUMN_DEPENDENCY_ALL:
|
|
343
|
-
return False
|
|
344
|
-
|
|
345
|
-
assert dependent_columns is not None
|
|
346
|
-
for dc in dependent_columns:
|
|
347
|
-
dc_state = subquery_column_states.get(dc)
|
|
348
|
-
if dc_state:
|
|
349
|
-
if (
|
|
350
|
-
dc_state.change_state == ColumnChangeState.CHANGED_EXP
|
|
351
|
-
and not _is_self_alias(dc_state.expression)
|
|
352
|
-
):
|
|
353
|
-
return False
|
|
354
|
-
return True
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
def _has_data_generator_exp(expressions: List[Expression]) -> bool:
|
|
358
|
-
return _has_expression(expressions, [_is_generator_expression])
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
def _has_data_generator_or_window_exp(expressions: List[Expression]) -> bool:
|
|
362
|
-
return _has_expression(
|
|
363
|
-
expressions, [_is_generator_expression, _is_window_expression]
|
|
364
|
-
)
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
def _has_expression(
|
|
368
|
-
expressions: Optional[List[Expression]], checks: List[Callable[[Expression], bool]]
|
|
369
|
-
) -> bool:
|
|
370
|
-
if expressions is None:
|
|
371
|
-
return False
|
|
372
|
-
|
|
373
|
-
for exp in expressions:
|
|
374
|
-
if not exp:
|
|
375
|
-
continue
|
|
376
|
-
|
|
377
|
-
if any([check(exp) for check in checks]):
|
|
378
|
-
return True
|
|
379
|
-
|
|
380
|
-
if _has_expression(exp.children, checks):
|
|
381
|
-
return True
|
|
382
|
-
|
|
383
|
-
return False
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
def _is_window_expression(exp: Expression) -> bool:
|
|
387
|
-
return isinstance(exp, WindowExpression)
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
def _is_generator_expression(exp: Expression) -> bool:
|
|
391
|
-
# https://docs.snowflake.com/en/sql-reference/functions-data-generation
|
|
392
|
-
return isinstance(exp, FunctionExpression) and (
|
|
393
|
-
exp.is_data_generator or exp.name.lower() in SEQUENCE_DEPENDENT_DATA_GENERATION
|
|
394
|
-
)
|
|
@@ -107,10 +107,7 @@ def _spark_to_snowflake(multipart_id: str) -> str:
|
|
|
107
107
|
|
|
108
108
|
def map_write(request: proto_base.ExecutePlanRequest):
|
|
109
109
|
write_op = request.plan.command.write_operation
|
|
110
|
-
|
|
111
|
-
telemetry.report_io_write(write_op.source, dict(write_op.options))
|
|
112
|
-
else:
|
|
113
|
-
telemetry.report_io_write(write_op.source)
|
|
110
|
+
telemetry.report_io_write(write_op.source)
|
|
114
111
|
|
|
115
112
|
write_mode = None
|
|
116
113
|
match write_op.mode:
|
|
@@ -234,10 +231,11 @@ def map_write(request: proto_base.ExecutePlanRequest):
|
|
|
234
231
|
column_order=_column_order_for_write,
|
|
235
232
|
)
|
|
236
233
|
case "append":
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
234
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
235
|
+
# if check_table_type(snowpark_table_name, session) != "ICEBERG":
|
|
236
|
+
# raise AnalysisException(
|
|
237
|
+
# f"Table {snowpark_table_name} is not an iceberg table"
|
|
238
|
+
# )
|
|
241
239
|
_validate_schema_and_get_writer(
|
|
242
240
|
input_df, "append", snowpark_table_name
|
|
243
241
|
).saveAsTable(
|
|
@@ -264,10 +262,12 @@ def map_write(request: proto_base.ExecutePlanRequest):
|
|
|
264
262
|
)
|
|
265
263
|
case "overwrite":
|
|
266
264
|
if check_snowflake_table_existence(snowpark_table_name, session):
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
265
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
266
|
+
# if check_table_type(snowpark_table_name, session) != "ICEBERG":
|
|
267
|
+
# raise AnalysisException(
|
|
268
|
+
# f"Table {snowpark_table_name} is not an iceberg table"
|
|
269
|
+
# )
|
|
270
|
+
pass
|
|
271
271
|
else:
|
|
272
272
|
create_iceberg_table(
|
|
273
273
|
snowpark_table_name=snowpark_table_name,
|
|
@@ -298,13 +298,14 @@ def map_write(request: proto_base.ExecutePlanRequest):
|
|
|
298
298
|
if check_snowflake_table_existence(
|
|
299
299
|
snowpark_table_name, session
|
|
300
300
|
):
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
301
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
302
|
+
# if (
|
|
303
|
+
# check_table_type(snowpark_table_name, session)
|
|
304
|
+
# != "TABLE"
|
|
305
|
+
# ):
|
|
306
|
+
# raise AnalysisException(
|
|
307
|
+
# f"Table {snowpark_table_name} is not a FDN table"
|
|
308
|
+
# )
|
|
308
309
|
write_mode = "truncate"
|
|
309
310
|
_validate_schema_and_get_writer(
|
|
310
311
|
input_df, write_mode, snowpark_table_name
|
|
@@ -314,10 +315,11 @@ def map_write(request: proto_base.ExecutePlanRequest):
|
|
|
314
315
|
column_order=_column_order_for_write,
|
|
315
316
|
)
|
|
316
317
|
case "append":
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
318
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
319
|
+
# if check_table_type(snowpark_table_name, session) != "TABLE":
|
|
320
|
+
# raise AnalysisException(
|
|
321
|
+
# f"Table {snowpark_table_name} is not a FDN table"
|
|
322
|
+
# )
|
|
321
323
|
_validate_schema_and_get_writer(
|
|
322
324
|
input_df, write_mode, snowpark_table_name
|
|
323
325
|
).saveAsTable(
|
|
@@ -388,10 +390,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
|
|
|
388
390
|
raise AnalysisException(
|
|
389
391
|
f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
|
|
390
392
|
)
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
393
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
394
|
+
# if check_table_type(snowpark_table_name, session) != "ICEBERG":
|
|
395
|
+
# raise AnalysisException(
|
|
396
|
+
# f"Table {snowpark_table_name} is not an iceberg table"
|
|
397
|
+
# )
|
|
395
398
|
_validate_schema_and_get_writer(
|
|
396
399
|
input_df, "append", snowpark_table_name
|
|
397
400
|
).saveAsTable(
|
|
@@ -402,10 +405,12 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
|
|
|
402
405
|
case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
|
|
403
406
|
# TODO: handle the filter condition for MODE_OVERWRITE
|
|
404
407
|
if check_snowflake_table_existence(snowpark_table_name, session):
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
408
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
409
|
+
# if check_table_type(snowpark_table_name, session) != "ICEBERG":
|
|
410
|
+
# raise AnalysisException(
|
|
411
|
+
# f"Table {snowpark_table_name} is not an iceberg table"
|
|
412
|
+
# )
|
|
413
|
+
pass
|
|
409
414
|
else:
|
|
410
415
|
raise AnalysisException(
|
|
411
416
|
f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
|
|
@@ -471,10 +476,11 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
|
|
|
471
476
|
raise AnalysisException(
|
|
472
477
|
f"[TABLE_OR_VIEW_NOT_FOUND] The table or view `{write_op.table_name}` cannot be found."
|
|
473
478
|
)
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
479
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
480
|
+
# if check_table_type(snowpark_table_name, session) != "TABLE":
|
|
481
|
+
# raise AnalysisException(
|
|
482
|
+
# f"Table {snowpark_table_name} is not a FDN table"
|
|
483
|
+
# )
|
|
478
484
|
_validate_schema_and_get_writer(
|
|
479
485
|
input_df, "append", snowpark_table_name
|
|
480
486
|
).saveAsTable(
|
|
@@ -485,10 +491,12 @@ def map_write_v2(request: proto_base.ExecutePlanRequest):
|
|
|
485
491
|
case commands_proto.WriteOperationV2.MODE_OVERWRITE | commands_proto.WriteOperationV2.MODE_OVERWRITE_PARTITIONS:
|
|
486
492
|
# TODO: handle the filter condition for MODE_OVERWRITE
|
|
487
493
|
if check_snowflake_table_existence(snowpark_table_name, session):
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
494
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
495
|
+
# if check_table_type(snowpark_table_name, session) != "TABLE":
|
|
496
|
+
# raise AnalysisException(
|
|
497
|
+
# f"Table {snowpark_table_name} is not a FDN table"
|
|
498
|
+
# )
|
|
499
|
+
pass
|
|
492
500
|
else:
|
|
493
501
|
raise AnalysisException(
|
|
494
502
|
f"[TABLE_OR_VIEW_NOT_FOUND] Table {snowpark_table_name} does not exist"
|
|
@@ -798,17 +806,18 @@ def check_snowflake_table_existence(
|
|
|
798
806
|
return False
|
|
799
807
|
|
|
800
808
|
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
809
|
+
# TODO: SNOW-2299414 Fix the implementation of table type check
|
|
810
|
+
# def check_table_type(
|
|
811
|
+
# snowpark_table_name: str,
|
|
812
|
+
# snowpark_session: snowpark.Session,
|
|
813
|
+
# ) -> str:
|
|
814
|
+
# # currently we only support iceberg table and FDN table
|
|
815
|
+
# metadata = snowpark_session.sql(
|
|
816
|
+
# f"SHOW TABLES LIKE '{unquote_if_quoted(snowpark_table_name)}';"
|
|
817
|
+
# ).collect()
|
|
818
|
+
# if metadata is None or len(metadata) == 0:
|
|
819
|
+
# raise AnalysisException(f"Table {snowpark_table_name} does not exist")
|
|
820
|
+
# metadata = metadata[0]
|
|
821
|
+
# if metadata.as_dict().get("is_iceberg") == "Y":
|
|
822
|
+
# return "ICEBERG"
|
|
823
|
+
# return "TABLE"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#
|
|
2
2
|
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
3
|
#
|
|
4
|
-
|
|
4
|
+
import pathlib
|
|
5
5
|
import threading
|
|
6
6
|
import time
|
|
7
7
|
|
|
@@ -9,6 +9,8 @@ from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_sess
|
|
|
9
9
|
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
10
10
|
|
|
11
11
|
_resources_initialized = threading.Event()
|
|
12
|
+
SPARK_VERSION = "3.5.6"
|
|
13
|
+
RESOURCE_PATH = "/snowflake/snowpark_connect/resources"
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
def initialize_resources() -> None:
|
|
@@ -41,6 +43,31 @@ def initialize_resources() -> None:
|
|
|
41
43
|
|
|
42
44
|
session.sql("select 1 as sf_connection_warm_up").collect()
|
|
43
45
|
|
|
46
|
+
def upload_scala_udf_jars() -> None:
|
|
47
|
+
"""Upload Spark jar files required for creating Scala UDFs."""
|
|
48
|
+
stage = session.get_session_stage()
|
|
49
|
+
resource_path = stage + RESOURCE_PATH
|
|
50
|
+
import snowflake
|
|
51
|
+
|
|
52
|
+
pyspark_jars = (
|
|
53
|
+
pathlib.Path(snowflake.snowpark_connect.__file__).parent / "includes/jars"
|
|
54
|
+
)
|
|
55
|
+
jar_files = [
|
|
56
|
+
f"spark-sql_2.12-{SPARK_VERSION}.jar",
|
|
57
|
+
f"spark-connect-client-jvm_2.12-{SPARK_VERSION}.jar",
|
|
58
|
+
f"spark-common-utils_2.12-{SPARK_VERSION}.jar",
|
|
59
|
+
"json4s-ast_2.12-3.7.0-M11.jar",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
for jar in jar_files:
|
|
63
|
+
session.file.put(
|
|
64
|
+
str(pyspark_jars) + "/" + jar,
|
|
65
|
+
resource_path,
|
|
66
|
+
auto_compress=False,
|
|
67
|
+
overwrite=False,
|
|
68
|
+
source_compression="NONE",
|
|
69
|
+
)
|
|
70
|
+
|
|
44
71
|
start_time = time.time()
|
|
45
72
|
|
|
46
73
|
resources = [
|
|
@@ -49,6 +76,7 @@ def initialize_resources() -> None:
|
|
|
49
76
|
("Initialize Session Stage", initialize_session_stage), # Takes about 0.3s
|
|
50
77
|
("Initialize Session Catalog", initialize_catalog), # Takes about 1.2s
|
|
51
78
|
("Snowflake Connection Warm Up", warm_up_sf_connection), # Takes about 1s
|
|
79
|
+
("Upload Scala UDF Jars", upload_scala_udf_jars),
|
|
52
80
|
]
|
|
53
81
|
|
|
54
82
|
for name, resource_func in resources:
|
|
@@ -68,10 +68,14 @@ from snowflake.snowpark_connect.relation.map_relation import map_relation
|
|
|
68
68
|
from snowflake.snowpark_connect.relation.utils import get_semantic_string
|
|
69
69
|
from snowflake.snowpark_connect.resources_initializer import initialize_resources_async
|
|
70
70
|
from snowflake.snowpark_connect.type_mapping import (
|
|
71
|
-
|
|
71
|
+
map_type_string_to_proto,
|
|
72
72
|
snowpark_to_proto_type,
|
|
73
73
|
)
|
|
74
|
-
from snowflake.snowpark_connect.utils.artifacts import
|
|
74
|
+
from snowflake.snowpark_connect.utils.artifacts import (
|
|
75
|
+
check_checksum,
|
|
76
|
+
write_artifact,
|
|
77
|
+
write_class_files_to_stage,
|
|
78
|
+
)
|
|
75
79
|
from snowflake.snowpark_connect.utils.cache import (
|
|
76
80
|
df_cache_map_get,
|
|
77
81
|
df_cache_map_pop,
|
|
@@ -249,7 +253,9 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
|
|
|
249
253
|
return proto_base.AnalyzePlanResponse(
|
|
250
254
|
session_id=request.session_id,
|
|
251
255
|
ddl_parse=proto_base.AnalyzePlanResponse.DDLParse(
|
|
252
|
-
parsed=
|
|
256
|
+
parsed=map_type_string_to_proto(
|
|
257
|
+
request.ddl_parse.ddl_string
|
|
258
|
+
)
|
|
253
259
|
),
|
|
254
260
|
)
|
|
255
261
|
case "get_storage_level":
|
|
@@ -516,7 +522,13 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
|
|
|
516
522
|
for name, data in cache_data.items():
|
|
517
523
|
_try_handle_local_relation(name, bytes(data))
|
|
518
524
|
|
|
525
|
+
class_files: dict[str, str] = {}
|
|
519
526
|
for (name, filepath) in filenames.items():
|
|
527
|
+
if name.endswith(".class"):
|
|
528
|
+
# name is <dir>/<package>/<class_name>
|
|
529
|
+
# we don't need the dir name, but require the package, so only remove dir
|
|
530
|
+
class_files[name.split("/", 1)[-1]] = filepath
|
|
531
|
+
continue
|
|
520
532
|
session.file.put(
|
|
521
533
|
filepath,
|
|
522
534
|
session.get_session_stage(),
|
|
@@ -541,6 +553,9 @@ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
|
|
|
541
553
|
if not name.startswith("pyfiles"):
|
|
542
554
|
session._import_files.add(cached_name)
|
|
543
555
|
|
|
556
|
+
if class_files:
|
|
557
|
+
write_class_files_to_stage(session, class_files)
|
|
558
|
+
|
|
544
559
|
return proto_base.AddArtifactsResponse(artifacts=list(response.values()))
|
|
545
560
|
|
|
546
561
|
def ArtifactStatus(self, request, context):
|