snowpark-connect 0.24.0__py3-none-any.whl → 0.26.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/column_name_handler.py +116 -4
- snowflake/snowpark_connect/config.py +23 -0
- snowflake/snowpark_connect/constants.py +0 -29
- snowflake/snowpark_connect/dataframe_container.py +22 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
- snowflake/snowpark_connect/expression/literal.py +13 -2
- snowflake/snowpark_connect/expression/map_cast.py +5 -8
- snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
- snowflake/snowpark_connect/expression/map_udf.py +88 -29
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
- snowflake/snowpark_connect/expression/map_unresolved_function.py +840 -367
- snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
- snowflake/snowpark_connect/hidden_column.py +39 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
- snowflake/snowpark_connect/relation/map_extension.py +52 -11
- snowflake/snowpark_connect/relation/map_join.py +258 -62
- snowflake/snowpark_connect/relation/map_map_partitions.py +9 -4
- snowflake/snowpark_connect/relation/map_relation.py +12 -1
- snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
- snowflake/snowpark_connect/relation/map_sql.py +88 -11
- snowflake/snowpark_connect/relation/map_udtf.py +100 -46
- snowflake/snowpark_connect/relation/read/map_read.py +3 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
- snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
- snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
- snowflake/snowpark_connect/relation/utils.py +44 -0
- snowflake/snowpark_connect/relation/write/map_write.py +175 -75
- snowflake/snowpark_connect/resources_initializer.py +47 -6
- snowflake/snowpark_connect/server.py +26 -4
- snowflake/snowpark_connect/type_mapping.py +29 -25
- snowflake/snowpark_connect/typed_column.py +14 -0
- snowflake/snowpark_connect/utils/artifacts.py +23 -0
- snowflake/snowpark_connect/utils/concurrent.py +4 -0
- snowflake/snowpark_connect/utils/context.py +6 -1
- snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
- snowflake/snowpark_connect/utils/scala_udf_utils.py +596 -0
- snowflake/snowpark_connect/utils/session.py +4 -0
- snowflake/snowpark_connect/utils/telemetry.py +6 -17
- snowflake/snowpark_connect/utils/udf_helper.py +2 -0
- snowflake/snowpark_connect/utils/udf_utils.py +22 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +1 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/METADATA +1 -1
- snowpark_connect-0.26.0.dist-info/RECORD +481 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
- snowpark_connect-0.24.0.dist-info/RECORD +0 -898
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/top_level.txt +0 -0
|
@@ -90,6 +90,7 @@ def map_relation(
|
|
|
90
90
|
table_name=copy.deepcopy(cached_container.table_name),
|
|
91
91
|
alias=cached_container.alias,
|
|
92
92
|
cached_schema_getter=lambda: cached_df.schema,
|
|
93
|
+
partition_hint=cached_container.partition_hint,
|
|
93
94
|
)
|
|
94
95
|
# If we don't make a copy of the df._output, the expression IDs for attributes in Snowpark DataFrames will differ from those stored in the cache,
|
|
95
96
|
# leading to errors during query execution.
|
|
@@ -189,13 +190,23 @@ def map_relation(
|
|
|
189
190
|
case "read":
|
|
190
191
|
result = read.map_read(rel)
|
|
191
192
|
case "repartition":
|
|
192
|
-
#
|
|
193
|
+
# Preserve partition hint for file output control
|
|
194
|
+
# This handles both repartition(n) with shuffle=True and coalesce(n) with shuffle=False
|
|
193
195
|
result = map_relation(rel.repartition.input)
|
|
196
|
+
if rel.repartition.num_partitions > 0:
|
|
197
|
+
result.partition_hint = rel.repartition.num_partitions
|
|
194
198
|
case "repartition_by_expression":
|
|
195
199
|
# This is a no-op operation in SAS as Snowpark doesn't have the concept of partitions.
|
|
196
200
|
# All the data in the dataframe will be treated as a single partition, and this will not
|
|
197
201
|
# have any side effects.
|
|
198
202
|
result = map_relation(rel.repartition_by_expression.input)
|
|
203
|
+
# Only preserve partition hint if num_partitions is explicitly specified and > 0
|
|
204
|
+
# Column-based repartitioning without count should clear any existing partition hints
|
|
205
|
+
if rel.repartition_by_expression.num_partitions > 0:
|
|
206
|
+
result.partition_hint = rel.repartition_by_expression.num_partitions
|
|
207
|
+
else:
|
|
208
|
+
# Column-based repartitioning clears partition hint (resets to default behavior)
|
|
209
|
+
result.partition_hint = None
|
|
199
210
|
case "replace":
|
|
200
211
|
result = map_row_ops.map_replace(rel)
|
|
201
212
|
case "sample":
|
|
@@ -553,7 +553,14 @@ def map_filter(
|
|
|
553
553
|
rel.filter.condition, input_container.column_map, typer
|
|
554
554
|
)
|
|
555
555
|
|
|
556
|
-
|
|
556
|
+
if rel.filter.input.WhichOneof("rel_type") == "subquery_alias":
|
|
557
|
+
# map_subquery_alias does not actually wrap the DataFrame in an alias or subquery.
|
|
558
|
+
# Apparently, there are cases (e.g., TpcdsQ53) where this is required, without it, we get
|
|
559
|
+
# SQL compilation error.
|
|
560
|
+
# To mitigate it, we are doing .select("*"), .alias() introduces additional describe queries
|
|
561
|
+
result = input_df.select("*").filter(condition.col)
|
|
562
|
+
else:
|
|
563
|
+
result = input_df.filter(condition.col)
|
|
557
564
|
|
|
558
565
|
return DataFrameContainer(
|
|
559
566
|
result,
|
|
@@ -77,6 +77,7 @@ from ..expression.map_sql_expression import (
|
|
|
77
77
|
from ..utils.identifiers import spark_to_sf_single_id
|
|
78
78
|
|
|
79
79
|
_ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})
|
|
80
|
+
_cte_definitions = ContextVar[dict[str, any]]("_cte_definitions", default={})
|
|
80
81
|
_having_condition = ContextVar[expressions_proto.Expression | None](
|
|
81
82
|
"_having_condition", default=None
|
|
82
83
|
)
|
|
@@ -117,12 +118,15 @@ def _push_cte_scope():
|
|
|
117
118
|
"""
|
|
118
119
|
Creates a new CTE scope when evaluating nested WITH clauses.
|
|
119
120
|
"""
|
|
120
|
-
|
|
121
|
-
|
|
121
|
+
cur_ctes = _ctes.get()
|
|
122
|
+
cur_definitions = _cte_definitions.get()
|
|
123
|
+
cte_token = _ctes.set(cur_ctes.copy())
|
|
124
|
+
def_token = _cte_definitions.set(cur_definitions.copy())
|
|
122
125
|
try:
|
|
123
126
|
yield
|
|
124
127
|
finally:
|
|
125
|
-
_ctes.reset(
|
|
128
|
+
_ctes.reset(cte_token)
|
|
129
|
+
_cte_definitions.reset(def_token)
|
|
126
130
|
|
|
127
131
|
|
|
128
132
|
@contextmanager
|
|
@@ -405,15 +409,21 @@ def map_sql_to_pandas_df(
|
|
|
405
409
|
else:
|
|
406
410
|
# TODO: Unset the schema
|
|
407
411
|
pass
|
|
408
|
-
case "CreateTable":
|
|
412
|
+
case "CreateTable" | "ReplaceTable":
|
|
413
|
+
if class_name == "ReplaceTable":
|
|
414
|
+
replace_table = " OR REPLACE "
|
|
415
|
+
if_not_exists = ""
|
|
416
|
+
else:
|
|
417
|
+
replace_table = ""
|
|
418
|
+
if_not_exists = (
|
|
419
|
+
"IF NOT EXISTS " if logical_plan.ignoreIfExists() else ""
|
|
420
|
+
)
|
|
421
|
+
|
|
409
422
|
name = get_relation_identifier_name(logical_plan.name())
|
|
410
423
|
columns = ", ".join(
|
|
411
424
|
_spark_field_to_sql(f, True)
|
|
412
425
|
for f in logical_plan.tableSchema().fields()
|
|
413
426
|
)
|
|
414
|
-
if_not_exists = (
|
|
415
|
-
"IF NOT EXISTS " if logical_plan.ignoreIfExists() else ""
|
|
416
|
-
)
|
|
417
427
|
comment_opt = logical_plan.tableSpec().comment()
|
|
418
428
|
comment = (
|
|
419
429
|
f"COMMENT = '{_escape_sql_comment(str(comment_opt.get()))}'"
|
|
@@ -422,7 +432,7 @@ def map_sql_to_pandas_df(
|
|
|
422
432
|
)
|
|
423
433
|
# NOTE: We are intentionally ignoring any FORMAT=... parameters here.
|
|
424
434
|
session.sql(
|
|
425
|
-
f"CREATE TABLE {if_not_exists}{name} ({columns}) {comment}"
|
|
435
|
+
f"CREATE {replace_table} TABLE {if_not_exists}{name} ({columns}) {comment}"
|
|
426
436
|
).collect()
|
|
427
437
|
case "CreateTableAsSelect":
|
|
428
438
|
mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
|
|
@@ -1351,8 +1361,28 @@ def map_logical_plan_relation(
|
|
|
1351
1361
|
value_column_names = [e for e in as_java_list(rel.valueColumnNames())]
|
|
1352
1362
|
variable_column_name = rel.variableColumnName()
|
|
1353
1363
|
|
|
1364
|
+
# Check for multi-column UNPIVOT which Snowflake doesn't support
|
|
1365
|
+
if len(value_column_names) > 1:
|
|
1366
|
+
raise UnsupportedOperationException(
|
|
1367
|
+
f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
|
|
1368
|
+
f"multiple value columns ({', '.join(value_column_names)}) in a single operation. "
|
|
1369
|
+
f"Workaround: Use separate UNPIVOT operations for each value column and join the results, "
|
|
1370
|
+
f"or restructure your query to unpivot columns individually."
|
|
1371
|
+
)
|
|
1372
|
+
|
|
1354
1373
|
values = []
|
|
1355
|
-
|
|
1374
|
+
values_groups = as_java_list(rel.values().get())
|
|
1375
|
+
|
|
1376
|
+
# Check if we have multi-column groups in the IN clause
|
|
1377
|
+
if values_groups and len(as_java_list(values_groups[0])) > 1:
|
|
1378
|
+
group_sizes = [len(as_java_list(group)) for group in values_groups]
|
|
1379
|
+
raise UnsupportedOperationException(
|
|
1380
|
+
f"Multi-column UNPIVOT is not supported. Snowflake SQL does not support unpivoting "
|
|
1381
|
+
f"multiple columns together in groups. Found groups with {max(group_sizes)} columns. "
|
|
1382
|
+
f"Workaround: Unpivot each column separately and then join/union the results as needed."
|
|
1383
|
+
)
|
|
1384
|
+
|
|
1385
|
+
for e1 in values_groups:
|
|
1356
1386
|
for e in as_java_list(e1):
|
|
1357
1387
|
values.append(map_logical_plan_expression(e))
|
|
1358
1388
|
|
|
@@ -1468,7 +1498,50 @@ def map_logical_plan_relation(
|
|
|
1468
1498
|
# The name corresponds to a `WITH` alias rather than a table.
|
|
1469
1499
|
# TODO: We currently evaluate the query each time its alias is used;
|
|
1470
1500
|
# we should eventually start using `WITH` in Snowflake SQL.
|
|
1471
|
-
|
|
1501
|
+
# Each CTE reference should get completely fresh evaluation to prevent ambiguity
|
|
1502
|
+
# when the same CTE is joined multiple times. Instead of reusing the same cte_proto,
|
|
1503
|
+
# re-evaluate the CTE definition to get fresh column identifiers.
|
|
1504
|
+
|
|
1505
|
+
# Re-evaluate the CTE definition to get fresh column identifiers
|
|
1506
|
+
cte_definition = _cte_definitions.get().get(name)
|
|
1507
|
+
if cte_definition is not None:
|
|
1508
|
+
# Get the original column names for consistency across CTE references
|
|
1509
|
+
original_container = map_relation(cte_proto)
|
|
1510
|
+
original_spark_columns = (
|
|
1511
|
+
original_container.column_map.get_spark_columns()
|
|
1512
|
+
)
|
|
1513
|
+
|
|
1514
|
+
# Re-evaluate the CTE definition with a fresh plan_id
|
|
1515
|
+
fresh_plan_id = gen_sql_plan_id()
|
|
1516
|
+
fresh_cte_proto = map_logical_plan_relation(
|
|
1517
|
+
cte_definition, fresh_plan_id
|
|
1518
|
+
)
|
|
1519
|
+
|
|
1520
|
+
# Use SubqueryColumnAliases to ensure consistent column names across CTE references
|
|
1521
|
+
# This is crucial for CTEs that reference other CTEs
|
|
1522
|
+
any_proto = Any()
|
|
1523
|
+
any_proto.Pack(
|
|
1524
|
+
snowflake_proto.Extension(
|
|
1525
|
+
subquery_column_aliases=snowflake_proto.SubqueryColumnAliases(
|
|
1526
|
+
input=fresh_cte_proto,
|
|
1527
|
+
aliases=original_spark_columns,
|
|
1528
|
+
)
|
|
1529
|
+
)
|
|
1530
|
+
)
|
|
1531
|
+
column_aliased_proto = relation_proto.Relation(extension=any_proto)
|
|
1532
|
+
column_aliased_proto.common.plan_id = gen_sql_plan_id()
|
|
1533
|
+
|
|
1534
|
+
# Wrap in SubqueryAlias with the CTE name
|
|
1535
|
+
proto = relation_proto.Relation(
|
|
1536
|
+
subquery_alias=relation_proto.SubqueryAlias(
|
|
1537
|
+
input=column_aliased_proto,
|
|
1538
|
+
alias=name,
|
|
1539
|
+
)
|
|
1540
|
+
)
|
|
1541
|
+
proto.common.plan_id = gen_sql_plan_id()
|
|
1542
|
+
else:
|
|
1543
|
+
# Fallback to stored CTE if definition not found
|
|
1544
|
+
proto = cte_proto
|
|
1472
1545
|
else:
|
|
1473
1546
|
tmp_views = _get_current_temp_objects()
|
|
1474
1547
|
current_schema = session.connection.schema
|
|
@@ -1600,7 +1673,11 @@ def map_logical_plan_relation(
|
|
|
1600
1673
|
with _push_cte_scope():
|
|
1601
1674
|
for cte in as_java_list(rel.cteRelations()):
|
|
1602
1675
|
name = str(cte._1())
|
|
1603
|
-
|
|
1676
|
+
# Store the original CTE definition for re-evaluation
|
|
1677
|
+
_cte_definitions.get()[name] = cte._2()
|
|
1678
|
+
# Process CTE definition with a unique plan_id to ensure proper column naming
|
|
1679
|
+
cte_plan_id = gen_sql_plan_id()
|
|
1680
|
+
cte_proto = map_logical_plan_relation(cte._2(), cte_plan_id)
|
|
1604
1681
|
_ctes.get()[name] = cte_proto
|
|
1605
1682
|
|
|
1606
1683
|
proto = map_logical_plan_relation(rel.child())
|
|
@@ -27,10 +27,14 @@ from snowflake.snowpark_connect.expression.map_expression import (
|
|
|
27
27
|
)
|
|
28
28
|
from snowflake.snowpark_connect.expression.typer import ExpressionTyper
|
|
29
29
|
from snowflake.snowpark_connect.type_mapping import (
|
|
30
|
-
|
|
30
|
+
map_type_string_to_proto,
|
|
31
31
|
proto_to_snowpark_type,
|
|
32
32
|
)
|
|
33
33
|
from snowflake.snowpark_connect.utils.context import push_udtf_context
|
|
34
|
+
from snowflake.snowpark_connect.utils.external_udxf_cache import (
|
|
35
|
+
cache_external_udtf,
|
|
36
|
+
get_external_udtf_from_cache,
|
|
37
|
+
)
|
|
34
38
|
from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
|
|
35
39
|
from snowflake.snowpark_connect.utils.udtf_helper import (
|
|
36
40
|
SnowparkUDTF,
|
|
@@ -44,6 +48,34 @@ from snowflake.snowpark_connect.utils.udxf_import_utils import (
|
|
|
44
48
|
)
|
|
45
49
|
|
|
46
50
|
|
|
51
|
+
def cache_external_udtf_wrapper(from_register_udtf: bool):
|
|
52
|
+
def outer_wrapper(wrapper_func):
|
|
53
|
+
def wrapper(
|
|
54
|
+
udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
|
|
55
|
+
spark_column_names,
|
|
56
|
+
) -> SnowparkUDTF | None:
|
|
57
|
+
udf_hash = hash(str(udtf_proto))
|
|
58
|
+
cached_udtf = get_external_udtf_from_cache(udf_hash)
|
|
59
|
+
|
|
60
|
+
if cached_udtf:
|
|
61
|
+
if from_register_udtf:
|
|
62
|
+
session = get_or_create_snowpark_session()
|
|
63
|
+
session._udtfs[udtf_proto.function_name.lower()] = (
|
|
64
|
+
cached_udtf,
|
|
65
|
+
spark_column_names,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
return cached_udtf
|
|
69
|
+
|
|
70
|
+
snowpark_udf = wrapper_func(udtf_proto, spark_column_names)
|
|
71
|
+
cache_external_udtf(udf_hash, snowpark_udf)
|
|
72
|
+
return snowpark_udf
|
|
73
|
+
|
|
74
|
+
return wrapper
|
|
75
|
+
|
|
76
|
+
return outer_wrapper
|
|
77
|
+
|
|
78
|
+
|
|
47
79
|
def build_expected_types_from_parsed(
|
|
48
80
|
parsed_return: types_proto.DataType,
|
|
49
81
|
) -> List[Tuple[str, Any]]:
|
|
@@ -125,7 +157,9 @@ def process_return_type(
|
|
|
125
157
|
) -> tuple[list[tuple[str, Any]], DataType, StructType, list[str]]:
|
|
126
158
|
try:
|
|
127
159
|
if return_type.HasField("unparsed"):
|
|
128
|
-
parsed_return =
|
|
160
|
+
parsed_return = map_type_string_to_proto(
|
|
161
|
+
return_type.unparsed.data_type_string
|
|
162
|
+
)
|
|
129
163
|
else:
|
|
130
164
|
parsed_return = return_type
|
|
131
165
|
except ValueError as e:
|
|
@@ -163,26 +197,37 @@ def register_udtf(
|
|
|
163
197
|
) = process_return_type(python_udft.return_type)
|
|
164
198
|
function_name = udtf_proto.function_name
|
|
165
199
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
200
|
+
@cache_external_udtf_wrapper(from_register_udtf=True)
|
|
201
|
+
def _register_udtf(
|
|
202
|
+
udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
|
|
203
|
+
spark_column_names,
|
|
204
|
+
):
|
|
205
|
+
kwargs = {
|
|
206
|
+
"session": session,
|
|
207
|
+
"udtf_proto": udtf_proto,
|
|
208
|
+
"expected_types": expected_types,
|
|
209
|
+
"output_schema": output_schema,
|
|
210
|
+
"packages": global_config.get("snowpark.connect.udf.packages", ""),
|
|
211
|
+
"imports": get_python_udxf_import_files(session),
|
|
212
|
+
"called_from": "register_udtf",
|
|
213
|
+
"is_arrow_enabled": is_arrow_enabled_in_udtf(),
|
|
214
|
+
"is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
if require_creating_udtf_in_sproc(udtf_proto):
|
|
218
|
+
snowpark_udtf = create_udtf_in_sproc(**kwargs)
|
|
219
|
+
else:
|
|
220
|
+
udtf = create_udtf(**kwargs)
|
|
221
|
+
snowpark_udtf = SnowparkUDTF(
|
|
222
|
+
name=udtf.name,
|
|
223
|
+
input_types=udtf._input_types,
|
|
224
|
+
output_schema=output_schema,
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
return snowpark_udtf
|
|
185
228
|
|
|
229
|
+
snowpark_udtf = _register_udtf(udtf_proto, spark_column_names)
|
|
230
|
+
# We have to update cached _udtfs here, because function could have been cached in map_common_inline_user_defined_table_function
|
|
186
231
|
session._udtfs[function_name.lower()] = (snowpark_udtf, spark_column_names)
|
|
187
232
|
return snowpark_udtf
|
|
188
233
|
|
|
@@ -211,32 +256,41 @@ def map_common_inline_user_defined_table_function(
|
|
|
211
256
|
spark_column_names,
|
|
212
257
|
) = process_return_type(python_udft.return_type)
|
|
213
258
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
259
|
+
@cache_external_udtf_wrapper(from_register_udtf=False)
|
|
260
|
+
def _get_udtf(
|
|
261
|
+
udtf_proto: relation_proto.CommonInlineUserDefinedTableFunction,
|
|
262
|
+
spark_column_names,
|
|
263
|
+
):
|
|
264
|
+
kwargs = {
|
|
265
|
+
"session": session,
|
|
266
|
+
"udtf_proto": udtf_proto,
|
|
267
|
+
"expected_types": expected_types,
|
|
268
|
+
"output_schema": output_schema,
|
|
269
|
+
"packages": global_config.get("snowpark.connect.udf.packages", ""),
|
|
270
|
+
"imports": get_python_udxf_import_files(session),
|
|
271
|
+
"called_from": "map_common_inline_user_defined_table_function",
|
|
272
|
+
"is_arrow_enabled": is_arrow_enabled_in_udtf(),
|
|
273
|
+
"is_spark_compatible_udtf_mode_enabled": is_spark_compatible_udtf_mode_enabled(),
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if require_creating_udtf_in_sproc(udtf_proto):
|
|
277
|
+
snowpark_udtf_or_error = create_udtf_in_sproc(**kwargs)
|
|
278
|
+
if isinstance(snowpark_udtf_or_error, str):
|
|
279
|
+
raise PythonException(snowpark_udtf_or_error)
|
|
280
|
+
snowpark_udtf = snowpark_udtf_or_error
|
|
281
|
+
else:
|
|
282
|
+
udtf_or_error = create_udtf(**kwargs)
|
|
283
|
+
if isinstance(udtf_or_error, str):
|
|
284
|
+
raise PythonException(udtf_or_error)
|
|
285
|
+
udtf = udtf_or_error
|
|
286
|
+
snowpark_udtf = SnowparkUDTF(
|
|
287
|
+
name=udtf.name,
|
|
288
|
+
input_types=udtf._input_types,
|
|
289
|
+
output_schema=output_schema,
|
|
290
|
+
)
|
|
291
|
+
return snowpark_udtf
|
|
239
292
|
|
|
293
|
+
snowpark_udtf = _get_udtf(rel, spark_column_names)
|
|
240
294
|
column_map = ColumnNameMap([], [])
|
|
241
295
|
snowpark_udtf_args = []
|
|
242
296
|
|
|
@@ -65,7 +65,7 @@ def map_read(
|
|
|
65
65
|
read_format = "parquet"
|
|
66
66
|
|
|
67
67
|
if read_format.lower() == "iceberg":
|
|
68
|
-
telemetry.report_io_read("iceberg"
|
|
68
|
+
telemetry.report_io_read("iceberg")
|
|
69
69
|
return map_read_table(rel)
|
|
70
70
|
|
|
71
71
|
if rel.read.data_source.schema == "":
|
|
@@ -90,7 +90,7 @@ def map_read(
|
|
|
90
90
|
)
|
|
91
91
|
schema = map_json_schema_to_snowpark(parsed_schema)
|
|
92
92
|
options = dict(rel.read.data_source.options)
|
|
93
|
-
telemetry.report_io_read(read_format
|
|
93
|
+
telemetry.report_io_read(read_format)
|
|
94
94
|
session: snowpark.Session = get_or_create_snowpark_session()
|
|
95
95
|
if len(rel.read.data_source.paths) > 0:
|
|
96
96
|
# Normalize paths to ensure consistent behavior
|
|
@@ -175,7 +175,7 @@ def map_read_table_or_file(rel):
|
|
|
175
175
|
rel.read.named_table.unparsed_identifier
|
|
176
176
|
)
|
|
177
177
|
options = {}
|
|
178
|
-
telemetry.report_io_read(read_format
|
|
178
|
+
telemetry.report_io_read(read_format)
|
|
179
179
|
session: snowpark.Session = get_or_create_snowpark_session()
|
|
180
180
|
|
|
181
181
|
clean_source_paths = [
|
|
@@ -94,7 +94,7 @@ def map_read_jdbc(
|
|
|
94
94
|
num_partitions=num_partitions,
|
|
95
95
|
predicates=predicates,
|
|
96
96
|
)
|
|
97
|
-
true_names = list(map(lambda x: unquote_if_quoted(x)
|
|
97
|
+
true_names = list(map(lambda x: unquote_if_quoted(x), df.columns))
|
|
98
98
|
renamed_df, snowpark_cols = rename_columns_as_snowflake_standard(
|
|
99
99
|
df, rel.common.plan_id
|
|
100
100
|
)
|
|
@@ -34,6 +34,7 @@ from snowflake.snowpark_connect.type_mapping import (
|
|
|
34
34
|
cast_to_match_snowpark_type,
|
|
35
35
|
map_simple_types,
|
|
36
36
|
)
|
|
37
|
+
from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
|
|
37
38
|
from snowflake.snowpark_connect.utils.telemetry import (
|
|
38
39
|
SnowparkConnectNotImplementedError,
|
|
39
40
|
)
|
|
@@ -66,6 +67,7 @@ def map_read_json(
|
|
|
66
67
|
|
|
67
68
|
rows_to_infer_schema = snowpark_options.pop("rowstoinferschema", 1000)
|
|
68
69
|
dropFieldIfAllNull = snowpark_options.pop("dropfieldifallnull", False)
|
|
70
|
+
batch_size = snowpark_options.pop("batchsize", 1000)
|
|
69
71
|
|
|
70
72
|
reader = session.read.options(snowpark_options)
|
|
71
73
|
|
|
@@ -99,7 +101,7 @@ def map_read_json(
|
|
|
99
101
|
]
|
|
100
102
|
|
|
101
103
|
df = construct_dataframe_by_schema(
|
|
102
|
-
schema, df.to_local_iterator(), session, snowpark_options
|
|
104
|
+
schema, df.to_local_iterator(), session, snowpark_options, batch_size
|
|
103
105
|
)
|
|
104
106
|
|
|
105
107
|
spark_column_names = get_spark_column_names_from_snowpark_columns(df.columns)
|
|
@@ -277,9 +279,11 @@ def construct_dataframe_by_schema(
|
|
|
277
279
|
result = None
|
|
278
280
|
|
|
279
281
|
current_data = []
|
|
282
|
+
progress = 0
|
|
280
283
|
for row in rows:
|
|
281
284
|
current_data.append(construct_row_by_schema(row, schema, snowpark_options))
|
|
282
285
|
if len(current_data) >= batch_size:
|
|
286
|
+
progress += len(current_data)
|
|
283
287
|
result = union_data_into_df(
|
|
284
288
|
result,
|
|
285
289
|
current_data,
|
|
@@ -287,9 +291,11 @@ def construct_dataframe_by_schema(
|
|
|
287
291
|
session,
|
|
288
292
|
)
|
|
289
293
|
|
|
294
|
+
logger.info(f"JSON reader: finished processing {progress} rows")
|
|
290
295
|
current_data = []
|
|
291
296
|
|
|
292
297
|
if len(current_data) > 0:
|
|
298
|
+
progress += len(current_data)
|
|
293
299
|
result = union_data_into_df(
|
|
294
300
|
result,
|
|
295
301
|
current_data,
|
|
@@ -297,6 +303,7 @@ def construct_dataframe_by_schema(
|
|
|
297
303
|
session,
|
|
298
304
|
)
|
|
299
305
|
|
|
306
|
+
logger.info(f"JSON reader: finished processing {progress} rows")
|
|
300
307
|
current_data = []
|
|
301
308
|
|
|
302
309
|
if result is None:
|
|
@@ -11,7 +11,6 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
|
|
|
11
11
|
unquote_if_quoted,
|
|
12
12
|
)
|
|
13
13
|
from snowflake.snowpark.exceptions import SnowparkSQLException
|
|
14
|
-
from snowflake.snowpark_connect.column_name_handler import ALREADY_QUOTED
|
|
15
14
|
from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
|
|
16
15
|
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
|
|
17
16
|
from snowflake.snowpark_connect.relation.read.utils import (
|
|
@@ -29,15 +28,8 @@ from snowflake.snowpark_connect.utils.telemetry import (
|
|
|
29
28
|
def post_process_df(
|
|
30
29
|
df: snowpark.DataFrame, plan_id: int, source_table_name: str = None
|
|
31
30
|
) -> DataFrameContainer:
|
|
32
|
-
def _lower_or_unquote(string):
|
|
33
|
-
return (
|
|
34
|
-
string[1:-1].replace('""', '"')
|
|
35
|
-
if ALREADY_QUOTED.match(string)
|
|
36
|
-
else string.lower()
|
|
37
|
-
)
|
|
38
|
-
|
|
39
31
|
try:
|
|
40
|
-
true_names = list(map(lambda x:
|
|
32
|
+
true_names = list(map(lambda x: unquote_if_quoted(x), df.columns))
|
|
41
33
|
renamed_df, snowpark_column_names = rename_columns_as_snowflake_standard(
|
|
42
34
|
df, plan_id
|
|
43
35
|
)
|
|
@@ -317,6 +317,7 @@ class JsonReaderConfig(ReaderWriterConfig):
|
|
|
317
317
|
# TODO: modifiedAfter: Union[bool, str, None] = None,
|
|
318
318
|
# TODO: allowNonNumericNumbers: Union[bool, str, None] = None,
|
|
319
319
|
"rowsToInferSchema": 1000,
|
|
320
|
+
"batchSize": 1000,
|
|
320
321
|
},
|
|
321
322
|
supported_options={
|
|
322
323
|
"schema",
|
|
@@ -347,12 +348,13 @@ class JsonReaderConfig(ReaderWriterConfig):
|
|
|
347
348
|
# "ignoreNullFields",
|
|
348
349
|
"rowsToInferSchema",
|
|
349
350
|
# "inferTimestamp",
|
|
351
|
+
"batchSize",
|
|
350
352
|
},
|
|
351
353
|
boolean_config_list=[
|
|
352
354
|
"multiLine",
|
|
353
355
|
"dropFieldIfAllNull",
|
|
354
356
|
],
|
|
355
|
-
int_config_list=["rowsToInferSchema"],
|
|
357
|
+
int_config_list=["rowsToInferSchema", "batchSize"],
|
|
356
358
|
float_config_list=["samplingRatio"],
|
|
357
359
|
),
|
|
358
360
|
options,
|
|
@@ -6,6 +6,7 @@ import random
|
|
|
6
6
|
import re
|
|
7
7
|
import string
|
|
8
8
|
import time
|
|
9
|
+
import uuid
|
|
9
10
|
from typing import Sequence
|
|
10
11
|
|
|
11
12
|
import pyspark.sql.connect.proto.relations_pb2 as relation_proto
|
|
@@ -153,6 +154,49 @@ def random_string(
|
|
|
153
154
|
return "".join([prefix, random_part, suffix])
|
|
154
155
|
|
|
155
156
|
|
|
157
|
+
def generate_spark_compatible_filename(
|
|
158
|
+
task_id: int = 0,
|
|
159
|
+
attempt_number: int = 0,
|
|
160
|
+
compression: str = None,
|
|
161
|
+
format_ext: str = "parquet",
|
|
162
|
+
) -> str:
|
|
163
|
+
"""Generate a Spark-compatible filename following the convention:
|
|
164
|
+
part-<task-id>-<uuid>-c<attempt-number>.<compression>.<format>
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
task_id: Task ID (usually 0 for single partition)
|
|
168
|
+
attempt_number: Attempt number (usually 0)
|
|
169
|
+
compression: Compression type (e.g., 'snappy', 'gzip', 'none')
|
|
170
|
+
format_ext: File format extension (e.g., 'parquet', 'csv', 'json')
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
A filename string following Spark's naming convention
|
|
174
|
+
"""
|
|
175
|
+
# Generate a UUID for uniqueness
|
|
176
|
+
file_uuid = str(uuid.uuid4())
|
|
177
|
+
|
|
178
|
+
# Format task ID with leading zeros (5 digits)
|
|
179
|
+
formatted_task_id = f"{task_id:05d}"
|
|
180
|
+
|
|
181
|
+
# Format attempt number with leading zeros (3 digits)
|
|
182
|
+
formatted_attempt = f"{attempt_number:03d}"
|
|
183
|
+
|
|
184
|
+
# Build the base filename
|
|
185
|
+
base_name = f"part-{formatted_task_id}-{file_uuid}-c{formatted_attempt}"
|
|
186
|
+
|
|
187
|
+
# Add compression if specified and not 'none'
|
|
188
|
+
if compression and compression.lower() not in ("none", "uncompressed"):
|
|
189
|
+
compression_part = f".{compression.lower()}"
|
|
190
|
+
else:
|
|
191
|
+
compression_part = ""
|
|
192
|
+
|
|
193
|
+
# Add format extension if specified
|
|
194
|
+
if format_ext:
|
|
195
|
+
return f"{base_name}{compression_part}.{format_ext}"
|
|
196
|
+
else:
|
|
197
|
+
return f"{base_name}{compression_part}"
|
|
198
|
+
|
|
199
|
+
|
|
156
200
|
def _normalize_query_for_semantic_hash(query_str: str) -> str:
|
|
157
201
|
"""
|
|
158
202
|
Normalize a query string for semantic comparison by extracting original names from
|