snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (474)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  17. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  18. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  19. snowflake/snowpark_connect/relation/map_join.py +258 -62
  20. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  22. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  23. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  24. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  25. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  26. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  27. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  28. snowflake/snowpark_connect/resources_initializer.py +29 -1
  29. snowflake/snowpark_connect/server.py +18 -3
  30. snowflake/snowpark_connect/type_mapping.py +29 -25
  31. snowflake/snowpark_connect/typed_column.py +14 -0
  32. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  33. snowflake/snowpark_connect/utils/context.py +6 -1
  34. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  35. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  36. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  37. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  38. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  39. snowflake/snowpark_connect/version.py +1 -1
  40. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  41. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  42. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  46. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  47. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  48. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  49. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  50. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  51. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  52. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  53. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  93. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  94. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  95. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  96. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  97. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  98. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  100. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  101. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  102. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  103. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  104. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  105. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  360. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  361. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  362. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  439. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  440. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  444. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  445. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  466. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  467. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  468. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  469. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  470. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  471. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  472. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  473. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  474. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,653 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
- from distutils.version import LooseVersion
18
- import unittest
19
-
20
- import numpy as np
21
- import pandas as pd
22
-
23
- from pyspark import pandas as ps
24
- from pyspark.testing.pandasutils import ComparisonTestBase
25
- from pyspark.testing.sqlutils import SQLTestUtils
26
-
27
-
28
- # This file contains test cases for 'Combining / joining / merging'
29
- # https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/frame.html#combining-joining-merging
30
- class FrameCombineMixin:
31
- @property
32
- def pdf(self):
33
- return pd.DataFrame(
34
- {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
35
- index=np.random.rand(9),
36
- )
37
-
38
- @property
39
- def df_pair(self):
40
- pdf = self.pdf
41
- psdf = ps.from_pandas(pdf)
42
- return pdf, psdf
43
-
44
- @unittest.skipIf(
45
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
46
- "TODO(SPARK-43562): Enable DataFrameTests.test_append for pandas 2.0.0.",
47
- )
48
- def test_append(self):
49
- pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"))
50
- psdf = ps.from_pandas(pdf)
51
- other_pdf = pd.DataFrame([[3, 4], [5, 6]], columns=list("BC"), index=[2, 3])
52
- other_psdf = ps.from_pandas(other_pdf)
53
-
54
- self.assert_eq(psdf.append(psdf), pdf.append(pdf))
55
- self.assert_eq(psdf.append(psdf, ignore_index=True), pdf.append(pdf, ignore_index=True))
56
-
57
- # Assert DataFrames with non-matching columns
58
- self.assert_eq(psdf.append(other_psdf), pdf.append(other_pdf))
59
-
60
- # Assert appending a Series fails
61
- msg = "DataFrames.append() does not support appending Series to DataFrames"
62
- with self.assertRaises(TypeError, msg=msg):
63
- psdf.append(psdf["A"])
64
-
65
- # Assert using the sort parameter raises an exception
66
- msg = "The 'sort' parameter is currently not supported"
67
- with self.assertRaises(NotImplementedError, msg=msg):
68
- psdf.append(psdf, sort=True)
69
-
70
- # Assert using 'verify_integrity' only raises an exception for overlapping indices
71
- self.assert_eq(
72
- psdf.append(other_psdf, verify_integrity=True),
73
- pdf.append(other_pdf, verify_integrity=True),
74
- )
75
- msg = "Indices have overlapping values"
76
- with self.assertRaises(ValueError, msg=msg):
77
- psdf.append(psdf, verify_integrity=True)
78
-
79
- # Skip integrity verification when ignore_index=True
80
- self.assert_eq(
81
- psdf.append(psdf, ignore_index=True, verify_integrity=True),
82
- pdf.append(pdf, ignore_index=True, verify_integrity=True),
83
- )
84
-
85
- # Assert appending multi-index DataFrames
86
- multi_index_pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[[2, 3], [4, 5]])
87
- multi_index_psdf = ps.from_pandas(multi_index_pdf)
88
- other_multi_index_pdf = pd.DataFrame(
89
- [[5, 6], [7, 8]], columns=list("AB"), index=[[2, 3], [6, 7]]
90
- )
91
- other_multi_index_psdf = ps.from_pandas(other_multi_index_pdf)
92
-
93
- self.assert_eq(
94
- multi_index_psdf.append(multi_index_psdf), multi_index_pdf.append(multi_index_pdf)
95
- )
96
-
97
- # Assert DataFrames with non-matching columns
98
- self.assert_eq(
99
- multi_index_psdf.append(other_multi_index_psdf),
100
- multi_index_pdf.append(other_multi_index_pdf),
101
- )
102
-
103
- # Assert using 'verify_integrity' only raises an exception for overlapping indices
104
- self.assert_eq(
105
- multi_index_psdf.append(other_multi_index_psdf, verify_integrity=True),
106
- multi_index_pdf.append(other_multi_index_pdf, verify_integrity=True),
107
- )
108
- with self.assertRaises(ValueError, msg=msg):
109
- multi_index_psdf.append(multi_index_psdf, verify_integrity=True)
110
-
111
- # Skip integrity verification when ignore_index=True
112
- self.assert_eq(
113
- multi_index_psdf.append(multi_index_psdf, ignore_index=True, verify_integrity=True),
114
- multi_index_pdf.append(multi_index_pdf, ignore_index=True, verify_integrity=True),
115
- )
116
-
117
- # Assert trying to append DataFrames with different index levels
118
- msg = "Both DataFrames have to have the same number of index levels"
119
- with self.assertRaises(ValueError, msg=msg):
120
- psdf.append(multi_index_psdf)
121
-
122
- # Skip index level check when ignore_index=True
123
- self.assert_eq(
124
- psdf.append(multi_index_psdf, ignore_index=True),
125
- pdf.append(multi_index_pdf, ignore_index=True),
126
- )
127
-
128
- columns = pd.MultiIndex.from_tuples([("A", "X"), ("A", "Y")])
129
- pdf.columns = columns
130
- psdf.columns = columns
131
-
132
- self.assert_eq(psdf.append(psdf), pdf.append(pdf))
133
-
134
- def test_merge(self):
135
- left_pdf = pd.DataFrame(
136
- {
137
- "lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
138
- "value": [1, 2, 3, 5, 6, 7],
139
- "x": list("abcdef"),
140
- },
141
- columns=["lkey", "value", "x"],
142
- )
143
- right_pdf = pd.DataFrame(
144
- {
145
- "rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
146
- "value": [4, 5, 6, 7, 8, 9],
147
- "y": list("efghij"),
148
- },
149
- columns=["rkey", "value", "y"],
150
- )
151
- right_pser = pd.Series(list("defghi"), name="x", index=[5, 6, 7, 8, 9, 10])
152
-
153
- left_psdf = ps.from_pandas(left_pdf)
154
- right_psdf = ps.from_pandas(right_pdf)
155
- right_psser = ps.from_pandas(right_pser)
156
-
157
- def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
158
- ps_res = op(left_psdf, right_psdf)
159
- ps_res = ps_res._to_pandas()
160
- ps_res = ps_res.sort_values(by=list(ps_res.columns))
161
- ps_res = ps_res.reset_index(drop=True)
162
- p_res = op(left_pdf, right_pdf)
163
- p_res = p_res.sort_values(by=list(p_res.columns))
164
- p_res = p_res.reset_index(drop=True)
165
- self.assert_eq(ps_res, p_res)
166
-
167
- check(lambda left, right: left.merge(right))
168
- check(lambda left, right: left.merge(right, on="value"))
169
- check(lambda left, right: left.merge(right, on=("value",)))
170
- check(lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"))
171
- check(lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")))
172
- check(
173
- lambda left, right: left.set_index("lkey").merge(
174
- right, left_index=True, right_on="rkey"
175
- )
176
- )
177
- check(
178
- lambda left, right: left.merge(
179
- right.set_index("rkey"), left_on="lkey", right_index=True
180
- )
181
- )
182
- check(
183
- lambda left, right: left.set_index("lkey").merge(
184
- right.set_index("rkey"), left_index=True, right_index=True
185
- )
186
- )
187
-
188
- # MultiIndex
189
- check(
190
- lambda left, right: left.merge(
191
- right, left_on=["lkey", "value"], right_on=["rkey", "value"]
192
- )
193
- )
194
- check(
195
- lambda left, right: left.set_index(["lkey", "value"]).merge(
196
- right, left_index=True, right_on=["rkey", "value"]
197
- )
198
- )
199
- check(
200
- lambda left, right: left.merge(
201
- right.set_index(["rkey", "value"]), left_on=["lkey", "value"], right_index=True
202
- )
203
- )
204
- # TODO: when both left_index=True and right_index=True with multi-index
205
- # check(lambda left, right: left.set_index(['lkey', 'value']).merge(
206
- # right.set_index(['rkey', 'value']), left_index=True, right_index=True))
207
-
208
- # join types
209
- for how in ["inner", "left", "right", "outer"]:
210
- check(lambda left, right: left.merge(right, on="value", how=how))
211
- check(lambda left, right: left.merge(right, left_on="lkey", right_on="rkey", how=how))
212
-
213
- # suffix
214
- check(
215
- lambda left, right: left.merge(
216
- right, left_on="lkey", right_on="rkey", suffixes=["_left", "_right"]
217
- )
218
- )
219
-
220
- # Test Series on the right
221
- check(lambda left, right: left.merge(right), right_psser, right_pser)
222
- check(
223
- lambda left, right: left.merge(right, left_on="x", right_on="x"),
224
- right_psser,
225
- right_pser,
226
- )
227
- check(
228
- lambda left, right: left.set_index("x").merge(right, left_index=True, right_on="x"),
229
- right_psser,
230
- right_pser,
231
- )
232
-
233
- # Test join types with Series
234
- for how in ["inner", "left", "right", "outer"]:
235
- check(lambda left, right: left.merge(right, how=how), right_psser, right_pser)
236
- check(
237
- lambda left, right: left.merge(right, left_on="x", right_on="x", how=how),
238
- right_psser,
239
- right_pser,
240
- )
241
-
242
- # suffix with Series
243
- check(
244
- lambda left, right: left.merge(
245
- right,
246
- suffixes=["_left", "_right"],
247
- how="outer",
248
- left_index=True,
249
- right_index=True,
250
- ),
251
- right_psser,
252
- right_pser,
253
- )
254
-
255
- # multi-index columns
256
- left_columns = pd.MultiIndex.from_tuples([(10, "lkey"), (10, "value"), (20, "x")])
257
- left_pdf.columns = left_columns
258
- left_psdf.columns = left_columns
259
-
260
- right_columns = pd.MultiIndex.from_tuples([(10, "rkey"), (10, "value"), (30, "y")])
261
- right_pdf.columns = right_columns
262
- right_psdf.columns = right_columns
263
-
264
- check(lambda left, right: left.merge(right))
265
- check(lambda left, right: left.merge(right, on=[(10, "value")]))
266
- check(
267
- lambda left, right: (left.set_index((10, "lkey")).merge(right.set_index((10, "rkey"))))
268
- )
269
- check(
270
- lambda left, right: (
271
- left.set_index((10, "lkey")).merge(
272
- right.set_index((10, "rkey")), left_index=True, right_index=True
273
- )
274
- )
275
- )
276
- # TODO: when both left_index=True and right_index=True with multi-index columns
277
- # check(lambda left, right: left.merge(right,
278
- # left_on=[('a', 'lkey')], right_on=[('a', 'rkey')]))
279
- # check(lambda left, right: (left.set_index(('a', 'lkey'))
280
- # .merge(right, left_index=True, right_on=[('a', 'rkey')])))
281
-
282
- # non-string names
283
- left_pdf.columns = [10, 100, 1000]
284
- left_psdf.columns = [10, 100, 1000]
285
-
286
- right_pdf.columns = [20, 100, 2000]
287
- right_psdf.columns = [20, 100, 2000]
288
-
289
- check(lambda left, right: left.merge(right))
290
- check(lambda left, right: left.merge(right, on=[100]))
291
- check(lambda left, right: (left.set_index(10).merge(right.set_index(20))))
292
- check(
293
- lambda left, right: (
294
- left.set_index(10).merge(right.set_index(20), left_index=True, right_index=True)
295
- )
296
- )
297
-
298
- def test_merge_same_anchor(self):
299
- pdf = pd.DataFrame(
300
- {
301
- "lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
302
- "rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
303
- "value": [1, 1, 3, 5, 6, 7],
304
- "x": list("abcdef"),
305
- "y": list("efghij"),
306
- },
307
- columns=["lkey", "rkey", "value", "x", "y"],
308
- )
309
- psdf = ps.from_pandas(pdf)
310
-
311
- left_pdf = pdf[["lkey", "value", "x"]]
312
- right_pdf = pdf[["rkey", "value", "y"]]
313
- left_psdf = psdf[["lkey", "value", "x"]]
314
- right_psdf = psdf[["rkey", "value", "y"]]
315
-
316
- def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
317
- k_res = op(left_psdf, right_psdf)
318
- k_res = k_res._to_pandas()
319
- k_res = k_res.sort_values(by=list(k_res.columns))
320
- k_res = k_res.reset_index(drop=True)
321
- p_res = op(left_pdf, right_pdf)
322
- p_res = p_res.sort_values(by=list(p_res.columns))
323
- p_res = p_res.reset_index(drop=True)
324
- self.assert_eq(k_res, p_res)
325
-
326
- check(lambda left, right: left.merge(right))
327
- check(lambda left, right: left.merge(right, on="value"))
328
- check(lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"))
329
- check(lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")))
330
- check(
331
- lambda left, right: left.set_index("lkey").merge(
332
- right, left_index=True, right_on="rkey"
333
- )
334
- )
335
- check(
336
- lambda left, right: left.merge(
337
- right.set_index("rkey"), left_on="lkey", right_index=True
338
- )
339
- )
340
- check(
341
- lambda left, right: left.set_index("lkey").merge(
342
- right.set_index("rkey"), left_index=True, right_index=True
343
- )
344
- )
345
-
346
- def test_merge_retains_indices(self):
347
- left_pdf = pd.DataFrame({"A": [0, 1]})
348
- right_pdf = pd.DataFrame({"B": [1, 2]}, index=[1, 2])
349
- left_psdf = ps.from_pandas(left_pdf)
350
- right_psdf = ps.from_pandas(right_pdf)
351
-
352
- self.assert_eq(
353
- left_psdf.merge(right_psdf, left_index=True, right_index=True),
354
- left_pdf.merge(right_pdf, left_index=True, right_index=True),
355
- )
356
- self.assert_eq(
357
- left_psdf.merge(right_psdf, left_on="A", right_index=True),
358
- left_pdf.merge(right_pdf, left_on="A", right_index=True),
359
- )
360
- self.assert_eq(
361
- left_psdf.merge(right_psdf, left_index=True, right_on="B"),
362
- left_pdf.merge(right_pdf, left_index=True, right_on="B"),
363
- )
364
- self.assert_eq(
365
- left_psdf.merge(right_psdf, left_on="A", right_on="B"),
366
- left_pdf.merge(right_pdf, left_on="A", right_on="B"),
367
- )
368
-
369
- def test_merge_how_parameter(self):
370
- left_pdf = pd.DataFrame({"A": [1, 2]})
371
- right_pdf = pd.DataFrame({"B": ["x", "y"]}, index=[1, 2])
372
- left_psdf = ps.from_pandas(left_pdf)
373
- right_psdf = ps.from_pandas(right_pdf)
374
-
375
- psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True)
376
- pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True)
377
- self.assert_eq(
378
- psdf.sort_values(by=list(psdf.columns)).reset_index(drop=True),
379
- pdf.sort_values(by=list(pdf.columns)).reset_index(drop=True),
380
- )
381
-
382
- psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True, how="left")
383
- pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True, how="left")
384
- self.assert_eq(
385
- psdf.sort_values(by=list(psdf.columns)).reset_index(drop=True),
386
- pdf.sort_values(by=list(pdf.columns)).reset_index(drop=True),
387
- )
388
-
389
- psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True, how="right")
390
- pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True, how="right")
391
- self.assert_eq(
392
- psdf.sort_values(by=list(psdf.columns)).reset_index(drop=True),
393
- pdf.sort_values(by=list(pdf.columns)).reset_index(drop=True),
394
- )
395
-
396
- psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True, how="outer")
397
- pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True, how="outer")
398
- self.assert_eq(
399
- psdf.sort_values(by=list(psdf.columns)).reset_index(drop=True),
400
- pdf.sort_values(by=list(pdf.columns)).reset_index(drop=True),
401
- )
402
-
403
- def test_merge_raises(self):
404
- left = ps.DataFrame(
405
- {"value": [1, 2, 3, 5, 6], "x": list("abcde")},
406
- columns=["value", "x"],
407
- index=["foo", "bar", "baz", "foo", "bar"],
408
- )
409
- right = ps.DataFrame(
410
- {"value": [4, 5, 6, 7, 8], "y": list("fghij")},
411
- columns=["value", "y"],
412
- index=["baz", "foo", "bar", "baz", "foo"],
413
- )
414
-
415
- with self.assertRaisesRegex(ValueError, "No common columns to perform merge on"):
416
- left[["x"]].merge(right[["y"]])
417
-
418
- with self.assertRaisesRegex(ValueError, "not a combination of both"):
419
- left.merge(right, on="value", left_on="x")
420
-
421
- with self.assertRaisesRegex(ValueError, "Must pass right_on or right_index=True"):
422
- left.merge(right, left_on="x")
423
-
424
- with self.assertRaisesRegex(ValueError, "Must pass right_on or right_index=True"):
425
- left.merge(right, left_index=True)
426
-
427
- with self.assertRaisesRegex(ValueError, "Must pass left_on or left_index=True"):
428
- left.merge(right, right_on="y")
429
-
430
- with self.assertRaisesRegex(ValueError, "Must pass left_on or left_index=True"):
431
- left.merge(right, right_index=True)
432
-
433
- with self.assertRaisesRegex(
434
- ValueError, "len\\(left_keys\\) must equal len\\(right_keys\\)"
435
- ):
436
- left.merge(right, left_on="value", right_on=["value", "y"])
437
-
438
- with self.assertRaisesRegex(
439
- ValueError, "len\\(left_keys\\) must equal len\\(right_keys\\)"
440
- ):
441
- left.merge(right, left_on=["value", "x"], right_on="value")
442
-
443
- with self.assertRaisesRegex(ValueError, "['inner', 'left', 'right', 'full', 'outer']"):
444
- left.merge(right, left_index=True, right_index=True, how="foo")
445
-
446
- with self.assertRaisesRegex(KeyError, "id"):
447
- left.merge(right, on="id")
448
-
449
- def test_join(self):
450
- # check basic function
451
- pdf1 = pd.DataFrame(
452
- {"key": ["K0", "K1", "K2", "K3"], "A": ["A0", "A1", "A2", "A3"]}, columns=["key", "A"]
453
- )
454
- pdf2 = pd.DataFrame(
455
- {"key": ["K0", "K1", "K2"], "B": ["B0", "B1", "B2"]}, columns=["key", "B"]
456
- )
457
- psdf1 = ps.from_pandas(pdf1)
458
- psdf2 = ps.from_pandas(pdf2)
459
-
460
- join_pdf = pdf1.join(pdf2, lsuffix="_left", rsuffix="_right")
461
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
462
-
463
- join_psdf = psdf1.join(psdf2, lsuffix="_left", rsuffix="_right")
464
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
465
-
466
- self.assert_eq(join_pdf, join_psdf)
467
-
468
- # join with duplicated columns in Series
469
- with self.assertRaisesRegex(ValueError, "columns overlap but no suffix specified"):
470
- ks1 = ps.Series(["A1", "A5"], index=[1, 2], name="A")
471
- psdf1.join(ks1, how="outer")
472
- # join with duplicated columns in DataFrame
473
- with self.assertRaisesRegex(ValueError, "columns overlap but no suffix specified"):
474
- psdf1.join(psdf2, how="outer")
475
-
476
- # check `on` parameter
477
- join_pdf = pdf1.join(pdf2.set_index("key"), on="key", lsuffix="_left", rsuffix="_right")
478
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
479
-
480
- join_psdf = psdf1.join(psdf2.set_index("key"), on="key", lsuffix="_left", rsuffix="_right")
481
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
482
- self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))
483
-
484
- join_pdf = pdf1.set_index("key").join(
485
- pdf2.set_index("key"), on="key", lsuffix="_left", rsuffix="_right"
486
- )
487
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
488
-
489
- join_psdf = psdf1.set_index("key").join(
490
- psdf2.set_index("key"), on="key", lsuffix="_left", rsuffix="_right"
491
- )
492
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
493
- self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))
494
-
495
- # multi-index columns
496
- columns1 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "A")])
497
- columns2 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "B")])
498
- pdf1.columns = columns1
499
- pdf2.columns = columns2
500
- psdf1.columns = columns1
501
- psdf2.columns = columns2
502
-
503
- join_pdf = pdf1.join(pdf2, lsuffix="_left", rsuffix="_right")
504
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
505
-
506
- join_psdf = psdf1.join(psdf2, lsuffix="_left", rsuffix="_right")
507
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
508
-
509
- self.assert_eq(join_pdf, join_psdf)
510
-
511
- # check `on` parameter
512
- join_pdf = pdf1.join(
513
- pdf2.set_index(("x", "key")), on=[("x", "key")], lsuffix="_left", rsuffix="_right"
514
- )
515
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
516
-
517
- join_psdf = psdf1.join(
518
- psdf2.set_index(("x", "key")), on=[("x", "key")], lsuffix="_left", rsuffix="_right"
519
- )
520
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
521
-
522
- self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))
523
-
524
- join_pdf = pdf1.set_index(("x", "key")).join(
525
- pdf2.set_index(("x", "key")), on=[("x", "key")], lsuffix="_left", rsuffix="_right"
526
- )
527
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
528
-
529
- join_psdf = psdf1.set_index(("x", "key")).join(
530
- psdf2.set_index(("x", "key")), on=[("x", "key")], lsuffix="_left", rsuffix="_right"
531
- )
532
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
533
-
534
- self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))
535
-
536
- # multi-index
537
- midx1 = pd.MultiIndex.from_tuples(
538
- [("w", "a"), ("x", "b"), ("y", "c"), ("z", "d")], names=["index1", "index2"]
539
- )
540
- midx2 = pd.MultiIndex.from_tuples(
541
- [("w", "a"), ("x", "b"), ("y", "c")], names=["index1", "index2"]
542
- )
543
- pdf1.index = midx1
544
- pdf2.index = midx2
545
- psdf1 = ps.from_pandas(pdf1)
546
- psdf2 = ps.from_pandas(pdf2)
547
-
548
- join_pdf = pdf1.join(pdf2, on=["index1", "index2"], rsuffix="_right")
549
- join_pdf.sort_values(by=list(join_pdf.columns), inplace=True)
550
-
551
- join_psdf = psdf1.join(psdf2, on=["index1", "index2"], rsuffix="_right")
552
- join_psdf.sort_values(by=list(join_psdf.columns), inplace=True)
553
-
554
- self.assert_eq(join_pdf, join_psdf)
555
-
556
- with self.assertRaisesRegex(
557
- ValueError, r'len\(left_on\) must equal the number of levels in the index of "right"'
558
- ):
559
- psdf1.join(psdf2, on=["index1"], rsuffix="_right")
560
-
561
- def test_update(self):
562
- # check base function
563
- def get_data(left_columns=None, right_columns=None):
564
- left_pdf = pd.DataFrame(
565
- {"A": ["1", "2", "3", "4"], "B": ["100", "200", np.nan, np.nan]}, columns=["A", "B"]
566
- )
567
- right_pdf = pd.DataFrame(
568
- {"B": ["x", np.nan, "y", np.nan], "C": ["100", "200", "300", "400"]},
569
- columns=["B", "C"],
570
- )
571
-
572
- left_psdf = ps.DataFrame(
573
- {"A": ["1", "2", "3", "4"], "B": ["100", "200", None, None]}, columns=["A", "B"]
574
- )
575
- right_psdf = ps.DataFrame(
576
- {"B": ["x", None, "y", None], "C": ["100", "200", "300", "400"]}, columns=["B", "C"]
577
- )
578
- if left_columns is not None:
579
- left_pdf.columns = left_columns
580
- left_psdf.columns = left_columns
581
- if right_columns is not None:
582
- right_pdf.columns = right_columns
583
- right_psdf.columns = right_columns
584
- return left_psdf, left_pdf, right_psdf, right_pdf
585
-
586
- left_psdf, left_pdf, right_psdf, right_pdf = get_data()
587
- pser = left_pdf.B
588
- psser = left_psdf.B
589
- left_pdf.update(right_pdf)
590
- left_psdf.update(right_psdf)
591
- self.assert_eq(left_pdf.sort_values(by=["A", "B"]), left_psdf.sort_values(by=["A", "B"]))
592
- # Skip due to pandas bug: https://github.com/pandas-dev/pandas/issues/47188
593
- if not (LooseVersion("1.4.0") <= LooseVersion(pd.__version__) <= LooseVersion("1.4.2")):
594
- self.assert_eq(psser.sort_index(), pser.sort_index())
595
-
596
- left_psdf, left_pdf, right_psdf, right_pdf = get_data()
597
- left_pdf.update(right_pdf, overwrite=False)
598
- left_psdf.update(right_psdf, overwrite=False)
599
- self.assert_eq(left_pdf.sort_values(by=["A", "B"]), left_psdf.sort_values(by=["A", "B"]))
600
-
601
- with self.assertRaises(NotImplementedError):
602
- left_psdf.update(right_psdf, join="right")
603
-
604
- # multi-index columns
605
- left_columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
606
- right_columns = pd.MultiIndex.from_tuples([("X", "B"), ("Y", "C")])
607
-
608
- left_psdf, left_pdf, right_psdf, right_pdf = get_data(
609
- left_columns=left_columns, right_columns=right_columns
610
- )
611
- left_pdf.update(right_pdf)
612
- left_psdf.update(right_psdf)
613
- self.assert_eq(
614
- left_pdf.sort_values(by=[("X", "A"), ("X", "B")]),
615
- left_psdf.sort_values(by=[("X", "A"), ("X", "B")]),
616
- )
617
-
618
- left_psdf, left_pdf, right_psdf, right_pdf = get_data(
619
- left_columns=left_columns, right_columns=right_columns
620
- )
621
- left_pdf.update(right_pdf, overwrite=False)
622
- left_psdf.update(right_psdf, overwrite=False)
623
- self.assert_eq(
624
- left_pdf.sort_values(by=[("X", "A"), ("X", "B")]),
625
- left_psdf.sort_values(by=[("X", "A"), ("X", "B")]),
626
- )
627
-
628
- right_columns = pd.MultiIndex.from_tuples([("Y", "B"), ("Y", "C")])
629
- left_psdf, left_pdf, right_psdf, right_pdf = get_data(
630
- left_columns=left_columns, right_columns=right_columns
631
- )
632
- left_pdf.update(right_pdf)
633
- left_psdf.update(right_psdf)
634
- self.assert_eq(
635
- left_pdf.sort_values(by=[("X", "A"), ("X", "B")]),
636
- left_psdf.sort_values(by=[("X", "A"), ("X", "B")]),
637
- )
638
-
639
-
640
- class FrameCombineTests(FrameCombineMixin, ComparisonTestBase, SQLTestUtils):
641
- pass
642
-
643
-
644
- if __name__ == "__main__":
645
- from pyspark.pandas.tests.computation.test_combine import * # noqa: F401
646
-
647
- try:
648
- import xmlrunner
649
-
650
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
651
- except ImportError:
652
- testRunner = None
653
- unittest.main(testRunner=testRunner, verbosity=2)