snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (476)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/function_defaults.py +207 -0
  7. snowflake/snowpark_connect/expression/literal.py +18 -2
  8. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  9. snowflake/snowpark_connect/expression/map_expression.py +10 -1
  10. snowflake/snowpark_connect/expression/map_extension.py +12 -2
  11. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  12. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  13. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  14. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  15. snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
  16. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  17. snowflake/snowpark_connect/hidden_column.py +39 -0
  18. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
  21. snowflake/snowpark_connect/relation/map_extension.py +56 -15
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
  24. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  25. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  26. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  27. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  28. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  29. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  30. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  31. snowflake/snowpark_connect/relation/read/utils.py +6 -7
  32. snowflake/snowpark_connect/relation/utils.py +1 -170
  33. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  34. snowflake/snowpark_connect/resources_initializer.py +29 -1
  35. snowflake/snowpark_connect/server.py +18 -3
  36. snowflake/snowpark_connect/type_mapping.py +29 -25
  37. snowflake/snowpark_connect/typed_column.py +14 -0
  38. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  39. snowflake/snowpark_connect/utils/context.py +6 -1
  40. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  41. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  42. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  43. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  44. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  45. snowflake/snowpark_connect/version.py +1 -1
  46. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  47. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  48. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  52. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  53. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  101. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  102. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  362. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  444. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  445. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  468. snowpark_connect-0.23.0.dist-info/RECORD +0 -893
  469. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  470. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  471. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  472. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  473. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  474. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  475. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  476. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,1355 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- from distutils.version import LooseVersion
19
- from itertools import product
20
- import unittest
21
-
22
- import pandas as pd
23
- import numpy as np
24
-
25
- from pyspark import pandas as ps
26
- from pyspark.pandas.config import set_option, reset_option
27
- from pyspark.pandas.frame import DataFrame
28
- from pyspark.testing.pandasutils import PandasOnSparkTestCase
29
- from pyspark.testing.sqlutils import SQLTestUtils
30
- from pyspark.pandas.typedef.typehints import (
31
- extension_dtypes,
32
- extension_dtypes_available,
33
- extension_float_dtypes_available,
34
- extension_object_dtypes_available,
35
- )
36
-
37
-
38
- class OpsOnDiffFramesEnabledTestsMixin:
39
- @classmethod
40
- def setUpClass(cls):
41
- super().setUpClass()
42
- set_option("compute.ops_on_diff_frames", True)
43
-
44
- @classmethod
45
- def tearDownClass(cls):
46
- reset_option("compute.ops_on_diff_frames")
47
- super().tearDownClass()
48
-
49
- @property
50
- def pdf1(self):
51
- return pd.DataFrame(
52
- {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
53
- index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
54
- )
55
-
56
- @property
57
- def pdf2(self):
58
- return pd.DataFrame(
59
- {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2, 3]},
60
- index=list(range(9)),
61
- )
62
-
63
- @property
64
- def pdf3(self):
65
- return pd.DataFrame(
66
- {"b": [1, 1, 1, 1, 1, 1, 1, 1, 1], "c": [1, 1, 1, 1, 1, 1, 1, 1, 1]},
67
- index=list(range(9)),
68
- )
69
-
70
- @property
71
- def pdf4(self):
72
- return pd.DataFrame(
73
- {"e": [2, 2, 2, 2, 2, 2, 2, 2, 2], "f": [2, 2, 2, 2, 2, 2, 2, 2, 2]},
74
- index=list(range(9)),
75
- )
76
-
77
- @property
78
- def pdf5(self):
79
- return pd.DataFrame(
80
- {
81
- "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
82
- "b": [4, 5, 6, 3, 2, 1, 0, 0, 0],
83
- "c": [4, 5, 6, 3, 2, 1, 0, 0, 0],
84
- },
85
- index=[0, 1, 3, 5, 6, 8, 9, 10, 11],
86
- ).set_index(["a", "b"])
87
-
88
- @property
89
- def pdf6(self):
90
- return pd.DataFrame(
91
- {
92
- "a": [9, 8, 7, 6, 5, 4, 3, 2, 1],
93
- "b": [0, 0, 0, 4, 5, 6, 1, 2, 3],
94
- "c": [9, 8, 7, 6, 5, 4, 3, 2, 1],
95
- "e": [4, 5, 6, 3, 2, 1, 0, 0, 0],
96
- },
97
- index=list(range(9)),
98
- ).set_index(["a", "b"])
99
-
100
- @property
101
- def pser1(self):
102
- midx = pd.MultiIndex(
103
- [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]],
104
- [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
105
- )
106
- return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx)
107
-
108
- @property
109
- def pser2(self):
110
- midx = pd.MultiIndex(
111
- [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
112
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
113
- )
114
- return pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx)
115
-
116
- @property
117
- def pser3(self):
118
- midx = pd.MultiIndex(
119
- [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
120
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [1, 1, 2, 0, 0, 2, 2, 2, 1]],
121
- )
122
- return pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)
123
-
124
- @property
125
- def psdf1(self):
126
- return ps.from_pandas(self.pdf1)
127
-
128
- @property
129
- def psdf2(self):
130
- return ps.from_pandas(self.pdf2)
131
-
132
- @property
133
- def psdf3(self):
134
- return ps.from_pandas(self.pdf3)
135
-
136
- @property
137
- def psdf4(self):
138
- return ps.from_pandas(self.pdf4)
139
-
140
- @property
141
- def psdf5(self):
142
- return ps.from_pandas(self.pdf5)
143
-
144
- @property
145
- def psdf6(self):
146
- return ps.from_pandas(self.pdf6)
147
-
148
- @property
149
- def psser1(self):
150
- return ps.from_pandas(self.pser1)
151
-
152
- @property
153
- def psser2(self):
154
- return ps.from_pandas(self.pser2)
155
-
156
- @property
157
- def psser3(self):
158
- return ps.from_pandas(self.pser3)
159
-
160
- def test_ranges(self):
161
- self.assert_eq(
162
- (ps.range(10) + ps.range(10)).sort_index(),
163
- (
164
- ps.DataFrame({"id": list(range(10))}) + ps.DataFrame({"id": list(range(10))})
165
- ).sort_index(),
166
- )
167
-
168
- def test_no_matched_index(self):
169
- with self.assertRaisesRegex(ValueError, "Index names must be exactly matched"):
170
- ps.DataFrame({"a": [1, 2, 3]}).set_index("a") + ps.DataFrame(
171
- {"b": [1, 2, 3]}
172
- ).set_index("b")
173
-
174
- def test_arithmetic(self):
175
- self._test_arithmetic_frame(self.pdf1, self.pdf2, check_extension=False)
176
- self._test_arithmetic_series(self.pser1, self.pser2, check_extension=False)
177
-
178
- @unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available")
179
- def test_arithmetic_extension_dtypes(self):
180
- self._test_arithmetic_frame(
181
- self.pdf1.astype("Int64"), self.pdf2.astype("Int64"), check_extension=True
182
- )
183
- self._test_arithmetic_series(
184
- self.pser1.astype(int).astype("Int64"),
185
- self.pser2.astype(int).astype("Int64"),
186
- check_extension=True,
187
- )
188
-
189
- @unittest.skipIf(
190
- not extension_float_dtypes_available, "pandas extension float dtypes are not available"
191
- )
192
- def test_arithmetic_extension_float_dtypes(self):
193
- self._test_arithmetic_frame(
194
- self.pdf1.astype("Float64"), self.pdf2.astype("Float64"), check_extension=True
195
- )
196
- self._test_arithmetic_series(
197
- self.pser1.astype("Float64"), self.pser2.astype("Float64"), check_extension=True
198
- )
199
-
200
- def _test_arithmetic_frame(self, pdf1, pdf2, *, check_extension):
201
- psdf1 = ps.from_pandas(pdf1)
202
- psdf2 = ps.from_pandas(pdf2)
203
-
204
- def assert_eq(actual, expected):
205
- if LooseVersion("1.1") <= LooseVersion(pd.__version__) < LooseVersion("1.2.2"):
206
- self.assert_eq(actual, expected, check_exact=not check_extension)
207
- if check_extension:
208
- if isinstance(actual, DataFrame):
209
- for dtype in actual.dtypes:
210
- self.assertTrue(isinstance(dtype, extension_dtypes))
211
- else:
212
- self.assertTrue(isinstance(actual.dtype, extension_dtypes))
213
- else:
214
- self.assert_eq(actual, expected)
215
-
216
- # Series
217
- assert_eq((psdf1.a - psdf2.b).sort_index(), (pdf1.a - pdf2.b).sort_index())
218
-
219
- assert_eq((psdf1.a * psdf2.a).sort_index(), (pdf1.a * pdf2.a).sort_index())
220
-
221
- if check_extension and not extension_float_dtypes_available:
222
- self.assert_eq(
223
- (psdf1["a"] / psdf2["a"]).sort_index(), (pdf1["a"] / pdf2["a"]).sort_index()
224
- )
225
- else:
226
- assert_eq((psdf1["a"] / psdf2["a"]).sort_index(), (pdf1["a"] / pdf2["a"]).sort_index())
227
-
228
- # DataFrame
229
- assert_eq((psdf1 + psdf2).sort_index(), (pdf1 + pdf2).sort_index())
230
-
231
- # Multi-index columns
232
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
233
- psdf1.columns = columns
234
- psdf2.columns = columns
235
- pdf1.columns = columns
236
- pdf2.columns = columns
237
-
238
- # Series
239
- assert_eq(
240
- (psdf1[("x", "a")] - psdf2[("x", "b")]).sort_index(),
241
- (pdf1[("x", "a")] - pdf2[("x", "b")]).sort_index(),
242
- )
243
-
244
- assert_eq(
245
- (psdf1[("x", "a")] - psdf2["x"]["b"]).sort_index(),
246
- (pdf1[("x", "a")] - pdf2["x"]["b"]).sort_index(),
247
- )
248
-
249
- assert_eq(
250
- (psdf1["x"]["a"] - psdf2[("x", "b")]).sort_index(),
251
- (pdf1["x"]["a"] - pdf2[("x", "b")]).sort_index(),
252
- )
253
-
254
- # DataFrame
255
- assert_eq((psdf1 + psdf2).sort_index(), (pdf1 + pdf2).sort_index())
256
-
257
- def _test_arithmetic_series(self, pser1, pser2, *, check_extension):
258
- psser1 = ps.from_pandas(pser1)
259
- psser2 = ps.from_pandas(pser2)
260
-
261
- def assert_eq(actual, expected):
262
- if LooseVersion("1.1") <= LooseVersion(pd.__version__) < LooseVersion("1.2.2"):
263
- self.assert_eq(actual, expected, check_exact=not check_extension)
264
- if check_extension:
265
- self.assertTrue(isinstance(actual.dtype, extension_dtypes))
266
- else:
267
- self.assert_eq(actual, expected)
268
-
269
- # MultiIndex Series
270
- assert_eq((psser1 + psser2).sort_index(), (pser1 + pser2).sort_index())
271
-
272
- assert_eq((psser1 - psser2).sort_index(), (pser1 - pser2).sort_index())
273
-
274
- assert_eq((psser1 * psser2).sort_index(), (pser1 * pser2).sort_index())
275
-
276
- if check_extension and not extension_float_dtypes_available:
277
- self.assert_eq((psser1 / psser2).sort_index(), (pser1 / pser2).sort_index())
278
- else:
279
- assert_eq((psser1 / psser2).sort_index(), (pser1 / pser2).sort_index())
280
-
281
- def test_arithmetic_chain(self):
282
- self._test_arithmetic_chain_frame(self.pdf1, self.pdf2, self.pdf3, check_extension=False)
283
- self._test_arithmetic_chain_series(
284
- self.pser1, self.pser2, self.pser3, check_extension=False
285
- )
286
-
287
- @unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available")
288
- def test_arithmetic_chain_extension_dtypes(self):
289
- self._test_arithmetic_chain_frame(
290
- self.pdf1.astype("Int64"),
291
- self.pdf2.astype("Int64"),
292
- self.pdf3.astype("Int64"),
293
- check_extension=True,
294
- )
295
- self._test_arithmetic_chain_series(
296
- self.pser1.astype(int).astype("Int64"),
297
- self.pser2.astype(int).astype("Int64"),
298
- self.pser3.astype(int).astype("Int64"),
299
- check_extension=True,
300
- )
301
-
302
- @unittest.skipIf(
303
- not extension_float_dtypes_available, "pandas extension float dtypes are not available"
304
- )
305
- def test_arithmetic_chain_extension_float_dtypes(self):
306
- self._test_arithmetic_chain_frame(
307
- self.pdf1.astype("Float64"),
308
- self.pdf2.astype("Float64"),
309
- self.pdf3.astype("Float64"),
310
- check_extension=True,
311
- )
312
- self._test_arithmetic_chain_series(
313
- self.pser1.astype("Float64"),
314
- self.pser2.astype("Float64"),
315
- self.pser3.astype("Float64"),
316
- check_extension=True,
317
- )
318
-
319
- def _test_arithmetic_chain_frame(self, pdf1, pdf2, pdf3, *, check_extension):
320
- psdf1 = ps.from_pandas(pdf1)
321
- psdf2 = ps.from_pandas(pdf2)
322
- psdf3 = ps.from_pandas(pdf3)
323
-
324
- common_columns = set(psdf1.columns).intersection(psdf2.columns).intersection(psdf3.columns)
325
-
326
- def assert_eq(actual, expected):
327
- if LooseVersion("1.1") <= LooseVersion(pd.__version__) < LooseVersion("1.2.2"):
328
- self.assert_eq(actual, expected, check_exact=not check_extension)
329
- if check_extension:
330
- if isinstance(actual, DataFrame):
331
- for column, dtype in zip(actual.columns, actual.dtypes):
332
- if column in common_columns:
333
- self.assertTrue(isinstance(dtype, extension_dtypes))
334
- else:
335
- self.assertFalse(isinstance(dtype, extension_dtypes))
336
- else:
337
- self.assertTrue(isinstance(actual.dtype, extension_dtypes))
338
- else:
339
- self.assert_eq(actual, expected)
340
-
341
- # Series
342
- assert_eq(
343
- (psdf1.a - psdf2.b - psdf3.c).sort_index(), (pdf1.a - pdf2.b - pdf3.c).sort_index()
344
- )
345
-
346
- assert_eq(
347
- (psdf1.a * (psdf2.a * psdf3.c)).sort_index(), (pdf1.a * (pdf2.a * pdf3.c)).sort_index()
348
- )
349
-
350
- if check_extension and not extension_float_dtypes_available:
351
- self.assert_eq(
352
- (psdf1["a"] / psdf2["a"] / psdf3["c"]).sort_index(),
353
- (pdf1["a"] / pdf2["a"] / pdf3["c"]).sort_index(),
354
- )
355
- else:
356
- assert_eq(
357
- (psdf1["a"] / psdf2["a"] / psdf3["c"]).sort_index(),
358
- (pdf1["a"] / pdf2["a"] / pdf3["c"]).sort_index(),
359
- )
360
-
361
- # DataFrame
362
- if check_extension and LooseVersion(pd.__version__) < LooseVersion("1.1"):
363
- self.assert_eq(
364
- (psdf1 + psdf2 - psdf3).sort_index(), (pdf1 + pdf2 - pdf3).sort_index(), almost=True
365
- )
366
- else:
367
- assert_eq((psdf1 + psdf2 - psdf3).sort_index(), (pdf1 + pdf2 - pdf3).sort_index())
368
-
369
- # Multi-index columns
370
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
371
- psdf1.columns = columns
372
- psdf2.columns = columns
373
- pdf1.columns = columns
374
- pdf2.columns = columns
375
- columns = pd.MultiIndex.from_tuples([("x", "b"), ("y", "c")])
376
- psdf3.columns = columns
377
- pdf3.columns = columns
378
-
379
- common_columns = set(psdf1.columns).intersection(psdf2.columns).intersection(psdf3.columns)
380
-
381
- # Series
382
- assert_eq(
383
- (psdf1[("x", "a")] - psdf2[("x", "b")] - psdf3[("y", "c")]).sort_index(),
384
- (pdf1[("x", "a")] - pdf2[("x", "b")] - pdf3[("y", "c")]).sort_index(),
385
- )
386
-
387
- assert_eq(
388
- (psdf1[("x", "a")] * (psdf2[("x", "b")] * psdf3[("y", "c")])).sort_index(),
389
- (pdf1[("x", "a")] * (pdf2[("x", "b")] * pdf3[("y", "c")])).sort_index(),
390
- )
391
-
392
- # DataFrame
393
- if check_extension and LooseVersion(pd.__version__) < LooseVersion("1.1"):
394
- self.assert_eq(
395
- (psdf1 + psdf2 - psdf3).sort_index(), (pdf1 + pdf2 - pdf3).sort_index(), almost=True
396
- )
397
- else:
398
- assert_eq((psdf1 + psdf2 - psdf3).sort_index(), (pdf1 + pdf2 - pdf3).sort_index())
399
-
400
- def _test_arithmetic_chain_series(self, pser1, pser2, pser3, *, check_extension):
401
- psser1 = ps.from_pandas(pser1)
402
- psser2 = ps.from_pandas(pser2)
403
- psser3 = ps.from_pandas(pser3)
404
-
405
- def assert_eq(actual, expected):
406
- if LooseVersion("1.1") <= LooseVersion(pd.__version__) < LooseVersion("1.2.2"):
407
- self.assert_eq(actual, expected, check_exact=not check_extension)
408
- if check_extension:
409
- self.assertTrue(isinstance(actual.dtype, extension_dtypes))
410
- else:
411
- self.assert_eq(actual, expected)
412
-
413
- # MultiIndex Series
414
- assert_eq((psser1 + psser2 - psser3).sort_index(), (pser1 + pser2 - pser3).sort_index())
415
-
416
- assert_eq((psser1 * psser2 * psser3).sort_index(), (pser1 * pser2 * pser3).sort_index())
417
-
418
- if check_extension and not extension_float_dtypes_available:
419
- self.assert_eq(
420
- (psser1 - psser2 / psser3).sort_index(), (pser1 - pser2 / pser3).sort_index()
421
- )
422
- else:
423
- assert_eq((psser1 - psser2 / psser3).sort_index(), (pser1 - pser2 / pser3).sort_index())
424
-
425
- assert_eq((psser1 + psser2 * psser3).sort_index(), (pser1 + pser2 * pser3).sort_index())
426
-
427
- def test_mod(self):
428
- pser = pd.Series([100, None, -300, None, 500, -700])
429
- pser_other = pd.Series([-150] * 6)
430
- psser = ps.from_pandas(pser)
431
- psser_other = ps.from_pandas(pser_other)
432
-
433
- self.assert_eq(psser.mod(psser_other).sort_index(), pser.mod(pser_other))
434
- self.assert_eq(psser.mod(psser_other).sort_index(), pser.mod(pser_other))
435
- self.assert_eq(psser.mod(psser_other).sort_index(), pser.mod(pser_other))
436
-
437
- def test_rmod(self):
438
- pser = pd.Series([100, None, -300, None, 500, -700])
439
- pser_other = pd.Series([-150] * 6)
440
- psser = ps.from_pandas(pser)
441
- psser_other = ps.from_pandas(pser_other)
442
-
443
- self.assert_eq(psser.rmod(psser_other).sort_index(), pser.rmod(pser_other))
444
- self.assert_eq(psser.rmod(psser_other).sort_index(), pser.rmod(pser_other))
445
- self.assert_eq(psser.rmod(psser_other).sort_index(), pser.rmod(pser_other))
446
-
447
- def test_getitem_boolean_series(self):
448
- pdf1 = pd.DataFrame(
449
- {"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}, index=[20, 10, 30, 0, 50]
450
- )
451
- pdf2 = pd.DataFrame(
452
- {"A": [0, -1, -2, -3, -4], "B": [-100, -200, -300, -400, -500]},
453
- index=[0, 30, 10, 20, 50],
454
- )
455
- psdf1 = ps.from_pandas(pdf1)
456
- psdf2 = ps.from_pandas(pdf2)
457
-
458
- self.assert_eq(pdf1[pdf2.A > -3].sort_index(), psdf1[psdf2.A > -3].sort_index())
459
-
460
- self.assert_eq(pdf1.A[pdf2.A > -3].sort_index(), psdf1.A[psdf2.A > -3].sort_index())
461
-
462
- self.assert_eq(
463
- (pdf1.A + 1)[pdf2.A > -3].sort_index(), (psdf1.A + 1)[psdf2.A > -3].sort_index()
464
- )
465
-
466
- def test_loc_getitem_boolean_series(self):
467
- pdf1 = pd.DataFrame(
468
- {"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}, index=[20, 10, 30, 0, 50]
469
- )
470
- pdf2 = pd.DataFrame(
471
- {"A": [0, -1, -2, -3, -4], "B": [-100, -200, -300, -400, -500]},
472
- index=[20, 10, 30, 0, 50],
473
- )
474
- psdf1 = ps.from_pandas(pdf1)
475
- psdf2 = ps.from_pandas(pdf2)
476
-
477
- self.assert_eq(pdf1.loc[pdf2.A > -3].sort_index(), psdf1.loc[psdf2.A > -3].sort_index())
478
-
479
- self.assert_eq(pdf1.A.loc[pdf2.A > -3].sort_index(), psdf1.A.loc[psdf2.A > -3].sort_index())
480
-
481
- self.assert_eq(
482
- (pdf1.A + 1).loc[pdf2.A > -3].sort_index(), (psdf1.A + 1).loc[psdf2.A > -3].sort_index()
483
- )
484
-
485
- pser = pd.Series([0, 1, 2, 3, 4], index=[20, 10, 30, 0, 50])
486
- psser = ps.from_pandas(pser)
487
- self.assert_eq(pser.loc[pdf2.A > -3].sort_index(), psser.loc[psdf2.A > -3].sort_index())
488
- pser.name = psser.name = "B"
489
- self.assert_eq(pser.loc[pdf2.A > -3].sort_index(), psser.loc[psdf2.A > -3].sort_index())
490
-
491
- def test_bitwise(self):
492
- pser1 = pd.Series([True, False, True, False, np.nan, np.nan, True, False, np.nan])
493
- pser2 = pd.Series([True, False, False, True, True, False, np.nan, np.nan, np.nan])
494
- psser1 = ps.from_pandas(pser1)
495
- psser2 = ps.from_pandas(pser2)
496
-
497
- self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
498
- self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
499
-
500
- pser1 = pd.Series([True, False, np.nan], index=list("ABC"))
501
- pser2 = pd.Series([False, True, np.nan], index=list("DEF"))
502
- psser1 = ps.from_pandas(pser1)
503
- psser2 = ps.from_pandas(pser2)
504
-
505
- self.assert_eq(pser1 | pser2, (psser1 | psser2).sort_index())
506
- self.assert_eq(pser1 & pser2, (psser1 & psser2).sort_index())
507
-
508
- @unittest.skipIf(
509
- not extension_object_dtypes_available, "pandas extension object dtypes are not available"
510
- )
511
- def test_bitwise_extension_dtype(self):
512
- def assert_eq(actual, expected):
513
- if LooseVersion("1.1") <= LooseVersion(pd.__version__) < LooseVersion("1.2.2"):
514
- self.assert_eq(actual, expected, check_exact=False)
515
- self.assertTrue(isinstance(actual.dtype, extension_dtypes))
516
- else:
517
- self.assert_eq(actual, expected)
518
-
519
- pser1 = pd.Series(
520
- [True, False, True, False, np.nan, np.nan, True, False, np.nan], dtype="boolean"
521
- )
522
- pser2 = pd.Series(
523
- [True, False, False, True, True, False, np.nan, np.nan, np.nan], dtype="boolean"
524
- )
525
- psser1 = ps.from_pandas(pser1)
526
- psser2 = ps.from_pandas(pser2)
527
-
528
- assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
529
- assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
530
-
531
- pser1 = pd.Series([True, False, np.nan], index=list("ABC"), dtype="boolean")
532
- pser2 = pd.Series([False, True, np.nan], index=list("DEF"), dtype="boolean")
533
- psser1 = ps.from_pandas(pser1)
534
- psser2 = ps.from_pandas(pser2)
535
-
536
- # a pandas bug?
537
- # assert_eq((psser1 | psser2).sort_index(), pser1 | pser2)
538
- # assert_eq((psser1 & psser2).sort_index(), pser1 & pser2)
539
- assert_eq(
540
- (psser1 | psser2).sort_index(),
541
- pd.Series([True, None, None, None, True, None], index=list("ABCDEF"), dtype="boolean"),
542
- )
543
- assert_eq(
544
- (psser1 & psser2).sort_index(),
545
- pd.Series(
546
- [None, False, None, False, None, None], index=list("ABCDEF"), dtype="boolean"
547
- ),
548
- )
549
-
550
- @unittest.skipIf(
551
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
552
- "TODO(SPARK-43453): Enable OpsOnDiffFramesEnabledTests.test_concat_column_axis "
553
- "for pandas 2.0.0.",
554
- )
555
- def test_concat_column_axis(self):
556
- pdf1 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
557
- pdf1.columns.names = ["AB"]
558
- pdf2 = pd.DataFrame({"C": [1, 2, 3], "D": [4, 5, 6]}, index=[1, 3, 5])
559
- pdf2.columns.names = ["CD"]
560
- psdf1 = ps.from_pandas(pdf1)
561
- psdf2 = ps.from_pandas(pdf2)
562
-
563
- psdf3 = psdf1.copy()
564
- psdf4 = psdf2.copy()
565
- pdf3 = pdf1.copy()
566
- pdf4 = pdf2.copy()
567
-
568
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")], names=["X", "AB"])
569
- pdf3.columns = columns
570
- psdf3.columns = columns
571
-
572
- columns = pd.MultiIndex.from_tuples([("X", "C"), ("X", "D")], names=["Y", "CD"])
573
- pdf4.columns = columns
574
- psdf4.columns = columns
575
-
576
- pdf5 = pd.DataFrame({"A": [0, 2, 4], "B": [1, 3, 5]}, index=[1, 2, 3])
577
- pdf6 = pd.DataFrame({"C": [1, 2, 3]}, index=[1, 3, 5])
578
- psdf5 = ps.from_pandas(pdf5)
579
- psdf6 = ps.from_pandas(pdf6)
580
-
581
- ignore_indexes = [True, False]
582
- joins = ["inner", "outer"]
583
-
584
- objs = [
585
- ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
586
- # TODO: ([psdf1, psdf2.C], [pdf1, pdf2.C]),
587
- ([psdf1.A, psdf2], [pdf1.A, pdf2]),
588
- ([psdf1.A, psdf2.C], [pdf1.A, pdf2.C]),
589
- ([psdf3[("X", "A")], psdf4[("X", "C")]], [pdf3[("X", "A")], pdf4[("X", "C")]]),
590
- ([psdf3, psdf4[("X", "C")]], [pdf3, pdf4[("X", "C")]]),
591
- ([psdf3[("X", "A")], psdf4], [pdf3[("X", "A")], pdf4]),
592
- ([psdf3, psdf4], [pdf3, pdf4]),
593
- ([psdf5, psdf6], [pdf5, pdf6]),
594
- ([psdf6, psdf5], [pdf6, pdf5]),
595
- ]
596
-
597
- for ignore_index, join in product(ignore_indexes, joins):
598
- for i, (psdfs, pdfs) in enumerate(objs):
599
- with self.subTest(ignore_index=ignore_index, join=join, pdfs=pdfs, pair=i):
600
- actual = ps.concat(psdfs, axis=1, ignore_index=ignore_index, join=join)
601
- expected = pd.concat(pdfs, axis=1, ignore_index=ignore_index, join=join)
602
- self.assert_eq(
603
- repr(actual.sort_values(list(actual.columns)).reset_index(drop=True)),
604
- repr(expected.sort_values(list(expected.columns)).reset_index(drop=True)),
605
- )
606
- actual = ps.concat(
607
- psdfs, axis=1, ignore_index=ignore_index, join=join, sort=True
608
- )
609
- expected = pd.concat(
610
- pdfs, axis=1, ignore_index=ignore_index, join=join, sort=True
611
- )
612
- self.assert_eq(
613
- repr(actual.reset_index(drop=True)),
614
- repr(expected.reset_index(drop=True)),
615
- )
616
-
617
- def test_combine_first(self):
618
- pser1 = pd.Series({"falcon": 330.0, "eagle": 160.0})
619
- pser2 = pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
620
- psser1 = ps.from_pandas(pser1)
621
- psser2 = ps.from_pandas(pser2)
622
-
623
- self.assert_eq(
624
- psser1.combine_first(psser2).sort_index(), pser1.combine_first(pser2).sort_index()
625
- )
626
- with self.assertRaisesRegex(
627
- TypeError, "`combine_first` only allows `Series` for parameter `other`"
628
- ):
629
- psser1.combine_first(50)
630
-
631
- psser1.name = ("X", "A")
632
- psser2.name = ("Y", "B")
633
- pser1.name = ("X", "A")
634
- pser2.name = ("Y", "B")
635
- self.assert_eq(
636
- psser1.combine_first(psser2).sort_index(), pser1.combine_first(pser2).sort_index()
637
- )
638
-
639
- # MultiIndex
640
- midx1 = pd.MultiIndex(
641
- [["lama", "cow", "falcon", "koala"], ["speed", "weight", "length", "power"]],
642
- [[0, 3, 1, 1, 1, 2, 2, 2], [0, 2, 0, 3, 2, 0, 1, 3]],
643
- )
644
- midx2 = pd.MultiIndex(
645
- [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
646
- [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]],
647
- )
648
- pser1 = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1], index=midx1)
649
- pser2 = pd.Series([-45, 200, -1.2, 30, -250, 1.5, 320, 1, -0.3], index=midx2)
650
- psser1 = ps.from_pandas(pser1)
651
- psser2 = ps.from_pandas(pser2)
652
-
653
- self.assert_eq(
654
- psser1.combine_first(psser2).sort_index(), pser1.combine_first(pser2).sort_index()
655
- )
656
-
657
- # DataFrame
658
- pdf1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
659
- psdf1 = ps.from_pandas(pdf1)
660
- pdf2 = pd.DataFrame({"C": [3, 3], "B": [1, 1]})
661
- psdf2 = ps.from_pandas(pdf2)
662
-
663
- if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
664
- self.assert_eq(pdf1.combine_first(pdf2), psdf1.combine_first(psdf2).sort_index())
665
- else:
666
- # pandas < 1.2.0 returns unexpected dtypes,
667
- # please refer to https://github.com/pandas-dev/pandas/issues/28481 for details
668
- expected_pdf = pd.DataFrame({"A": [None, 0], "B": [4.0, 1.0], "C": [3, 3]})
669
- self.assert_eq(expected_pdf, psdf1.combine_first(psdf2).sort_index())
670
-
671
- pdf1.columns = pd.MultiIndex.from_tuples([("A", "willow"), ("B", "pine")])
672
- psdf1 = ps.from_pandas(pdf1)
673
- pdf2.columns = pd.MultiIndex.from_tuples([("C", "oak"), ("B", "pine")])
674
- psdf2 = ps.from_pandas(pdf2)
675
-
676
- if LooseVersion(pd.__version__) >= LooseVersion("1.2.0"):
677
- self.assert_eq(pdf1.combine_first(pdf2), psdf1.combine_first(psdf2).sort_index())
678
- else:
679
- # pandas < 1.2.0 returns unexpected dtypes,
680
- # please refer to https://github.com/pandas-dev/pandas/issues/28481 for details
681
- expected_pdf = pd.DataFrame({"A": [None, 0], "B": [4.0, 1.0], "C": [3, 3]})
682
- expected_pdf.columns = pd.MultiIndex.from_tuples(
683
- [("A", "willow"), ("B", "pine"), ("C", "oak")]
684
- )
685
- self.assert_eq(expected_pdf, psdf1.combine_first(psdf2).sort_index())
686
-
687
- def test_insert(self):
688
- #
689
- # Basic DataFrame
690
- #
691
- pdf = pd.DataFrame([1, 2, 3])
692
- psdf = ps.from_pandas(pdf)
693
-
694
- pser = pd.Series([4, 5, 6])
695
- psser = ps.from_pandas(pser)
696
- psdf.insert(1, "y", psser)
697
- pdf.insert(1, "y", pser)
698
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
699
-
700
- #
701
- # DataFrame with Index different from inserting Series'
702
- #
703
- pdf = pd.DataFrame([1, 2, 3], index=[10, 20, 30])
704
- psdf = ps.from_pandas(pdf)
705
-
706
- pser = pd.Series([4, 5, 6])
707
- psser = ps.from_pandas(pser)
708
- psdf.insert(1, "y", psser)
709
- pdf.insert(1, "y", pser)
710
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
711
-
712
- #
713
- # DataFrame with Multi-index columns
714
- #
715
- pdf = pd.DataFrame({("x", "a"): [1, 2, 3]})
716
- psdf = ps.from_pandas(pdf)
717
-
718
- pser = pd.Series([4, 5, 6])
719
- psser = ps.from_pandas(pser)
720
- pdf = pd.DataFrame({("x", "a", "b"): [1, 2, 3]})
721
- psdf = ps.from_pandas(pdf)
722
- psdf.insert(0, "a", psser)
723
- pdf.insert(0, "a", pser)
724
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
725
- psdf.insert(0, ("b", "c", ""), psser)
726
- pdf.insert(0, ("b", "c", ""), pser)
727
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
728
-
729
- def test_compare(self):
730
- if LooseVersion(pd.__version__) >= LooseVersion("1.1"):
731
- pser1 = pd.Series(["b", "c", np.nan, "g", np.nan])
732
- pser2 = pd.Series(["a", "c", np.nan, np.nan, "h"])
733
- psser1 = ps.from_pandas(pser1)
734
- psser2 = ps.from_pandas(pser2)
735
- self.assert_eq(
736
- pser1.compare(pser2).sort_index(),
737
- psser1.compare(psser2).sort_index(),
738
- )
739
-
740
- # `keep_shape=True`
741
- self.assert_eq(
742
- pser1.compare(pser2, keep_shape=True).sort_index(),
743
- psser1.compare(psser2, keep_shape=True).sort_index(),
744
- )
745
- # `keep_equal=True`
746
- self.assert_eq(
747
- pser1.compare(pser2, keep_equal=True).sort_index(),
748
- psser1.compare(psser2, keep_equal=True).sort_index(),
749
- )
750
- # `keep_shape=True` and `keep_equal=True`
751
- self.assert_eq(
752
- pser1.compare(pser2, keep_shape=True, keep_equal=True).sort_index(),
753
- psser1.compare(psser2, keep_shape=True, keep_equal=True).sort_index(),
754
- )
755
-
756
- # MultiIndex
757
- pser1.index = pd.MultiIndex.from_tuples(
758
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
759
- )
760
- pser2.index = pd.MultiIndex.from_tuples(
761
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
762
- )
763
- psser1 = ps.from_pandas(pser1)
764
- psser2 = ps.from_pandas(pser2)
765
- self.assert_eq(
766
- pser1.compare(pser2).sort_index(),
767
- psser1.compare(psser2).sort_index(),
768
- )
769
-
770
- # `keep_shape=True` with MultiIndex
771
- self.assert_eq(
772
- pser1.compare(pser2, keep_shape=True).sort_index(),
773
- psser1.compare(psser2, keep_shape=True).sort_index(),
774
- )
775
- # `keep_equal=True` with MultiIndex
776
- self.assert_eq(
777
- pser1.compare(pser2, keep_equal=True).sort_index(),
778
- psser1.compare(psser2, keep_equal=True).sort_index(),
779
- )
780
- # `keep_shape=True` and `keep_equal=True` with MultiIndex
781
- self.assert_eq(
782
- pser1.compare(pser2, keep_shape=True, keep_equal=True).sort_index(),
783
- psser1.compare(psser2, keep_shape=True, keep_equal=True).sort_index(),
784
- )
785
- else:
786
- psser1 = ps.Series(["b", "c", np.nan, "g", np.nan])
787
- psser2 = ps.Series(["a", "c", np.nan, np.nan, "h"])
788
- expected = ps.DataFrame(
789
- [["b", "a"], ["g", None], [None, "h"]], index=[0, 3, 4], columns=["self", "other"]
790
- )
791
- self.assert_eq(expected, psser1.compare(psser2).sort_index())
792
-
793
- # `keep_shape=True`
794
- expected = ps.DataFrame(
795
- [["b", "a"], [None, None], [None, None], ["g", None], [None, "h"]],
796
- index=[0, 1, 2, 3, 4],
797
- columns=["self", "other"],
798
- )
799
- self.assert_eq(
800
- expected,
801
- psser1.compare(psser2, keep_shape=True).sort_index(),
802
- )
803
- # `keep_equal=True`
804
- expected = ps.DataFrame(
805
- [["b", "a"], ["g", None], [None, "h"]], index=[0, 3, 4], columns=["self", "other"]
806
- )
807
- self.assert_eq(
808
- expected,
809
- psser1.compare(psser2, keep_equal=True).sort_index(),
810
- )
811
- # `keep_shape=True` and `keep_equal=True`
812
- expected = ps.DataFrame(
813
- [["b", "a"], ["c", "c"], [None, None], ["g", None], [None, "h"]],
814
- index=[0, 1, 2, 3, 4],
815
- columns=["self", "other"],
816
- )
817
- self.assert_eq(
818
- expected,
819
- psser1.compare(psser2, keep_shape=True, keep_equal=True).sort_index(),
820
- )
821
-
822
- # MultiIndex
823
- psser1 = ps.Series(
824
- ["b", "c", np.nan, "g", np.nan],
825
- index=pd.MultiIndex.from_tuples(
826
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
827
- ),
828
- )
829
- psser2 = ps.Series(
830
- ["a", "c", np.nan, np.nan, "h"],
831
- index=pd.MultiIndex.from_tuples(
832
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
833
- ),
834
- )
835
- expected = ps.DataFrame(
836
- [["b", "a"], [None, "h"], ["g", None]],
837
- index=pd.MultiIndex.from_tuples([("a", "x"), ("q", "l"), ("x", "k")]),
838
- columns=["self", "other"],
839
- )
840
- self.assert_eq(expected, psser1.compare(psser2).sort_index())
841
-
842
- # `keep_shape=True`
843
- expected = ps.DataFrame(
844
- [["b", "a"], [None, None], [None, None], [None, "h"], ["g", None]],
845
- index=pd.MultiIndex.from_tuples(
846
- [("a", "x"), ("b", "y"), ("c", "z"), ("q", "l"), ("x", "k")]
847
- ),
848
- columns=["self", "other"],
849
- )
850
- self.assert_eq(
851
- expected,
852
- psser1.compare(psser2, keep_shape=True).sort_index(),
853
- )
854
- # `keep_equal=True`
855
- expected = ps.DataFrame(
856
- [["b", "a"], [None, "h"], ["g", None]],
857
- index=pd.MultiIndex.from_tuples([("a", "x"), ("q", "l"), ("x", "k")]),
858
- columns=["self", "other"],
859
- )
860
- self.assert_eq(
861
- expected,
862
- psser1.compare(psser2, keep_equal=True).sort_index(),
863
- )
864
- # `keep_shape=True` and `keep_equal=True`
865
- expected = ps.DataFrame(
866
- [["b", "a"], ["c", "c"], [None, None], [None, "h"], ["g", None]],
867
- index=pd.MultiIndex.from_tuples(
868
- [("a", "x"), ("b", "y"), ("c", "z"), ("q", "l"), ("x", "k")]
869
- ),
870
- columns=["self", "other"],
871
- )
872
- self.assert_eq(
873
- expected,
874
- psser1.compare(psser2, keep_shape=True, keep_equal=True).sort_index(),
875
- )
876
-
877
- # Different Index
878
- with self.assertRaisesRegex(
879
- ValueError, "Can only compare identically-labeled Series objects"
880
- ):
881
- psser1 = ps.Series(
882
- [1, 2, 3, 4, 5],
883
- index=pd.Index([1, 2, 3, 4, 5]),
884
- )
885
- psser2 = ps.Series(
886
- [2, 2, 3, 4, 1],
887
- index=pd.Index([5, 4, 3, 2, 1]),
888
- )
889
- psser1.compare(psser2)
890
- # Different MultiIndex
891
- with self.assertRaisesRegex(
892
- ValueError, "Can only compare identically-labeled Series objects"
893
- ):
894
- psser1 = ps.Series(
895
- [1, 2, 3, 4, 5],
896
- index=pd.MultiIndex.from_tuples(
897
- [("a", "x"), ("b", "y"), ("c", "z"), ("x", "k"), ("q", "l")]
898
- ),
899
- )
900
- psser2 = ps.Series(
901
- [2, 2, 3, 4, 1],
902
- index=pd.MultiIndex.from_tuples(
903
- [("a", "x"), ("b", "y"), ("c", "a"), ("x", "k"), ("q", "l")]
904
- ),
905
- )
906
- psser1.compare(psser2)
907
- # SPARK-37495: Skip identical index checking of Series.compare when config
908
- # 'compute.eager_check' is disabled
909
- psser1 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 3, 4, 5]))
910
- psser2 = ps.Series([1, 2, 3, 4, 5, 6], index=pd.Index([1, 2, 4, 3, 6, 7]))
911
- expected = ps.DataFrame(
912
- {"self": [3, 4, 5, np.nan, np.nan], "other": [4, 3, np.nan, 5.0, 6.0]},
913
- index=[3, 4, 5, 6, 7],
914
- )
915
-
916
- with ps.option_context("compute.eager_check", False):
917
- self.assert_eq(expected, psser1.compare(psser2))
918
-
919
- def test_different_columns(self):
920
- psdf1 = self.psdf1
921
- psdf4 = self.psdf4
922
- pdf1 = self.pdf1
923
- pdf4 = self.pdf4
924
-
925
- self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 + pdf4).sort_index(), almost=True)
926
-
927
- # Multi-index columns
928
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
929
- psdf1.columns = columns
930
- pdf1.columns = columns
931
- columns = pd.MultiIndex.from_tuples([("z", "e"), ("z", "f")])
932
- psdf4.columns = columns
933
- pdf4.columns = columns
934
-
935
- self.assert_eq((psdf1 + psdf4).sort_index(), (pdf1 + pdf4).sort_index(), almost=True)
936
-
937
- def test_assignment_series(self):
938
- psdf = ps.from_pandas(self.pdf1)
939
- pdf = self.pdf1
940
- psser = psdf.a
941
- pser = pdf.a
942
- psdf["a"] = self.psdf2.a
943
- pdf["a"] = self.pdf2.a
944
-
945
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
946
- self.assert_eq(psser, pser)
947
-
948
- psdf = ps.from_pandas(self.pdf1)
949
- pdf = self.pdf1
950
- psser = psdf.a
951
- pser = pdf.a
952
- psdf["a"] = self.psdf2.b
953
- pdf["a"] = self.pdf2.b
954
-
955
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
956
- self.assert_eq(psser, pser)
957
-
958
- psdf = ps.from_pandas(self.pdf1)
959
- pdf = self.pdf1
960
- psdf["c"] = self.psdf2.a
961
- pdf["c"] = self.pdf2.a
962
-
963
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
964
-
965
- # Multi-index columns
966
- psdf = ps.from_pandas(self.pdf1)
967
- pdf = self.pdf1
968
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
969
- psdf.columns = columns
970
- pdf.columns = columns
971
- psdf[("y", "c")] = self.psdf2.a
972
- pdf[("y", "c")] = self.pdf2.a
973
-
974
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
975
-
976
- pdf = pd.DataFrame({"a": [1, 2, 3], "Koalas": [0, 1, 2]}).set_index("Koalas", drop=False)
977
- psdf = ps.from_pandas(pdf)
978
-
979
- psdf.index.name = None
980
- psdf["NEW"] = ps.Series([100, 200, 300])
981
-
982
- pdf.index.name = None
983
- pdf["NEW"] = pd.Series([100, 200, 300])
984
-
985
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
986
-
987
- def test_assignment_frame(self):
988
- psdf = ps.from_pandas(self.pdf1)
989
- pdf = self.pdf1
990
- psser = psdf.a
991
- pser = pdf.a
992
- psdf[["a", "b"]] = self.psdf1
993
- pdf[["a", "b"]] = self.pdf1
994
-
995
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
996
- self.assert_eq(psser, pser)
997
-
998
- # 'c' does not exist in `psdf`.
999
- psdf = ps.from_pandas(self.pdf1)
1000
- pdf = self.pdf1
1001
- psser = psdf.a
1002
- pser = pdf.a
1003
- psdf[["b", "c"]] = self.psdf1
1004
- pdf[["b", "c"]] = self.pdf1
1005
-
1006
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1007
- self.assert_eq(psser, pser)
1008
-
1009
- # 'c' and 'd' do not exist in `psdf`.
1010
- psdf = ps.from_pandas(self.pdf1)
1011
- pdf = self.pdf1
1012
- psdf[["c", "d"]] = self.psdf1
1013
- pdf[["c", "d"]] = self.pdf1
1014
-
1015
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1016
-
1017
- # Multi-index columns
1018
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b")])
1019
- psdf = ps.from_pandas(self.pdf1)
1020
- pdf = self.pdf1
1021
- psdf.columns = columns
1022
- pdf.columns = columns
1023
- psdf[[("y", "c"), ("z", "d")]] = self.psdf1
1024
- pdf[[("y", "c"), ("z", "d")]] = self.pdf1
1025
-
1026
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1027
-
1028
- psdf = ps.from_pandas(self.pdf1)
1029
- pdf = self.pdf1
1030
- psdf1 = ps.from_pandas(self.pdf1)
1031
- pdf1 = self.pdf1
1032
- psdf1.columns = columns
1033
- pdf1.columns = columns
1034
- psdf[["c", "d"]] = psdf1
1035
- pdf[["c", "d"]] = pdf1
1036
-
1037
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1038
-
1039
- def test_assignment_series_chain(self):
1040
- psdf = ps.from_pandas(self.pdf1)
1041
- pdf = self.pdf1
1042
- psdf["a"] = self.psdf1.a
1043
- pdf["a"] = self.pdf1.a
1044
-
1045
- psdf["a"] = self.psdf2.b
1046
- pdf["a"] = self.pdf2.b
1047
-
1048
- psdf["d"] = self.psdf3.c
1049
- pdf["d"] = self.pdf3.c
1050
-
1051
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1052
-
1053
- def test_assignment_frame_chain(self):
1054
- psdf = ps.from_pandas(self.pdf1)
1055
- pdf = self.pdf1
1056
- psdf[["a", "b"]] = self.psdf1
1057
- pdf[["a", "b"]] = self.pdf1
1058
-
1059
- psdf[["e", "f"]] = self.psdf3
1060
- pdf[["e", "f"]] = self.pdf3
1061
-
1062
- psdf[["b", "c"]] = self.psdf2
1063
- pdf[["b", "c"]] = self.pdf2
1064
-
1065
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1066
-
1067
- def test_multi_index_arithmetic(self):
1068
- psdf5 = self.psdf5
1069
- psdf6 = self.psdf6
1070
- pdf5 = self.pdf5
1071
- pdf6 = self.pdf6
1072
-
1073
- # Series
1074
- self.assert_eq((psdf5.c - psdf6.e).sort_index(), (pdf5.c - pdf6.e).sort_index())
1075
-
1076
- self.assert_eq((psdf5["c"] / psdf6["e"]).sort_index(), (pdf5["c"] / pdf6["e"]).sort_index())
1077
-
1078
- # DataFrame
1079
- self.assert_eq((psdf5 + psdf6).sort_index(), (pdf5 + pdf6).sort_index(), almost=True)
1080
-
1081
- def test_multi_index_assignment_series(self):
1082
- psdf = ps.from_pandas(self.pdf5)
1083
- pdf = self.pdf5
1084
- psdf["x"] = self.psdf6.e
1085
- pdf["x"] = self.pdf6.e
1086
-
1087
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1088
-
1089
- psdf = ps.from_pandas(self.pdf5)
1090
- pdf = self.pdf5
1091
- psdf["e"] = self.psdf6.e
1092
- pdf["e"] = self.pdf6.e
1093
-
1094
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1095
-
1096
- psdf = ps.from_pandas(self.pdf5)
1097
- pdf = self.pdf5
1098
- psdf["c"] = self.psdf6.e
1099
- pdf["c"] = self.pdf6.e
1100
-
1101
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1102
-
1103
- def test_multi_index_assignment_frame(self):
1104
- psdf = ps.from_pandas(self.pdf5)
1105
- pdf = self.pdf5
1106
- psdf[["c"]] = self.psdf5
1107
- pdf[["c"]] = self.pdf5
1108
-
1109
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1110
-
1111
- psdf = ps.from_pandas(self.pdf5)
1112
- pdf = self.pdf5
1113
- psdf[["x"]] = self.psdf5
1114
- pdf[["x"]] = self.pdf5
1115
-
1116
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1117
-
1118
- psdf = ps.from_pandas(self.pdf6)
1119
- pdf = self.pdf6
1120
- psdf[["x", "y"]] = self.psdf6
1121
- pdf[["x", "y"]] = self.pdf6
1122
-
1123
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
1124
-
1125
-
1126
- class OpsOnDiffFramesDisabledTestsMixin:
1127
- @classmethod
1128
- def setUpClass(cls):
1129
- super().setUpClass()
1130
- set_option("compute.ops_on_diff_frames", False)
1131
-
1132
- @classmethod
1133
- def tearDownClass(cls):
1134
- reset_option("compute.ops_on_diff_frames")
1135
- super().tearDownClass()
1136
-
1137
- @property
1138
- def pdf1(self):
1139
- return pd.DataFrame(
1140
- {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
1141
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
1142
- )
1143
-
1144
- @property
1145
- def pdf2(self):
1146
- return pd.DataFrame(
1147
- {"a": [9, 8, 7, 6, 5, 4, 3, 2, 1], "b": [0, 0, 0, 4, 5, 6, 1, 2, 3]},
1148
- index=list(range(9)),
1149
- )
1150
-
1151
- @property
1152
- def psdf1(self):
1153
- return ps.from_pandas(self.pdf1)
1154
-
1155
- @property
1156
- def psdf2(self):
1157
- return ps.from_pandas(self.pdf2)
1158
-
1159
- def test_arithmetic(self):
1160
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1161
- self.psdf1.a - self.psdf2.b
1162
-
1163
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1164
- self.psdf1.a - self.psdf2.a
1165
-
1166
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1167
- self.psdf1["a"] - self.psdf2["a"]
1168
-
1169
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1170
- self.psdf1 - self.psdf2
1171
-
1172
- def test_assignment(self):
1173
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1174
- psdf = ps.from_pandas(self.pdf1)
1175
- psdf["c"] = self.psdf1.a
1176
-
1177
- def test_frame_loc_setitem(self):
1178
- pdf = pd.DataFrame(
1179
- [[1, 2], [4, 5], [7, 8]],
1180
- index=["cobra", "viper", "sidewinder"],
1181
- columns=["max_speed", "shield"],
1182
- )
1183
- psdf = ps.DataFrame(pdf)
1184
- another_psdf = ps.DataFrame(pdf)
1185
-
1186
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1187
- psdf.loc[["viper", "sidewinder"], ["shield"]] = another_psdf.max_speed
1188
-
1189
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1190
- psdf.loc[another_psdf.max_speed < 5, ["shield"]] = -psdf.max_speed
1191
-
1192
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1193
- psdf.loc[another_psdf.max_speed < 5, ["shield"]] = -another_psdf.max_speed
1194
-
1195
- def test_frame_iloc_setitem(self):
1196
- pdf = pd.DataFrame(
1197
- [[1, 2], [4, 5], [7, 8]],
1198
- index=["cobra", "viper", "sidewinder"],
1199
- columns=["max_speed", "shield"],
1200
- )
1201
- psdf = ps.DataFrame(pdf)
1202
- another_psdf = ps.DataFrame(pdf)
1203
-
1204
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1205
- psdf.iloc[[1, 2], [1]] = another_psdf.max_speed.iloc[[1, 2]]
1206
-
1207
- def test_series_loc_setitem(self):
1208
- pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
1209
- psser = ps.from_pandas(pser)
1210
-
1211
- pser_another = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
1212
- psser_another = ps.from_pandas(pser_another)
1213
-
1214
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1215
- psser.loc[psser % 2 == 1] = -psser_another
1216
-
1217
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1218
- psser.loc[psser_another % 2 == 1] = -psser
1219
-
1220
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1221
- psser.loc[psser_another % 2 == 1] = -psser_another
1222
-
1223
- def test_series_iloc_setitem(self):
1224
- pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
1225
- psser = ps.from_pandas(pser)
1226
-
1227
- pser_another = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
1228
- psser_another = ps.from_pandas(pser_another)
1229
-
1230
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1231
- psser.iloc[[1]] = -psser_another.iloc[[1]]
1232
-
1233
- def test_where(self):
1234
- pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]})
1235
- pdf2 = pd.DataFrame({"A": [0, -1, -2, -3, -4], "B": [-100, -200, -300, -400, -500]})
1236
- psdf1 = ps.from_pandas(pdf1)
1237
- psdf2 = ps.from_pandas(pdf2)
1238
-
1239
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1240
- psdf1.where(psdf2 > 100)
1241
-
1242
- pdf1 = pd.DataFrame({"A": [-1, -2, -3, -4, -5], "B": [-100, -200, -300, -400, -500]})
1243
- pdf2 = pd.DataFrame({"A": [-10, -20, -30, -40, -50], "B": [-5, -4, -3, -2, -1]})
1244
- psdf1 = ps.from_pandas(pdf1)
1245
- psdf2 = ps.from_pandas(pdf2)
1246
-
1247
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1248
- psdf1.where(psdf2 < -250)
1249
-
1250
- def test_mask(self):
1251
- pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]})
1252
- pdf2 = pd.DataFrame({"A": [0, -1, -2, -3, -4], "B": [-100, -200, -300, -400, -500]})
1253
- psdf1 = ps.from_pandas(pdf1)
1254
- psdf2 = ps.from_pandas(pdf2)
1255
-
1256
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1257
- psdf1.mask(psdf2 < 100)
1258
-
1259
- pdf1 = pd.DataFrame({"A": [-1, -2, -3, -4, -5], "B": [-100, -200, -300, -400, -500]})
1260
- pdf2 = pd.DataFrame({"A": [-10, -20, -30, -40, -50], "B": [-5, -4, -3, -2, -1]})
1261
- psdf1 = ps.from_pandas(pdf1)
1262
- psdf2 = ps.from_pandas(pdf2)
1263
-
1264
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1265
- psdf1.mask(psdf2 > -250)
1266
-
1267
- def test_align(self):
1268
- pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=[10, 20, 30])
1269
- pdf2 = pd.DataFrame({"a": [4, 5, 6], "c": ["d", "e", "f"]}, index=[10, 11, 12])
1270
- psdf1 = ps.from_pandas(pdf1)
1271
- psdf2 = ps.from_pandas(pdf2)
1272
-
1273
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1274
- psdf1.align(psdf2)
1275
-
1276
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1277
- psdf1.align(psdf2, axis=0)
1278
-
1279
- def test_pow_and_rpow(self):
1280
- pser = pd.Series([1, 2, np.nan])
1281
- psser = ps.from_pandas(pser)
1282
- pser_other = pd.Series([np.nan, 2, 3])
1283
- psser_other = ps.from_pandas(pser_other)
1284
-
1285
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1286
- psser.pow(psser_other)
1287
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1288
- psser**psser_other
1289
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1290
- psser.rpow(psser_other)
1291
-
1292
- def test_equals(self):
1293
- psidx1 = ps.Index([1, 2, 3, 4])
1294
- psidx2 = ps.Index([1, 2, 3, 4])
1295
-
1296
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1297
- psidx1.equals(psidx2)
1298
-
1299
- def test_combine_first(self):
1300
- pdf1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
1301
- psdf1 = ps.from_pandas(pdf1)
1302
-
1303
- self.assertRaises(TypeError, lambda: psdf1.combine_first(ps.Series([1, 2])))
1304
-
1305
- pser1 = pd.Series({"falcon": 330.0, "eagle": 160.0})
1306
- pser2 = pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0})
1307
- psser1 = ps.from_pandas(pser1)
1308
- psser2 = ps.from_pandas(pser2)
1309
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1310
- psser1.combine_first(psser2)
1311
-
1312
- pdf1 = pd.DataFrame({"A": [None, 0], "B": [4, None]})
1313
- psdf1 = ps.from_pandas(pdf1)
1314
- pdf2 = pd.DataFrame({"C": [3, 3], "B": [1, 1]})
1315
- psdf2 = ps.from_pandas(pdf2)
1316
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1317
- psdf1.combine_first(psdf2)
1318
-
1319
- def test_series_eq(self):
1320
- pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
1321
- psser = ps.from_pandas(pser)
1322
-
1323
- others = (
1324
- ps.Series([np.nan, 1, 3, 4, np.nan, 6], name="x"),
1325
- ps.Index([np.nan, 1, 3, 4, np.nan, 6], name="x"),
1326
- )
1327
- for other in others:
1328
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1329
- psser.eq(other)
1330
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
1331
- psser == other
1332
-
1333
-
1334
- class OpsOnDiffFramesEnabledTests(
1335
- OpsOnDiffFramesEnabledTestsMixin, PandasOnSparkTestCase, SQLTestUtils
1336
- ):
1337
- pass
1338
-
1339
-
1340
- class OpsOnDiffFramesDisabledTests(
1341
- OpsOnDiffFramesDisabledTestsMixin, PandasOnSparkTestCase, SQLTestUtils
1342
- ):
1343
- pass
1344
-
1345
-
1346
- if __name__ == "__main__":
1347
- from pyspark.pandas.tests.test_ops_on_diff_frames import * # noqa: F401
1348
-
1349
- try:
1350
- import xmlrunner
1351
-
1352
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
1353
- except ImportError:
1354
- testRunner = None
1355
- unittest.main(testRunner=testRunner, verbosity=2)