snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (476)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/function_defaults.py +207 -0
  7. snowflake/snowpark_connect/expression/literal.py +18 -2
  8. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  9. snowflake/snowpark_connect/expression/map_expression.py +10 -1
  10. snowflake/snowpark_connect/expression/map_extension.py +12 -2
  11. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  12. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  13. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  14. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  15. snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
  16. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  17. snowflake/snowpark_connect/hidden_column.py +39 -0
  18. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
  21. snowflake/snowpark_connect/relation/map_extension.py +56 -15
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
  24. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  25. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  26. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  27. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  28. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  29. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  30. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  31. snowflake/snowpark_connect/relation/read/utils.py +6 -7
  32. snowflake/snowpark_connect/relation/utils.py +1 -170
  33. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  34. snowflake/snowpark_connect/resources_initializer.py +29 -1
  35. snowflake/snowpark_connect/server.py +18 -3
  36. snowflake/snowpark_connect/type_mapping.py +29 -25
  37. snowflake/snowpark_connect/typed_column.py +14 -0
  38. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  39. snowflake/snowpark_connect/utils/context.py +6 -1
  40. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  41. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  42. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  43. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  44. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  45. snowflake/snowpark_connect/version.py +1 -1
  46. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  47. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  48. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  52. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  53. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  101. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  102. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  362. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  444. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  445. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  468. snowpark_connect-0.23.0.dist-info/RECORD +0 -893
  469. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  470. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  471. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  472. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  473. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  474. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  475. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  476. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,979 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import unittest
19
- import inspect
20
- from distutils.version import LooseVersion
21
-
22
- import numpy as np
23
- import pandas as pd
24
-
25
- from pyspark import pandas as ps
26
- from pyspark.pandas.exceptions import PandasNotImplementedError, DataError
27
- from pyspark.pandas.missing.groupby import (
28
- MissingPandasLikeDataFrameGroupBy,
29
- MissingPandasLikeSeriesGroupBy,
30
- )
31
- from pyspark.pandas.groupby import is_multi_agg_with_relabel, SeriesGroupBy
32
- from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
33
-
34
-
35
- class GroupByTestsMixin:
36
- @property
37
- def pdf(self):
38
- return pd.DataFrame(
39
- {
40
- "A": [1, 2, 1, 2],
41
- "B": [3.1, 4.1, 4.1, 3.1],
42
- "C": ["a", "b", "b", "a"],
43
- "D": [True, False, False, True],
44
- }
45
- )
46
-
47
- @property
48
- def psdf(self):
49
- return ps.from_pandas(self.pdf)
50
-
51
- def test_groupby_simple(self):
52
- pdf = pd.DataFrame(
53
- {
54
- "a": [1, 2, 6, 4, 4, 6, 4, 3, 7],
55
- "b": [4, 2, 7, 3, 3, 1, 1, 1, 2],
56
- "c": [4, 2, 7, 3, None, 1, 1, 1, 2],
57
- "d": list("abcdefght"),
58
- "e": [True, False, True, False, True, False, True, False, True],
59
- },
60
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
61
- )
62
- if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
63
- # TODO(SPARK-43295): Make DataFrameGroupBy.sum support for string type columns
64
- pdf = pdf[["a", "b", "c", "e"]]
65
- psdf = ps.from_pandas(pdf)
66
-
67
- for as_index in [True, False]:
68
- if as_index:
69
-
70
- def sort(df):
71
- return df.sort_index()
72
-
73
- else:
74
-
75
- def sort(df):
76
- return df.sort_values("a").reset_index(drop=True)
77
-
78
- self.assert_eq(
79
- sort(psdf.groupby("a", as_index=as_index).sum()),
80
- sort(pdf.groupby("a", as_index=as_index).sum()),
81
- )
82
- self.assert_eq(
83
- sort(psdf.groupby("a", as_index=as_index).b.sum()),
84
- sort(pdf.groupby("a", as_index=as_index).b.sum()),
85
- )
86
- self.assert_eq(
87
- sort(psdf.groupby("a", as_index=as_index)["b"].sum()),
88
- sort(pdf.groupby("a", as_index=as_index)["b"].sum()),
89
- )
90
- self.assert_eq(
91
- sort(psdf.groupby("a", as_index=as_index)[["b", "c"]].sum()),
92
- sort(pdf.groupby("a", as_index=as_index)[["b", "c"]].sum()),
93
- )
94
- self.assert_eq(
95
- sort(psdf.groupby("a", as_index=as_index)[[]].sum()),
96
- sort(pdf.groupby("a", as_index=as_index)[[]].sum()),
97
- )
98
- self.assert_eq(
99
- sort(psdf.groupby("a", as_index=as_index)["c"].sum()),
100
- sort(pdf.groupby("a", as_index=as_index)["c"].sum()),
101
- )
102
-
103
- self.assert_eq(
104
- psdf.groupby("a").a.sum().sort_index(), pdf.groupby("a").a.sum().sort_index()
105
- )
106
- self.assert_eq(
107
- psdf.groupby("a")["a"].sum().sort_index(), pdf.groupby("a")["a"].sum().sort_index()
108
- )
109
- self.assert_eq(
110
- psdf.groupby("a")[["a"]].sum().sort_index(), pdf.groupby("a")[["a"]].sum().sort_index()
111
- )
112
- self.assert_eq(
113
- psdf.groupby("a")[["a", "c"]].sum().sort_index(),
114
- pdf.groupby("a")[["a", "c"]].sum().sort_index(),
115
- )
116
-
117
- self.assert_eq(
118
- psdf.a.groupby(psdf.b).sum().sort_index(), pdf.a.groupby(pdf.b).sum().sort_index()
119
- )
120
-
121
- for axis in [0, "index"]:
122
- self.assert_eq(
123
- psdf.groupby("a", axis=axis).a.sum().sort_index(),
124
- pdf.groupby("a", axis=axis).a.sum().sort_index(),
125
- )
126
- self.assert_eq(
127
- psdf.groupby("a", axis=axis)["a"].sum().sort_index(),
128
- pdf.groupby("a", axis=axis)["a"].sum().sort_index(),
129
- )
130
- self.assert_eq(
131
- psdf.groupby("a", axis=axis)[["a"]].sum().sort_index(),
132
- pdf.groupby("a", axis=axis)[["a"]].sum().sort_index(),
133
- )
134
- self.assert_eq(
135
- psdf.groupby("a", axis=axis)[["a", "c"]].sum().sort_index(),
136
- pdf.groupby("a", axis=axis)[["a", "c"]].sum().sort_index(),
137
- )
138
-
139
- self.assert_eq(
140
- psdf.a.groupby(psdf.b, axis=axis).sum().sort_index(),
141
- pdf.a.groupby(pdf.b, axis=axis).sum().sort_index(),
142
- )
143
-
144
- self.assertRaises(ValueError, lambda: psdf.groupby("a", as_index=False).a)
145
- self.assertRaises(ValueError, lambda: psdf.groupby("a", as_index=False)["a"])
146
- self.assertRaises(ValueError, lambda: psdf.groupby("a", as_index=False)[["a"]])
147
- self.assertRaises(ValueError, lambda: psdf.groupby("a", as_index=False)[["a", "c"]])
148
- self.assertRaises(KeyError, lambda: psdf.groupby("z", as_index=False)[["a", "c"]])
149
- self.assertRaises(KeyError, lambda: psdf.groupby(["z"], as_index=False)[["a", "c"]])
150
-
151
- self.assertRaises(TypeError, lambda: psdf.a.groupby(psdf.b, as_index=False))
152
-
153
- self.assertRaises(NotImplementedError, lambda: psdf.groupby("a", axis=1))
154
- self.assertRaises(NotImplementedError, lambda: psdf.groupby("a", axis="columns"))
155
- self.assertRaises(ValueError, lambda: psdf.groupby("a", "b"))
156
- self.assertRaises(TypeError, lambda: psdf.a.groupby(psdf.a, psdf.b))
157
-
158
- # we can't use column name/names as a parameter `by` for `SeriesGroupBy`.
159
- self.assertRaises(KeyError, lambda: psdf.a.groupby(by="a"))
160
- self.assertRaises(KeyError, lambda: psdf.a.groupby(by=["a", "b"]))
161
- self.assertRaises(KeyError, lambda: psdf.a.groupby(by=("a", "b")))
162
- self.assertRaises(KeyError, lambda: psdf.a.groupby(by=[("a", "b")]))
163
-
164
- # we can't use DataFrame as a parameter `by` for `DataFrameGroupBy`/`SeriesGroupBy`.
165
- self.assertRaises(ValueError, lambda: psdf.groupby(psdf))
166
- self.assertRaises(ValueError, lambda: psdf.a.groupby(psdf))
167
- self.assertRaises(ValueError, lambda: psdf.a.groupby((psdf,)))
168
-
169
- with self.assertRaisesRegex(ValueError, "Grouper for 'list' not 1-dimensional"):
170
- psdf.groupby(by=[["a", "b"]])
171
-
172
- # non-string names
173
- pdf = pd.DataFrame(
174
- {
175
- 10: [1, 2, 6, 4, 4, 6, 4, 3, 7],
176
- 20: [4, 2, 7, 3, 3, 1, 1, 1, 2],
177
- 30: [4, 2, 7, 3, None, 1, 1, 1, 2],
178
- 40: list("abcdefght"),
179
- },
180
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
181
- )
182
- psdf = ps.from_pandas(pdf)
183
- if LooseVersion(pd.__version__) >= LooseVersion("2.0.0"):
184
- # TODO(SPARK-43295): Make DataFrameGroupBy.sum support for string type columns
185
- pdf = pdf[[10, 20, 30]]
186
-
187
- for as_index in [True, False]:
188
- if as_index:
189
-
190
- def sort(df):
191
- return df.sort_index()
192
-
193
- else:
194
-
195
- def sort(df):
196
- return df.sort_values(10).reset_index(drop=True)
197
-
198
- self.assert_eq(
199
- sort(psdf.groupby(10, as_index=as_index).sum()),
200
- sort(pdf.groupby(10, as_index=as_index).sum()),
201
- )
202
- self.assert_eq(
203
- sort(psdf.groupby(10, as_index=as_index)[20].sum()),
204
- sort(pdf.groupby(10, as_index=as_index)[20].sum()),
205
- )
206
- self.assert_eq(
207
- sort(psdf.groupby(10, as_index=as_index)[[20, 30]].sum()),
208
- sort(pdf.groupby(10, as_index=as_index)[[20, 30]].sum()),
209
- )
210
-
211
- def test_nsmallest(self):
212
- pdf = pd.DataFrame(
213
- {
214
- "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
215
- "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
216
- "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
217
- "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
218
- },
219
- index=np.random.rand(9 * 3),
220
- )
221
- psdf = ps.from_pandas(pdf)
222
-
223
- self.assert_eq(
224
- psdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
225
- pdf.groupby(["a"])["b"].nsmallest(1).sort_values(),
226
- )
227
- self.assert_eq(
228
- psdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
229
- pdf.groupby(["a"])["b"].nsmallest(2).sort_index(),
230
- )
231
- self.assert_eq(
232
- (psdf.b * 10).groupby(psdf.a).nsmallest(2).sort_index(),
233
- (pdf.b * 10).groupby(pdf.a).nsmallest(2).sort_index(),
234
- )
235
- self.assert_eq(
236
- psdf.b.rename().groupby(psdf.a).nsmallest(2).sort_index(),
237
- pdf.b.rename().groupby(pdf.a).nsmallest(2).sort_index(),
238
- )
239
- self.assert_eq(
240
- psdf.b.groupby(psdf.a.rename()).nsmallest(2).sort_index(),
241
- pdf.b.groupby(pdf.a.rename()).nsmallest(2).sort_index(),
242
- )
243
- self.assert_eq(
244
- psdf.b.rename().groupby(psdf.a.rename()).nsmallest(2).sort_index(),
245
- pdf.b.rename().groupby(pdf.a.rename()).nsmallest(2).sort_index(),
246
- )
247
- with self.assertRaisesRegex(ValueError, "nsmallest do not support multi-index now"):
248
- psdf.set_index(["a", "b"]).groupby(["c"])["d"].nsmallest(1)
249
-
250
- def test_nlargest(self):
251
- pdf = pd.DataFrame(
252
- {
253
- "a": [1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
254
- "b": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
255
- "c": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
256
- "d": [1, 2, 2, 2, 3, 3, 3, 4, 4] * 3,
257
- },
258
- index=np.random.rand(9 * 3),
259
- )
260
- psdf = ps.from_pandas(pdf)
261
-
262
- self.assert_eq(
263
- psdf.groupby(["a"])["b"].nlargest(1).sort_values(),
264
- pdf.groupby(["a"])["b"].nlargest(1).sort_values(),
265
- )
266
- self.assert_eq(
267
- psdf.groupby(["a"])["b"].nlargest(2).sort_index(),
268
- pdf.groupby(["a"])["b"].nlargest(2).sort_index(),
269
- )
270
- self.assert_eq(
271
- (psdf.b * 10).groupby(psdf.a).nlargest(2).sort_index(),
272
- (pdf.b * 10).groupby(pdf.a).nlargest(2).sort_index(),
273
- )
274
- self.assert_eq(
275
- psdf.b.rename().groupby(psdf.a).nlargest(2).sort_index(),
276
- pdf.b.rename().groupby(pdf.a).nlargest(2).sort_index(),
277
- )
278
- self.assert_eq(
279
- psdf.b.groupby(psdf.a.rename()).nlargest(2).sort_index(),
280
- pdf.b.groupby(pdf.a.rename()).nlargest(2).sort_index(),
281
- )
282
- self.assert_eq(
283
- psdf.b.rename().groupby(psdf.a.rename()).nlargest(2).sort_index(),
284
- pdf.b.rename().groupby(pdf.a.rename()).nlargest(2).sort_index(),
285
- )
286
- with self.assertRaisesRegex(ValueError, "nlargest do not support multi-index now"):
287
- psdf.set_index(["a", "b"]).groupby(["c"])["d"].nlargest(1)
288
-
289
- def test_shift(self):
290
- pdf = pd.DataFrame(
291
- {
292
- "a": [1, 1, 2, 2, 3, 3] * 3,
293
- "b": [1, 1, 2, 2, 3, 4] * 3,
294
- "c": [1, 4, 9, 16, 25, 36] * 3,
295
- },
296
- index=np.random.rand(6 * 3),
297
- )
298
- psdf = ps.from_pandas(pdf)
299
-
300
- self.assert_eq(
301
- psdf.groupby("a").shift().sort_index(), pdf.groupby("a").shift().sort_index()
302
- )
303
- # TODO: seems like a pandas' bug when fill_value is not None?
304
- # self.assert_eq(psdf.groupby(['a', 'b']).shift(periods=-1, fill_value=0).sort_index(),
305
- # pdf.groupby(['a', 'b']).shift(periods=-1, fill_value=0).sort_index())
306
- self.assert_eq(
307
- psdf.groupby(["b"])["a"].shift().sort_index(),
308
- pdf.groupby(["b"])["a"].shift().sort_index(),
309
- )
310
- self.assert_eq(
311
- psdf.groupby(["a", "b"])["c"].shift().sort_index(),
312
- pdf.groupby(["a", "b"])["c"].shift().sort_index(),
313
- )
314
- self.assert_eq(
315
- psdf.groupby(psdf.b // 5).shift().sort_index(),
316
- pdf.groupby(pdf.b // 5).shift().sort_index(),
317
- )
318
- self.assert_eq(
319
- psdf.groupby(psdf.b // 5)["a"].shift().sort_index(),
320
- pdf.groupby(pdf.b // 5)["a"].shift().sort_index(),
321
- )
322
- self.assert_eq(
323
- psdf.a.rename().groupby(psdf.b).shift().sort_index(),
324
- pdf.a.rename().groupby(pdf.b).shift().sort_index(),
325
- )
326
- self.assert_eq(
327
- psdf.a.groupby(psdf.b.rename()).shift().sort_index(),
328
- pdf.a.groupby(pdf.b.rename()).shift().sort_index(),
329
- )
330
- self.assert_eq(
331
- psdf.a.rename().groupby(psdf.b.rename()).shift().sort_index(),
332
- pdf.a.rename().groupby(pdf.b.rename()).shift().sort_index(),
333
- )
334
-
335
- self.assert_eq(psdf.groupby("a").shift().sum(), pdf.groupby("a").shift().sum().astype(int))
336
- self.assert_eq(
337
- psdf.a.rename().groupby(psdf.b).shift().sum(),
338
- pdf.a.rename().groupby(pdf.b).shift().sum(),
339
- )
340
-
341
- # multi-index columns
342
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
343
- pdf.columns = columns
344
- psdf.columns = columns
345
-
346
- self.assert_eq(
347
- psdf.groupby(("x", "a")).shift().sort_index(),
348
- pdf.groupby(("x", "a")).shift().sort_index(),
349
- )
350
- # TODO: seems like a pandas' bug when fill_value is not None?
351
- # self.assert_eq(psdf.groupby([('x', 'a'), ('x', 'b')]).shift(periods=-1,
352
- # fill_value=0).sort_index(),
353
- # pdf.groupby([('x', 'a'), ('x', 'b')]).shift(periods=-1,
354
- # fill_value=0).sort_index())
355
-
356
- def test_missing(self):
357
- psdf = ps.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]})
358
-
359
- # DataFrameGroupBy functions
360
- missing_functions = inspect.getmembers(
361
- MissingPandasLikeDataFrameGroupBy, inspect.isfunction
362
- )
363
- unsupported_functions = [
364
- name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
365
- ]
366
- for name in unsupported_functions:
367
- with self.assertRaisesRegex(
368
- PandasNotImplementedError,
369
- "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
370
- ):
371
- getattr(psdf.groupby("a"), name)()
372
-
373
- deprecated_functions = [
374
- name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
375
- ]
376
- for name in deprecated_functions:
377
- with self.assertRaisesRegex(
378
- PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
379
- ):
380
- getattr(psdf.groupby("a"), name)()
381
-
382
- # SeriesGroupBy functions
383
- missing_functions = inspect.getmembers(MissingPandasLikeSeriesGroupBy, inspect.isfunction)
384
- unsupported_functions = [
385
- name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function"
386
- ]
387
- for name in unsupported_functions:
388
- with self.assertRaisesRegex(
389
- PandasNotImplementedError,
390
- "method.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
391
- ):
392
- getattr(psdf.a.groupby(psdf.a), name)()
393
-
394
- deprecated_functions = [
395
- name for (name, type_) in missing_functions if type_.__name__ == "deprecated_function"
396
- ]
397
- for name in deprecated_functions:
398
- with self.assertRaisesRegex(
399
- PandasNotImplementedError, "method.*GroupBy.*{}.*is deprecated".format(name)
400
- ):
401
- getattr(psdf.a.groupby(psdf.a), name)()
402
-
403
- # DataFrameGroupBy properties
404
- missing_properties = inspect.getmembers(
405
- MissingPandasLikeDataFrameGroupBy, lambda o: isinstance(o, property)
406
- )
407
- unsupported_properties = [
408
- name
409
- for (name, type_) in missing_properties
410
- if type_.fget.__name__ == "unsupported_property"
411
- ]
412
- for name in unsupported_properties:
413
- with self.assertRaisesRegex(
414
- PandasNotImplementedError,
415
- "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
416
- ):
417
- getattr(psdf.groupby("a"), name)
418
- deprecated_properties = [
419
- name
420
- for (name, type_) in missing_properties
421
- if type_.fget.__name__ == "deprecated_property"
422
- ]
423
- for name in deprecated_properties:
424
- with self.assertRaisesRegex(
425
- PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
426
- ):
427
- getattr(psdf.groupby("a"), name)
428
-
429
- # SeriesGroupBy properties
430
- missing_properties = inspect.getmembers(
431
- MissingPandasLikeSeriesGroupBy, lambda o: isinstance(o, property)
432
- )
433
- unsupported_properties = [
434
- name
435
- for (name, type_) in missing_properties
436
- if type_.fget.__name__ == "unsupported_property"
437
- ]
438
- for name in unsupported_properties:
439
- with self.assertRaisesRegex(
440
- PandasNotImplementedError,
441
- "property.*GroupBy.*{}.*not implemented( yet\\.|\\. .+)".format(name),
442
- ):
443
- getattr(psdf.a.groupby(psdf.a), name)
444
- deprecated_properties = [
445
- name
446
- for (name, type_) in missing_properties
447
- if type_.fget.__name__ == "deprecated_property"
448
- ]
449
- for name in deprecated_properties:
450
- with self.assertRaisesRegex(
451
- PandasNotImplementedError, "property.*GroupBy.*{}.*is deprecated".format(name)
452
- ):
453
- getattr(psdf.a.groupby(psdf.a), name)
454
-
455
- @staticmethod
456
- def test_is_multi_agg_with_relabel():
457
-
458
- assert is_multi_agg_with_relabel(a="max") is False
459
- assert is_multi_agg_with_relabel(a_min=("a", "max"), a_max=("a", "min")) is True
460
-
461
- def test_get_group(self):
462
- pdf = pd.DataFrame(
463
- [
464
- ("falcon", "bird", 389.0),
465
- ("parrot", "bird", 24.0),
466
- ("lion", "mammal", 80.5),
467
- ("monkey", "mammal", np.nan),
468
- ],
469
- columns=["name", "class", "max_speed"],
470
- index=[0, 2, 3, 1],
471
- )
472
- pdf.columns.name = "Koalas"
473
- psdf = ps.from_pandas(pdf)
474
-
475
- self.assert_eq(
476
- psdf.groupby("class").get_group("bird"),
477
- pdf.groupby("class").get_group("bird"),
478
- )
479
- self.assert_eq(
480
- psdf.groupby("class")["name"].get_group("mammal"),
481
- pdf.groupby("class")["name"].get_group("mammal"),
482
- )
483
- self.assert_eq(
484
- psdf.groupby("class")[["name"]].get_group("mammal"),
485
- pdf.groupby("class")[["name"]].get_group("mammal"),
486
- )
487
- self.assert_eq(
488
- psdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
489
- pdf.groupby(["class", "name"]).get_group(("mammal", "lion")),
490
- )
491
- self.assert_eq(
492
- psdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
493
- pdf.groupby(["class", "name"])["max_speed"].get_group(("mammal", "lion")),
494
- )
495
- self.assert_eq(
496
- psdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
497
- pdf.groupby(["class", "name"])[["max_speed"]].get_group(("mammal", "lion")),
498
- )
499
- self.assert_eq(
500
- (psdf.max_speed + 1).groupby(psdf["class"]).get_group("mammal"),
501
- (pdf.max_speed + 1).groupby(pdf["class"]).get_group("mammal"),
502
- )
503
- self.assert_eq(
504
- psdf.groupby("max_speed").get_group(80.5),
505
- pdf.groupby("max_speed").get_group(80.5),
506
- )
507
-
508
- self.assertRaises(KeyError, lambda: psdf.groupby("class").get_group("fish"))
509
- self.assertRaises(TypeError, lambda: psdf.groupby("class").get_group(["bird", "mammal"]))
510
- self.assertRaises(KeyError, lambda: psdf.groupby("class")["name"].get_group("fish"))
511
- self.assertRaises(
512
- TypeError, lambda: psdf.groupby("class")["name"].get_group(["bird", "mammal"])
513
- )
514
- self.assertRaises(
515
- KeyError, lambda: psdf.groupby(["class", "name"]).get_group(("lion", "mammal"))
516
- )
517
- self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("lion",)))
518
- self.assertRaises(
519
- ValueError, lambda: psdf.groupby(["class", "name"]).get_group(("mammal",))
520
- )
521
- self.assertRaises(ValueError, lambda: psdf.groupby(["class", "name"]).get_group("mammal"))
522
-
523
- # MultiIndex columns
524
- pdf.columns = pd.MultiIndex.from_tuples([("A", "name"), ("B", "class"), ("C", "max_speed")])
525
- pdf.columns.names = ["Hello", "Koalas"]
526
- psdf = ps.from_pandas(pdf)
527
- self.assert_eq(
528
- psdf.groupby(("B", "class")).get_group("bird"),
529
- pdf.groupby(("B", "class")).get_group("bird"),
530
- )
531
- self.assert_eq(
532
- psdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
533
- pdf.groupby(("B", "class"))[[("A", "name")]].get_group("mammal"),
534
- )
535
- self.assert_eq(
536
- psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
537
- pdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal", "lion")),
538
- )
539
- self.assert_eq(
540
- psdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
541
- ("mammal", "lion")
542
- ),
543
- pdf.groupby([("B", "class"), ("A", "name")])[[("C", "max_speed")]].get_group(
544
- ("mammal", "lion")
545
- ),
546
- )
547
- self.assert_eq(
548
- (psdf[("C", "max_speed")] + 1).groupby(psdf[("B", "class")]).get_group("mammal"),
549
- (pdf[("C", "max_speed")] + 1).groupby(pdf[("B", "class")]).get_group("mammal"),
550
- )
551
- self.assert_eq(
552
- psdf.groupby(("C", "max_speed")).get_group(80.5),
553
- pdf.groupby(("C", "max_speed")).get_group(80.5),
554
- )
555
-
556
- self.assertRaises(KeyError, lambda: psdf.groupby(("B", "class")).get_group("fish"))
557
- self.assertRaises(
558
- TypeError, lambda: psdf.groupby(("B", "class")).get_group(["bird", "mammal"])
559
- )
560
- self.assertRaises(
561
- KeyError, lambda: psdf.groupby(("B", "class"))[("A", "name")].get_group("fish")
562
- )
563
- self.assertRaises(
564
- KeyError,
565
- lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion", "mammal")),
566
- )
567
- self.assertRaises(
568
- ValueError,
569
- lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("lion",)),
570
- )
571
- self.assertRaises(
572
- ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group(("mammal",))
573
- )
574
- self.assertRaises(
575
- ValueError, lambda: psdf.groupby([("B", "class"), ("A", "name")]).get_group("mammal")
576
- )
577
-
578
- def test_getitem(self):
579
- psdf = ps.DataFrame(
580
- {
581
- "a": [1, 1, 1, 1, 2, 2, 2, 3, 3, 3] * 3,
582
- "b": [2, 3, 1, 4, 6, 9, 8, 10, 7, 5] * 3,
583
- "c": [3, 5, 2, 5, 1, 2, 6, 4, 3, 6] * 3,
584
- },
585
- index=np.random.rand(10 * 3),
586
- )
587
-
588
- self.assertTrue(isinstance(psdf.groupby("a")["b"], SeriesGroupBy))
589
-
590
- def test_all_any(self):
591
- pdf = pd.DataFrame(
592
- {
593
- "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
594
- "B": [True, True, True, False, False, False, None, True, None, False],
595
- }
596
- )
597
- psdf = ps.from_pandas(pdf)
598
-
599
- for as_index in [True, False]:
600
- if as_index:
601
-
602
- def sort(df):
603
- return df.sort_index()
604
-
605
- else:
606
-
607
- def sort(df):
608
- return df.sort_values("A").reset_index(drop=True)
609
-
610
- self.assert_eq(
611
- sort(psdf.groupby("A", as_index=as_index).all()),
612
- sort(pdf.groupby("A", as_index=as_index).all()),
613
- )
614
- self.assert_eq(
615
- sort(psdf.groupby("A", as_index=as_index).any()),
616
- sort(pdf.groupby("A", as_index=as_index).any()),
617
- )
618
-
619
- self.assert_eq(
620
- sort(psdf.groupby("A", as_index=as_index).all()).B,
621
- sort(pdf.groupby("A", as_index=as_index).all()).B,
622
- )
623
- self.assert_eq(
624
- sort(psdf.groupby("A", as_index=as_index).any()).B,
625
- sort(pdf.groupby("A", as_index=as_index).any()).B,
626
- )
627
-
628
- self.assert_eq(
629
- psdf.B.groupby(psdf.A).all().sort_index(), pdf.B.groupby(pdf.A).all().sort_index()
630
- )
631
- self.assert_eq(
632
- psdf.B.groupby(psdf.A).any().sort_index(), pdf.B.groupby(pdf.A).any().sort_index()
633
- )
634
-
635
- # multi-index columns
636
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
637
- pdf.columns = columns
638
- psdf.columns = columns
639
-
640
- for as_index in [True, False]:
641
- if as_index:
642
-
643
- def sort(df):
644
- return df.sort_index()
645
-
646
- else:
647
-
648
- def sort(df):
649
- return df.sort_values(("X", "A")).reset_index(drop=True)
650
-
651
- self.assert_eq(
652
- sort(psdf.groupby(("X", "A"), as_index=as_index).all()),
653
- sort(pdf.groupby(("X", "A"), as_index=as_index).all()),
654
- )
655
- self.assert_eq(
656
- sort(psdf.groupby(("X", "A"), as_index=as_index).any()),
657
- sort(pdf.groupby(("X", "A"), as_index=as_index).any()),
658
- )
659
-
660
- # Test skipna
661
- pdf = pd.DataFrame({"A": [True, True], "B": [1, np.nan], "C": [True, None]})
662
- pdf.name = "x"
663
- psdf = ps.from_pandas(pdf)
664
- self.assert_eq(
665
- psdf.groupby("A").all(skipna=False).sort_index(),
666
- pdf.groupby("A").all(skipna=False).sort_index(),
667
- )
668
- self.assert_eq(
669
- psdf.groupby("A").all(skipna=True).sort_index(),
670
- pdf.groupby("A").all(skipna=True).sort_index(),
671
- )
672
-
673
- def test_raises(self):
674
- psdf = ps.DataFrame(
675
- {"a": [1, 2, 6, 4, 4, 6, 4, 3, 7], "b": [4, 2, 7, 3, 3, 1, 1, 1, 2]},
676
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
677
- )
678
- # test raises with incorrect key
679
- self.assertRaises(ValueError, lambda: psdf.groupby([]))
680
- self.assertRaises(KeyError, lambda: psdf.groupby("x"))
681
- self.assertRaises(KeyError, lambda: psdf.groupby(["a", "x"]))
682
- self.assertRaises(KeyError, lambda: psdf.groupby("a")["x"])
683
- self.assertRaises(KeyError, lambda: psdf.groupby("a")["b", "x"])
684
- self.assertRaises(KeyError, lambda: psdf.groupby("a")[["b", "x"]])
685
-
686
- def test_nunique(self):
687
- pdf = pd.DataFrame(
688
- {"a": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], "b": [2, 2, 2, 3, 3, 4, 4, 5, 5, 5]}
689
- )
690
- psdf = ps.from_pandas(pdf)
691
- self.assert_eq(
692
- psdf.groupby("a").agg({"b": "nunique"}).sort_index(),
693
- pdf.groupby("a").agg({"b": "nunique"}).sort_index(),
694
- )
695
- if LooseVersion(pd.__version__) < LooseVersion("1.1.0"):
696
- expected = ps.DataFrame({"b": [2, 2]}, index=pd.Index([0, 1], name="a"))
697
- self.assert_eq(psdf.groupby("a").nunique().sort_index(), expected)
698
- self.assert_eq(
699
- psdf.groupby("a").nunique(dropna=False).sort_index(),
700
- expected,
701
- )
702
- else:
703
- self.assert_eq(
704
- psdf.groupby("a").nunique().sort_index(), pdf.groupby("a").nunique().sort_index()
705
- )
706
- self.assert_eq(
707
- psdf.groupby("a").nunique(dropna=False).sort_index(),
708
- pdf.groupby("a").nunique(dropna=False).sort_index(),
709
- )
710
- self.assert_eq(
711
- psdf.groupby("a")["b"].nunique().sort_index(),
712
- pdf.groupby("a")["b"].nunique().sort_index(),
713
- )
714
- self.assert_eq(
715
- psdf.groupby("a")["b"].nunique(dropna=False).sort_index(),
716
- pdf.groupby("a")["b"].nunique(dropna=False).sort_index(),
717
- )
718
-
719
- nunique_psdf = psdf.groupby("a", as_index=False).agg({"b": "nunique"})
720
- nunique_pdf = pdf.groupby("a", as_index=False).agg({"b": "nunique"})
721
- self.assert_eq(
722
- nunique_psdf.sort_values(["a", "b"]).reset_index(drop=True),
723
- nunique_pdf.sort_values(["a", "b"]).reset_index(drop=True),
724
- )
725
-
726
- # multi-index columns
727
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("y", "b")])
728
- pdf.columns = columns
729
- psdf.columns = columns
730
-
731
- if LooseVersion(pd.__version__) < LooseVersion("1.1.0"):
732
- expected = ps.DataFrame({("y", "b"): [2, 2]}, index=pd.Index([0, 1], name=("x", "a")))
733
- self.assert_eq(
734
- psdf.groupby(("x", "a")).nunique().sort_index(),
735
- expected,
736
- )
737
- self.assert_eq(
738
- psdf.groupby(("x", "a")).nunique(dropna=False).sort_index(),
739
- expected,
740
- )
741
- else:
742
- self.assert_eq(
743
- psdf.groupby(("x", "a")).nunique().sort_index(),
744
- pdf.groupby(("x", "a")).nunique().sort_index(),
745
- )
746
- self.assert_eq(
747
- psdf.groupby(("x", "a")).nunique(dropna=False).sort_index(),
748
- pdf.groupby(("x", "a")).nunique(dropna=False).sort_index(),
749
- )
750
-
751
- def test_unique(self):
752
- for pdf in [
753
- pd.DataFrame(
754
- {"a": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0], "b": [2, 2, 2, 3, 3, 4, 4, 5, 5, 5]}
755
- ),
756
- pd.DataFrame(
757
- {
758
- "a": [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
759
- "b": ["w", "w", "w", "x", "x", "y", "y", "z", "z", "z"],
760
- }
761
- ),
762
- ]:
763
- with self.subTest(pdf=pdf):
764
- psdf = ps.from_pandas(pdf)
765
-
766
- actual = psdf.groupby("a")["b"].unique().sort_index()._to_pandas()
767
- expect = pdf.groupby("a")["b"].unique().sort_index()
768
- self.assert_eq(len(actual), len(expect))
769
- for act, exp in zip(actual, expect):
770
- self.assertTrue(sorted(act) == sorted(exp))
771
-
772
- @unittest.skipIf(
773
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
774
- "TODO(SPARK-43444): Enable GroupBySlowTests.test_value_counts for pandas 2.0.0.",
775
- )
776
- def test_value_counts(self):
777
- pdf = pd.DataFrame(
778
- {"A": [np.nan, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, np.nan]}, columns=["A", "B"]
779
- )
780
- psdf = ps.from_pandas(pdf)
781
- self.assert_eq(
782
- psdf.groupby("A")["B"].value_counts().sort_index(),
783
- pdf.groupby("A")["B"].value_counts().sort_index(),
784
- )
785
- self.assert_eq(
786
- psdf.groupby("A")["B"].value_counts(dropna=False).sort_index(),
787
- pdf.groupby("A")["B"].value_counts(dropna=False).sort_index(),
788
- )
789
- self.assert_eq(
790
- psdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(),
791
- pdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(),
792
- # Returns are the same considering values and types,
793
- # disable check_exact to pass the assert_eq
794
- check_exact=False,
795
- )
796
- self.assert_eq(
797
- psdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(),
798
- pdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(),
799
- )
800
- self.assert_eq(
801
- psdf.groupby("A")["B"]
802
- .value_counts(sort=True, ascending=False, dropna=False)
803
- .sort_index(),
804
- pdf.groupby("A")["B"]
805
- .value_counts(sort=True, ascending=False, dropna=False)
806
- .sort_index(),
807
- )
808
- self.assert_eq(
809
- psdf.groupby("A")["B"]
810
- .value_counts(sort=True, ascending=True, dropna=False)
811
- .sort_index(),
812
- pdf.groupby("A")["B"]
813
- .value_counts(sort=True, ascending=True, dropna=False)
814
- .sort_index(),
815
- )
816
- self.assert_eq(
817
- psdf.B.rename().groupby(psdf.A).value_counts().sort_index(),
818
- pdf.B.rename().groupby(pdf.A).value_counts().sort_index(),
819
- )
820
- self.assert_eq(
821
- psdf.B.rename().groupby(psdf.A, dropna=False).value_counts().sort_index(),
822
- pdf.B.rename().groupby(pdf.A, dropna=False).value_counts().sort_index(),
823
- # Returns are the same considering values and types,
824
- # disable check_exact to pass the assert_eq
825
- check_exact=False,
826
- )
827
- self.assert_eq(
828
- psdf.B.groupby(psdf.A.rename()).value_counts().sort_index(),
829
- pdf.B.groupby(pdf.A.rename()).value_counts().sort_index(),
830
- )
831
- self.assert_eq(
832
- psdf.B.rename().groupby(psdf.A.rename()).value_counts().sort_index(),
833
- pdf.B.rename().groupby(pdf.A.rename()).value_counts().sort_index(),
834
- )
835
-
836
- def test_size(self):
837
- pdf = pd.DataFrame({"A": [1, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, 3]})
838
- psdf = ps.from_pandas(pdf)
839
- self.assert_eq(psdf.groupby("A").size().sort_index(), pdf.groupby("A").size().sort_index())
840
- self.assert_eq(
841
- psdf.groupby("A")["B"].size().sort_index(), pdf.groupby("A")["B"].size().sort_index()
842
- )
843
- self.assert_eq(
844
- psdf.groupby("A")[["B"]].size().sort_index(),
845
- pdf.groupby("A")[["B"]].size().sort_index(),
846
- )
847
- self.assert_eq(
848
- psdf.groupby(["A", "B"]).size().sort_index(),
849
- pdf.groupby(["A", "B"]).size().sort_index(),
850
- )
851
-
852
- # multi-index columns
853
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
854
- pdf.columns = columns
855
- psdf.columns = columns
856
-
857
- self.assert_eq(
858
- psdf.groupby(("X", "A")).size().sort_index(),
859
- pdf.groupby(("X", "A")).size().sort_index(),
860
- )
861
- self.assert_eq(
862
- psdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
863
- pdf.groupby([("X", "A"), ("Y", "B")]).size().sort_index(),
864
- )
865
-
866
- def test_diff(self):
867
- pdf = pd.DataFrame(
868
- {
869
- "a": [1, 2, 3, 4, 5, 6] * 3,
870
- "b": [1, 1, 2, 3, 5, 8] * 3,
871
- "c": [1, 4, 9, 16, 25, 36] * 3,
872
- }
873
- )
874
- psdf = ps.from_pandas(pdf)
875
-
876
- self.assert_eq(psdf.groupby("b").diff().sort_index(), pdf.groupby("b").diff().sort_index())
877
- self.assert_eq(
878
- psdf.groupby(["a", "b"]).diff().sort_index(),
879
- pdf.groupby(["a", "b"]).diff().sort_index(),
880
- )
881
- self.assert_eq(
882
- psdf.groupby(["b"])["a"].diff().sort_index(),
883
- pdf.groupby(["b"])["a"].diff().sort_index(),
884
- )
885
- self.assert_eq(
886
- psdf.groupby(["b"])[["a", "b"]].diff().sort_index(),
887
- pdf.groupby(["b"])[["a", "b"]].diff().sort_index(),
888
- )
889
- self.assert_eq(
890
- psdf.groupby(psdf.b // 5).diff().sort_index(),
891
- pdf.groupby(pdf.b // 5).diff().sort_index(),
892
- )
893
- self.assert_eq(
894
- psdf.groupby(psdf.b // 5)["a"].diff().sort_index(),
895
- pdf.groupby(pdf.b // 5)["a"].diff().sort_index(),
896
- )
897
-
898
- self.assert_eq(psdf.groupby("b").diff().sum(), pdf.groupby("b").diff().sum().astype(int))
899
- self.assert_eq(psdf.groupby(["b"])["a"].diff().sum(), pdf.groupby(["b"])["a"].diff().sum())
900
-
901
- # multi-index columns
902
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
903
- pdf.columns = columns
904
- psdf.columns = columns
905
-
906
- self.assert_eq(
907
- psdf.groupby(("x", "b")).diff().sort_index(),
908
- pdf.groupby(("x", "b")).diff().sort_index(),
909
- )
910
- self.assert_eq(
911
- psdf.groupby([("x", "a"), ("x", "b")]).diff().sort_index(),
912
- pdf.groupby([("x", "a"), ("x", "b")]).diff().sort_index(),
913
- )
914
-
915
- def test_rank(self):
916
- pdf = pd.DataFrame(
917
- {
918
- "a": [1, 2, 3, 4, 5, 6] * 3,
919
- "b": [1, 1, 2, 3, 5, 8] * 3,
920
- "c": [1, 4, 9, 16, 25, 36] * 3,
921
- },
922
- index=np.random.rand(6 * 3),
923
- )
924
- psdf = ps.from_pandas(pdf)
925
-
926
- self.assert_eq(psdf.groupby("b").rank().sort_index(), pdf.groupby("b").rank().sort_index())
927
- self.assert_eq(
928
- psdf.groupby(["a", "b"]).rank().sort_index(),
929
- pdf.groupby(["a", "b"]).rank().sort_index(),
930
- )
931
- self.assert_eq(
932
- psdf.groupby(["b"])["a"].rank().sort_index(),
933
- pdf.groupby(["b"])["a"].rank().sort_index(),
934
- )
935
- self.assert_eq(
936
- psdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
937
- pdf.groupby(["b"])[["a", "c"]].rank().sort_index(),
938
- )
939
- self.assert_eq(
940
- psdf.groupby(psdf.b // 5).rank().sort_index(),
941
- pdf.groupby(pdf.b // 5).rank().sort_index(),
942
- )
943
- self.assert_eq(
944
- psdf.groupby(psdf.b // 5)["a"].rank().sort_index(),
945
- pdf.groupby(pdf.b // 5)["a"].rank().sort_index(),
946
- )
947
-
948
- self.assert_eq(psdf.groupby("b").rank().sum(), pdf.groupby("b").rank().sum())
949
- self.assert_eq(psdf.groupby(["b"])["a"].rank().sum(), pdf.groupby(["b"])["a"].rank().sum())
950
-
951
- # multi-index columns
952
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
953
- pdf.columns = columns
954
- psdf.columns = columns
955
-
956
- self.assert_eq(
957
- psdf.groupby(("x", "b")).rank().sort_index(),
958
- pdf.groupby(("x", "b")).rank().sort_index(),
959
- )
960
- self.assert_eq(
961
- psdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
962
- pdf.groupby([("x", "a"), ("x", "b")]).rank().sort_index(),
963
- )
964
-
965
-
966
- class GroupByTests(GroupByTestsMixin, PandasOnSparkTestCase, TestUtils):
967
- pass
968
-
969
-
970
- if __name__ == "__main__":
971
- from pyspark.pandas.tests.groupby.test_groupby import * # noqa: F401
972
-
973
- try:
974
- import xmlrunner
975
-
976
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
977
- except ImportError:
978
- testRunner = None
979
- unittest.main(testRunner=testRunner, verbosity=2)