snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (476)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/function_defaults.py +207 -0
  7. snowflake/snowpark_connect/expression/literal.py +18 -2
  8. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  9. snowflake/snowpark_connect/expression/map_expression.py +10 -1
  10. snowflake/snowpark_connect/expression/map_extension.py +12 -2
  11. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  12. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  13. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  14. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  15. snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
  16. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  17. snowflake/snowpark_connect/hidden_column.py +39 -0
  18. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
  21. snowflake/snowpark_connect/relation/map_extension.py +56 -15
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
  24. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  25. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  26. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  27. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  28. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  29. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  30. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  31. snowflake/snowpark_connect/relation/read/utils.py +6 -7
  32. snowflake/snowpark_connect/relation/utils.py +1 -170
  33. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  34. snowflake/snowpark_connect/resources_initializer.py +29 -1
  35. snowflake/snowpark_connect/server.py +18 -3
  36. snowflake/snowpark_connect/type_mapping.py +29 -25
  37. snowflake/snowpark_connect/typed_column.py +14 -0
  38. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  39. snowflake/snowpark_connect/utils/context.py +6 -1
  40. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  41. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  42. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  43. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  44. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  45. snowflake/snowpark_connect/version.py +1 -1
  46. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  47. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  48. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  52. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  53. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  101. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  102. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  362. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  444. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  445. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  468. snowpark_connect-0.23.0.dist-info/RECORD +0 -893
  469. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  470. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  471. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  472. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  473. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  474. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  475. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  476. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,869 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
- from distutils.version import LooseVersion
18
- import unittest
19
-
20
- import numpy as np
21
- import pandas as pd
22
- from pandas.tseries.offsets import DateOffset
23
-
24
- from pyspark import pandas as ps
25
- from pyspark.pandas.config import option_context
26
- from pyspark.testing.pandasutils import ComparisonTestBase
27
- from pyspark.testing.sqlutils import SQLTestUtils
28
-
29
-
30
- # This file contains test cases for 'Reindexing / Selection / Label manipulation'
31
- # https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/frame.html#reindexing-selection-label-manipulation
32
- class FrameReindexingMixin:
33
- @property
34
- def pdf(self):
35
- return pd.DataFrame(
36
- {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
37
- index=np.random.rand(9),
38
- )
39
-
40
- @property
41
- def df_pair(self):
42
- pdf = self.pdf
43
- psdf = ps.from_pandas(pdf)
44
- return pdf, psdf
45
-
46
- def test_add_prefix(self):
47
- pdf = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}, index=np.random.rand(4))
48
- psdf = ps.from_pandas(pdf)
49
- self.assert_eq(pdf.add_prefix("col_"), psdf.add_prefix("col_"))
50
-
51
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
52
- pdf.columns = columns
53
- psdf.columns = columns
54
- self.assert_eq(pdf.add_prefix("col_"), psdf.add_prefix("col_"))
55
-
56
- def test_add_suffix(self):
57
- pdf = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}, index=np.random.rand(4))
58
- psdf = ps.from_pandas(pdf)
59
- self.assert_eq(pdf.add_suffix("first_series"), psdf.add_suffix("first_series"))
60
-
61
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
62
- pdf.columns = columns
63
- psdf.columns = columns
64
- self.assert_eq(pdf.add_suffix("first_series"), psdf.add_suffix("first_series"))
65
-
66
- def test_at_time(self):
67
- idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
68
- pdf = pd.DataFrame({"A": [1, 2, 3, 4]}, index=idx)
69
- psdf = ps.from_pandas(pdf)
70
- psdf.at_time("0:20")
71
- self.assert_eq(
72
- pdf.at_time("0:20").sort_index(),
73
- psdf.at_time("0:20").sort_index(),
74
- )
75
-
76
- # Index name is 'ts'
77
- pdf.index.name = "ts"
78
- psdf = ps.from_pandas(pdf)
79
- self.assert_eq(
80
- pdf.at_time("0:20").sort_index(),
81
- psdf.at_time("0:20").sort_index(),
82
- )
83
-
84
- # Index name is 'ts', column label is 'index'
85
- pdf.columns = pd.Index(["index"])
86
- psdf = ps.from_pandas(pdf)
87
- self.assert_eq(
88
- pdf.at_time("0:40").sort_index(),
89
- psdf.at_time("0:40").sort_index(),
90
- )
91
-
92
- # Both index name and column label are 'index'
93
- pdf.index.name = "index"
94
- psdf = ps.from_pandas(pdf)
95
- self.assert_eq(
96
- pdf.at_time("0:40").sort_index(),
97
- psdf.at_time("0:40").sort_index(),
98
- )
99
-
100
- # Index name is 'index', column label is ('X', 'A')
101
- pdf.columns = pd.MultiIndex.from_arrays([["X"], ["A"]])
102
- psdf = ps.from_pandas(pdf)
103
- self.assert_eq(
104
- pdf.at_time("0:40").sort_index(),
105
- psdf.at_time("0:40").sort_index(),
106
- )
107
-
108
- with self.assertRaisesRegex(NotImplementedError, "'asof' argument is not supported"):
109
- psdf.at_time("0:15", asof=True)
110
-
111
- with self.assertRaisesRegex(NotImplementedError, "at_time currently only works for axis=0"):
112
- psdf.at_time("0:15", axis=1)
113
-
114
- psdf = ps.DataFrame({"A": [1, 2, 3, 4]})
115
- with self.assertRaisesRegex(TypeError, "Index must be DatetimeIndex"):
116
- psdf.at_time("0:15")
117
-
118
- @unittest.skipIf(
119
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
120
- "TODO(SPARK-43557): Enable DataFrameSlowTests.test_between_time for pandas 2.0.0.",
121
- )
122
- def test_between_time(self):
123
- idx = pd.date_range("2018-04-09", periods=4, freq="1D20min")
124
- pdf = pd.DataFrame({"A": [1, 2, 3, 4]}, index=idx)
125
- psdf = ps.from_pandas(pdf)
126
- self.assert_eq(
127
- pdf.between_time("0:15", "0:45").sort_index(),
128
- psdf.between_time("0:15", "0:45").sort_index(),
129
- )
130
-
131
- pdf.index.name = "ts"
132
- psdf = ps.from_pandas(pdf)
133
- self.assert_eq(
134
- pdf.between_time("0:15", "0:45").sort_index(),
135
- psdf.between_time("0:15", "0:45").sort_index(),
136
- )
137
-
138
- # Column label is 'index'
139
- pdf.columns = pd.Index(["index"])
140
- psdf = ps.from_pandas(pdf)
141
- self.assert_eq(
142
- pdf.between_time("0:15", "0:45").sort_index(),
143
- psdf.between_time("0:15", "0:45").sort_index(),
144
- )
145
-
146
- # Both index name and column label are 'index'
147
- pdf.index.name = "index"
148
- psdf = ps.from_pandas(pdf)
149
- self.assert_eq(
150
- pdf.between_time("0:15", "0:45").sort_index(),
151
- psdf.between_time("0:15", "0:45").sort_index(),
152
- )
153
-
154
- # Index name is 'index', column label is ('X', 'A')
155
- pdf.columns = pd.MultiIndex.from_arrays([["X"], ["A"]])
156
- psdf = ps.from_pandas(pdf)
157
- self.assert_eq(
158
- pdf.between_time("0:15", "0:45").sort_index(),
159
- psdf.between_time("0:15", "0:45").sort_index(),
160
- )
161
-
162
- with self.assertRaisesRegex(
163
- NotImplementedError, "between_time currently only works for axis=0"
164
- ):
165
- psdf.between_time("0:15", "0:45", axis=1)
166
-
167
- psdf = ps.DataFrame({"A": [1, 2, 3, 4]})
168
- with self.assertRaisesRegex(TypeError, "Index must be DatetimeIndex"):
169
- psdf.between_time("0:15", "0:45")
170
-
171
- def test_drop(self):
172
- pdf = pd.DataFrame({"x": [1, 2], "y": [3, 4], "z": [5, 6]}, index=np.random.rand(2))
173
- psdf = ps.from_pandas(pdf)
174
-
175
- # Assert 'labels' or 'columns' parameter is set
176
- expected_error_message = "Need to specify at least one of 'labels' or 'columns'"
177
- with self.assertRaisesRegex(ValueError, expected_error_message):
178
- psdf.drop()
179
-
180
- #
181
- # Drop columns
182
- #
183
-
184
- # Assert using a str for 'labels' works
185
- self.assert_eq(psdf.drop("x", axis=1), pdf.drop("x", axis=1))
186
- self.assert_eq((psdf + 1).drop("x", axis=1), (pdf + 1).drop("x", axis=1))
187
- # Assert using a list for 'labels' works
188
- self.assert_eq(psdf.drop(["y", "z"], axis=1), pdf.drop(["y", "z"], axis=1))
189
- self.assert_eq(psdf.drop(["x", "y", "z"], axis=1), pdf.drop(["x", "y", "z"], axis=1))
190
- # Assert using 'columns' instead of 'labels' produces the same results
191
- self.assert_eq(psdf.drop(columns="x"), pdf.drop(columns="x"))
192
- self.assert_eq(psdf.drop(columns=["y", "z"]), pdf.drop(columns=["y", "z"]))
193
- self.assert_eq(psdf.drop(columns=["x", "y", "z"]), pdf.drop(columns=["x", "y", "z"]))
194
- self.assert_eq(psdf.drop(columns=[]), pdf.drop(columns=[]))
195
-
196
- columns = pd.MultiIndex.from_tuples([(1, "x"), (1, "y"), (2, "z")])
197
- pdf.columns = columns
198
- psdf = ps.from_pandas(pdf)
199
-
200
- self.assert_eq(psdf.drop(columns=1), pdf.drop(columns=1))
201
- self.assert_eq(psdf.drop(columns=(1, "x")), pdf.drop(columns=(1, "x")))
202
- self.assert_eq(psdf.drop(columns=[(1, "x"), 2]), pdf.drop(columns=[(1, "x"), 2]))
203
- self.assert_eq(
204
- psdf.drop(columns=[(1, "x"), (1, "y"), (2, "z")]),
205
- pdf.drop(columns=[(1, "x"), (1, "y"), (2, "z")]),
206
- )
207
-
208
- self.assertRaises(KeyError, lambda: psdf.drop(columns=3))
209
- self.assertRaises(KeyError, lambda: psdf.drop(columns=(1, "z")))
210
-
211
- pdf.index = pd.MultiIndex.from_tuples([("i", 0), ("j", 1)])
212
- psdf = ps.from_pandas(pdf)
213
- self.assert_eq(
214
- psdf.drop(columns=[(1, "x"), (1, "y"), (2, "z")]),
215
- pdf.drop(columns=[(1, "x"), (1, "y"), (2, "z")]),
216
- )
217
-
218
- # non-string names
219
- pdf = pd.DataFrame({10: [1, 2], 20: [3, 4], 30: [5, 6]}, index=np.random.rand(2))
220
- psdf = ps.from_pandas(pdf)
221
-
222
- self.assert_eq(psdf.drop(10, axis=1), pdf.drop(10, axis=1))
223
- self.assert_eq(psdf.drop([20, 30], axis=1), pdf.drop([20, 30], axis=1))
224
-
225
- #
226
- # Drop rows
227
- #
228
-
229
- pdf = pd.DataFrame({"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["A", "B", "C"])
230
- psdf = ps.from_pandas(pdf)
231
-
232
- # Given labels (and axis = 0)
233
- self.assert_eq(psdf.drop(labels="A", axis=0), pdf.drop(labels="A", axis=0))
234
- self.assert_eq(psdf.drop(labels="A"), pdf.drop(labels="A"))
235
- self.assert_eq((psdf + 1).drop(labels="A"), (pdf + 1).drop(labels="A"))
236
- self.assert_eq(psdf.drop(labels=["A", "C"], axis=0), pdf.drop(labels=["A", "C"], axis=0))
237
- self.assert_eq(
238
- psdf.drop(labels=["A", "B", "C"], axis=0), pdf.drop(labels=["A", "B", "C"], axis=0)
239
- )
240
-
241
- with ps.option_context("compute.isin_limit", 2):
242
- self.assert_eq(
243
- psdf.drop(labels=["A", "B", "C"], axis=0), pdf.drop(labels=["A", "B", "C"], axis=0)
244
- )
245
-
246
- # Given index
247
- self.assert_eq(psdf.drop(index="A"), pdf.drop(index="A"))
248
- self.assert_eq(psdf.drop(index=["A", "C"]), pdf.drop(index=["A", "C"]))
249
- self.assert_eq(psdf.drop(index=["A", "B", "C"]), pdf.drop(index=["A", "B", "C"]))
250
- self.assert_eq(psdf.drop(index=[]), pdf.drop(index=[]))
251
-
252
- with ps.option_context("compute.isin_limit", 2):
253
- self.assert_eq(psdf.drop(index=["A", "B", "C"]), pdf.drop(index=["A", "B", "C"]))
254
-
255
- # Non-string names
256
- pdf.index = [10, 20, 30]
257
- psdf = ps.from_pandas(pdf)
258
- self.assert_eq(psdf.drop(labels=10, axis=0), pdf.drop(labels=10, axis=0))
259
- self.assert_eq(psdf.drop(labels=[10, 30], axis=0), pdf.drop(labels=[10, 30], axis=0))
260
- self.assert_eq(
261
- psdf.drop(labels=[10, 20, 30], axis=0), pdf.drop(labels=[10, 20, 30], axis=0)
262
- )
263
-
264
- with ps.option_context("compute.isin_limit", 2):
265
- self.assert_eq(
266
- psdf.drop(labels=[10, 20, 30], axis=0), pdf.drop(labels=[10, 20, 30], axis=0)
267
- )
268
-
269
- # MultiIndex
270
- pdf.index = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
271
- psdf = ps.from_pandas(pdf)
272
- self.assertRaises(NotImplementedError, lambda: psdf.drop(labels=[("a", "x")]))
273
-
274
- #
275
- # Drop rows and columns
276
- #
277
- pdf = pd.DataFrame({"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["A", "B", "C"])
278
- psdf = ps.from_pandas(pdf)
279
- self.assert_eq(psdf.drop(index="A", columns="X"), pdf.drop(index="A", columns="X"))
280
- self.assert_eq(
281
- psdf.drop(index=["A", "C"], columns=["X", "Z"]),
282
- pdf.drop(index=["A", "C"], columns=["X", "Z"]),
283
- )
284
- self.assert_eq(
285
- psdf.drop(index=["A", "B", "C"], columns=["X", "Z"]),
286
- pdf.drop(index=["A", "B", "C"], columns=["X", "Z"]),
287
- )
288
- with ps.option_context("compute.isin_limit", 2):
289
- self.assert_eq(
290
- psdf.drop(index=["A", "B", "C"], columns=["X", "Z"]),
291
- pdf.drop(index=["A", "B", "C"], columns=["X", "Z"]),
292
- )
293
- self.assert_eq(
294
- psdf.drop(index=[], columns=["X", "Z"]),
295
- pdf.drop(index=[], columns=["X", "Z"]),
296
- )
297
- self.assert_eq(
298
- psdf.drop(index=["A", "B", "C"], columns=[]),
299
- pdf.drop(index=["A", "B", "C"], columns=[]),
300
- )
301
- self.assert_eq(
302
- psdf.drop(index=[], columns=[]),
303
- pdf.drop(index=[], columns=[]),
304
- )
305
- self.assertRaises(
306
- ValueError,
307
- lambda: psdf.drop(labels="A", axis=0, columns="X"),
308
- )
309
-
310
- def test_droplevel(self):
311
- pdf = (
312
- pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
313
- .set_index([0, 1])
314
- .rename_axis(["a", "b"])
315
- )
316
- pdf.columns = pd.MultiIndex.from_tuples(
317
- [("c", "e"), ("d", "f")], names=["level_1", "level_2"]
318
- )
319
- psdf = ps.from_pandas(pdf)
320
-
321
- self.assertRaises(ValueError, lambda: psdf.droplevel(["a", "b"]))
322
- self.assertRaises(ValueError, lambda: psdf.droplevel([1, 1, 1, 1, 1]))
323
- self.assertRaises(IndexError, lambda: psdf.droplevel(2))
324
- self.assertRaises(IndexError, lambda: psdf.droplevel(-3))
325
- self.assertRaises(KeyError, lambda: psdf.droplevel({"a"}))
326
- self.assertRaises(KeyError, lambda: psdf.droplevel({"a": 1}))
327
-
328
- self.assertRaises(ValueError, lambda: psdf.droplevel(["level_1", "level_2"], axis=1))
329
- self.assertRaises(IndexError, lambda: psdf.droplevel(2, axis=1))
330
- self.assertRaises(IndexError, lambda: psdf.droplevel(-3, axis=1))
331
- self.assertRaises(KeyError, lambda: psdf.droplevel({"level_1"}, axis=1))
332
- self.assertRaises(KeyError, lambda: psdf.droplevel({"level_1": 1}, axis=1))
333
-
334
- self.assert_eq(pdf.droplevel("a"), psdf.droplevel("a"))
335
- self.assert_eq(pdf.droplevel(["a"]), psdf.droplevel(["a"]))
336
- self.assert_eq(pdf.droplevel(("a",)), psdf.droplevel(("a",)))
337
- self.assert_eq(pdf.droplevel(0), psdf.droplevel(0))
338
- self.assert_eq(pdf.droplevel(-1), psdf.droplevel(-1))
339
-
340
- self.assert_eq(pdf.droplevel("level_1", axis=1), psdf.droplevel("level_1", axis=1))
341
- self.assert_eq(pdf.droplevel(["level_1"], axis=1), psdf.droplevel(["level_1"], axis=1))
342
- self.assert_eq(pdf.droplevel(("level_1",), axis=1), psdf.droplevel(("level_1",), axis=1))
343
- self.assert_eq(pdf.droplevel(0, axis=1), psdf.droplevel(0, axis=1))
344
- self.assert_eq(pdf.droplevel(-1, axis=1), psdf.droplevel(-1, axis=1))
345
-
346
- # Tupled names
347
- pdf.columns.names = [("level", 1), ("level", 2)]
348
- pdf.index.names = [("a", 10), ("x", 20)]
349
- psdf = ps.from_pandas(pdf)
350
-
351
- self.assertRaises(KeyError, lambda: psdf.droplevel("a"))
352
- self.assertRaises(KeyError, lambda: psdf.droplevel(("a", 10)))
353
-
354
- self.assert_eq(pdf.droplevel([("a", 10)]), psdf.droplevel([("a", 10)]))
355
- self.assert_eq(
356
- pdf.droplevel([("level", 1)], axis=1), psdf.droplevel([("level", 1)], axis=1)
357
- )
358
-
359
- # non-string names
360
- pdf = (
361
- pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
362
- .set_index([0, 1])
363
- .rename_axis([10.0, 20.0])
364
- )
365
- pdf.columns = pd.MultiIndex.from_tuples([("c", "e"), ("d", "f")], names=[100.0, 200.0])
366
- psdf = ps.from_pandas(pdf)
367
-
368
- self.assert_eq(pdf.droplevel(10.0), psdf.droplevel(10.0))
369
- self.assert_eq(pdf.droplevel([10.0]), psdf.droplevel([10.0]))
370
- self.assert_eq(pdf.droplevel((10.0,)), psdf.droplevel((10.0,)))
371
- self.assert_eq(pdf.droplevel(0), psdf.droplevel(0))
372
- self.assert_eq(pdf.droplevel(-1), psdf.droplevel(-1))
373
- self.assert_eq(pdf.droplevel(100.0, axis=1), psdf.droplevel(100.0, axis=1))
374
- self.assert_eq(pdf.droplevel(0, axis=1), psdf.droplevel(0, axis=1))
375
-
376
- def test_drop_duplicates(self):
377
- pdf = pd.DataFrame(
378
- {"a": [1, 2, 2, 2, 3], "b": ["a", "a", "a", "c", "d"]}, index=np.random.rand(5)
379
- )
380
- psdf = ps.from_pandas(pdf)
381
-
382
- # inplace is False
383
- for keep in ["first", "last", False]:
384
- with self.subTest(keep=keep):
385
- self.assert_eq(
386
- pdf.drop_duplicates(keep=keep).sort_index(),
387
- psdf.drop_duplicates(keep=keep).sort_index(),
388
- )
389
- self.assert_eq(
390
- pdf.drop_duplicates("a", keep=keep).sort_index(),
391
- psdf.drop_duplicates("a", keep=keep).sort_index(),
392
- )
393
- self.assert_eq(
394
- pdf.drop_duplicates(["a", "b"], keep=keep).sort_index(),
395
- psdf.drop_duplicates(["a", "b"], keep=keep).sort_index(),
396
- )
397
- self.assert_eq(
398
- pdf.set_index("a", append=True).drop_duplicates(keep=keep).sort_index(),
399
- psdf.set_index("a", append=True).drop_duplicates(keep=keep).sort_index(),
400
- )
401
- self.assert_eq(
402
- pdf.set_index("a", append=True).drop_duplicates("b", keep=keep).sort_index(),
403
- psdf.set_index("a", append=True).drop_duplicates("b", keep=keep).sort_index(),
404
- )
405
-
406
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("y", "b")])
407
- pdf.columns = columns
408
- psdf.columns = columns
409
-
410
- # inplace is False
411
- for keep in ["first", "last", False]:
412
- with self.subTest("multi-index columns", keep=keep):
413
- self.assert_eq(
414
- pdf.drop_duplicates(keep=keep).sort_index(),
415
- psdf.drop_duplicates(keep=keep).sort_index(),
416
- )
417
- self.assert_eq(
418
- pdf.drop_duplicates(("x", "a"), keep=keep).sort_index(),
419
- psdf.drop_duplicates(("x", "a"), keep=keep).sort_index(),
420
- )
421
- self.assert_eq(
422
- pdf.drop_duplicates([("x", "a"), ("y", "b")], keep=keep).sort_index(),
423
- psdf.drop_duplicates([("x", "a"), ("y", "b")], keep=keep).sort_index(),
424
- )
425
- self.assert_eq(
426
- pdf.drop_duplicates(
427
- [("x", "a"), ("y", "b")], keep=keep, ignore_index=True
428
- ).sort_index(),
429
- psdf.drop_duplicates(
430
- [("x", "a"), ("y", "b")], keep=keep, ignore_index=True
431
- ).sort_index(),
432
- )
433
-
434
- # inplace is True
435
- subset_list = [None, "a", ["a", "b"]]
436
- for subset in subset_list:
437
- pdf = pd.DataFrame(
438
- {"a": [1, 2, 2, 2, 3], "b": ["a", "a", "a", "c", "d"]}, index=np.random.rand(5)
439
- )
440
- psdf = ps.from_pandas(pdf)
441
- pser = pdf.a
442
- psser = psdf.a
443
- pdf.drop_duplicates(subset=subset, inplace=True)
444
- psdf.drop_duplicates(subset=subset, inplace=True)
445
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
446
- self.assert_eq(psser.sort_index(), pser.sort_index())
447
-
448
- # multi-index columns, inplace is True
449
- subset_list = [None, ("x", "a"), [("x", "a"), ("y", "b")]]
450
- for subset in subset_list:
451
- pdf = pd.DataFrame(
452
- {"a": [1, 2, 2, 2, 3], "b": ["a", "a", "a", "c", "d"]}, index=np.random.rand(5)
453
- )
454
- psdf = ps.from_pandas(pdf)
455
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("y", "b")])
456
- pdf.columns = columns
457
- psdf.columns = columns
458
- pser = pdf[("x", "a")]
459
- psser = psdf[("x", "a")]
460
- pdf.drop_duplicates(subset=subset, inplace=True)
461
- pdf.drop_duplicates(subset=subset, inplace=True, ignore_index=True)
462
- psdf.drop_duplicates(subset=subset, inplace=True)
463
- psdf.drop_duplicates(subset=subset, inplace=True, ignore_index=True)
464
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
465
- self.assert_eq(psser.sort_index(), pser.sort_index())
466
-
467
- # non-string names
468
- pdf = pd.DataFrame(
469
- {10: [1, 2, 2, 2, 3], 20: ["a", "a", "a", "c", "d"]}, index=np.random.rand(5)
470
- )
471
- psdf = ps.from_pandas(pdf)
472
-
473
- self.assert_eq(
474
- pdf.drop_duplicates(10, keep=keep).sort_index(),
475
- psdf.drop_duplicates(10, keep=keep).sort_index(),
476
- )
477
- self.assert_eq(
478
- pdf.drop_duplicates([10, 20], keep=keep).sort_index(),
479
- psdf.drop_duplicates([10, 20], keep=keep).sort_index(),
480
- )
481
-
482
- def test_duplicated(self):
483
- pdf = pd.DataFrame(
484
- {"a": [1, 1, 2, 3], "b": [1, 1, 1, 4], "c": [1, 1, 1, 5]}, index=np.random.rand(4)
485
- )
486
- psdf = ps.from_pandas(pdf)
487
-
488
- self.assert_eq(pdf.duplicated().sort_index(), psdf.duplicated().sort_index())
489
- self.assert_eq(
490
- pdf.duplicated(keep="last").sort_index(),
491
- psdf.duplicated(keep="last").sort_index(),
492
- )
493
- self.assert_eq(
494
- pdf.duplicated(keep=False).sort_index(),
495
- psdf.duplicated(keep=False).sort_index(),
496
- )
497
- self.assert_eq(
498
- pdf.duplicated(subset="b").sort_index(),
499
- psdf.duplicated(subset="b").sort_index(),
500
- )
501
- self.assert_eq(
502
- pdf.duplicated(subset=["b"]).sort_index(),
503
- psdf.duplicated(subset=["b"]).sort_index(),
504
- )
505
- with self.assertRaisesRegex(ValueError, "'keep' only supports 'first', 'last' and False"):
506
- psdf.duplicated(keep="false")
507
- with self.assertRaisesRegex(KeyError, "'d'"):
508
- psdf.duplicated(subset=["d"])
509
-
510
- pdf.index.name = "x"
511
- psdf.index.name = "x"
512
- self.assert_eq(pdf.duplicated().sort_index(), psdf.duplicated().sort_index())
513
-
514
- # multi-index
515
- self.assert_eq(
516
- pdf.set_index("a", append=True).duplicated().sort_index(),
517
- psdf.set_index("a", append=True).duplicated().sort_index(),
518
- )
519
- self.assert_eq(
520
- pdf.set_index("a", append=True).duplicated(keep=False).sort_index(),
521
- psdf.set_index("a", append=True).duplicated(keep=False).sort_index(),
522
- )
523
- self.assert_eq(
524
- pdf.set_index("a", append=True).duplicated(subset=["b"]).sort_index(),
525
- psdf.set_index("a", append=True).duplicated(subset=["b"]).sort_index(),
526
- )
527
-
528
- # mutli-index columns
529
- columns = pd.MultiIndex.from_tuples([("x", "a"), ("x", "b"), ("y", "c")])
530
- pdf.columns = columns
531
- psdf.columns = columns
532
- self.assert_eq(pdf.duplicated().sort_index(), psdf.duplicated().sort_index())
533
- self.assert_eq(
534
- pdf.duplicated(subset=("x", "b")).sort_index(),
535
- psdf.duplicated(subset=("x", "b")).sort_index(),
536
- )
537
- self.assert_eq(
538
- pdf.duplicated(subset=[("x", "b")]).sort_index(),
539
- psdf.duplicated(subset=[("x", "b")]).sort_index(),
540
- )
541
-
542
- # non-string names
543
- pdf = pd.DataFrame(
544
- {10: [1, 1, 2, 3], 20: [1, 1, 1, 4], 30: [1, 1, 1, 5]}, index=np.random.rand(4)
545
- )
546
- psdf = ps.from_pandas(pdf)
547
-
548
- self.assert_eq(pdf.duplicated().sort_index(), psdf.duplicated().sort_index())
549
- self.assert_eq(
550
- pdf.duplicated(subset=10).sort_index(),
551
- psdf.duplicated(subset=10).sort_index(),
552
- )
553
-
554
- def test_filter(self):
555
- pdf = pd.DataFrame(
556
- {
557
- "aa": ["aa", "bd", "bc", "ab", "ce"],
558
- "ba": [1, 2, 3, 4, 5],
559
- "cb": [1.0, 2.0, 3.0, 4.0, 5.0],
560
- "db": [1.0, np.nan, 3.0, np.nan, 5.0],
561
- }
562
- )
563
- pdf = pdf.set_index("aa")
564
- psdf = ps.from_pandas(pdf)
565
-
566
- self.assert_eq(
567
- psdf.filter(items=["ab", "aa"], axis=0).sort_index(),
568
- pdf.filter(items=["ab", "aa"], axis=0).sort_index(),
569
- )
570
-
571
- with option_context("compute.isin_limit", 0):
572
- self.assert_eq(
573
- psdf.filter(items=["ab", "aa"], axis=0).sort_index(),
574
- pdf.filter(items=["ab", "aa"], axis=0).sort_index(),
575
- )
576
-
577
- self.assert_eq(
578
- psdf.filter(items=["ba", "db"], axis=1).sort_index(),
579
- pdf.filter(items=["ba", "db"], axis=1).sort_index(),
580
- )
581
-
582
- self.assert_eq(psdf.filter(like="b", axis="index"), pdf.filter(like="b", axis="index"))
583
- self.assert_eq(psdf.filter(like="c", axis="columns"), pdf.filter(like="c", axis="columns"))
584
-
585
- self.assert_eq(
586
- psdf.filter(regex="b.*", axis="index"), pdf.filter(regex="b.*", axis="index")
587
- )
588
- self.assert_eq(
589
- psdf.filter(regex="b.*", axis="columns"), pdf.filter(regex="b.*", axis="columns")
590
- )
591
-
592
- pdf = pdf.set_index("ba", append=True)
593
- psdf = ps.from_pandas(pdf)
594
-
595
- self.assert_eq(
596
- psdf.filter(items=[("aa", 1), ("bd", 2)], axis=0).sort_index(),
597
- pdf.filter(items=[("aa", 1), ("bd", 2)], axis=0).sort_index(),
598
- )
599
-
600
- with self.assertRaisesRegex(TypeError, "Unsupported type list"):
601
- psdf.filter(items=[["aa", 1], ("bd", 2)], axis=0)
602
-
603
- with self.assertRaisesRegex(ValueError, "The item should not be empty."):
604
- psdf.filter(items=[(), ("bd", 2)], axis=0)
605
-
606
- self.assert_eq(psdf.filter(like="b", axis=0), pdf.filter(like="b", axis=0))
607
-
608
- self.assert_eq(psdf.filter(regex="b.*", axis=0), pdf.filter(regex="b.*", axis=0))
609
-
610
- with self.assertRaisesRegex(ValueError, "items should be a list-like object"):
611
- psdf.filter(items="b")
612
-
613
- with self.assertRaisesRegex(ValueError, "No axis named"):
614
- psdf.filter(regex="b.*", axis=123)
615
-
616
- with self.assertRaisesRegex(TypeError, "Must pass either `items`, `like`"):
617
- psdf.filter()
618
-
619
- with self.assertRaisesRegex(TypeError, "mutually exclusive"):
620
- psdf.filter(regex="b.*", like="aaa")
621
-
622
- # multi-index columns
623
- pdf = pd.DataFrame(
624
- {
625
- ("x", "aa"): ["aa", "ab", "bc", "bd", "ce"],
626
- ("x", "ba"): [1, 2, 3, 4, 5],
627
- ("y", "cb"): [1.0, 2.0, 3.0, 4.0, 5.0],
628
- ("z", "db"): [1.0, np.nan, 3.0, np.nan, 5.0],
629
- }
630
- )
631
- pdf = pdf.set_index(("x", "aa"))
632
- psdf = ps.from_pandas(pdf)
633
-
634
- self.assert_eq(
635
- psdf.filter(items=["ab", "aa"], axis=0).sort_index(),
636
- pdf.filter(items=["ab", "aa"], axis=0).sort_index(),
637
- )
638
- self.assert_eq(
639
- psdf.filter(items=[("x", "ba"), ("z", "db")], axis=1).sort_index(),
640
- pdf.filter(items=[("x", "ba"), ("z", "db")], axis=1).sort_index(),
641
- )
642
-
643
- self.assert_eq(psdf.filter(like="b", axis="index"), pdf.filter(like="b", axis="index"))
644
- self.assert_eq(psdf.filter(like="c", axis="columns"), pdf.filter(like="c", axis="columns"))
645
-
646
- self.assert_eq(
647
- psdf.filter(regex="b.*", axis="index"), pdf.filter(regex="b.*", axis="index")
648
- )
649
- self.assert_eq(
650
- psdf.filter(regex="b.*", axis="columns"), pdf.filter(regex="b.*", axis="columns")
651
- )
652
-
653
- def test_last(self):
654
- index = pd.date_range("2018-04-09", periods=4, freq="2D")
655
- pdf = pd.DataFrame([1, 2, 3, 4], index=index)
656
- psdf = ps.from_pandas(pdf)
657
- self.assert_eq(pdf.last("1D"), psdf.last("1D"))
658
- self.assert_eq(pdf.last(DateOffset(days=1)), psdf.last(DateOffset(days=1)))
659
- with self.assertRaisesRegex(TypeError, "'last' only supports a DatetimeIndex"):
660
- ps.DataFrame([1, 2, 3, 4]).last("1D")
661
-
662
- def test_first(self):
663
- index = pd.date_range("2018-04-09", periods=4, freq="2D")
664
- pdf = pd.DataFrame([1, 2, 3, 4], index=index)
665
- psdf = ps.from_pandas(pdf)
666
- self.assert_eq(pdf.first("1D"), psdf.first("1D"))
667
- self.assert_eq(pdf.first(DateOffset(days=1)), psdf.first(DateOffset(days=1)))
668
- with self.assertRaisesRegex(TypeError, "'first' only supports a DatetimeIndex"):
669
- ps.DataFrame([1, 2, 3, 4]).first("1D")
670
-
671
- def test_swaplevel(self):
672
- # MultiIndex with two levels
673
- arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
674
- pidx = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
675
- pdf = pd.DataFrame({"x1": ["a", "b", "c", "d"], "x2": ["a", "b", "c", "d"]}, index=pidx)
676
- psdf = ps.from_pandas(pdf)
677
- self.assert_eq(pdf.swaplevel(), psdf.swaplevel())
678
- self.assert_eq(pdf.swaplevel(0, 1), psdf.swaplevel(0, 1))
679
- self.assert_eq(pdf.swaplevel(1, 1), psdf.swaplevel(1, 1))
680
- self.assert_eq(pdf.swaplevel("number", "color"), psdf.swaplevel("number", "color"))
681
-
682
- # MultiIndex with more than two levels
683
- arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"], ["l", "m", "s", "xs"]]
684
- pidx = pd.MultiIndex.from_arrays(arrays, names=("number", "color", "size"))
685
- pdf = pd.DataFrame({"x1": ["a", "b", "c", "d"], "x2": ["a", "b", "c", "d"]}, index=pidx)
686
- psdf = ps.from_pandas(pdf)
687
- self.assert_eq(pdf.swaplevel(), psdf.swaplevel())
688
- self.assert_eq(pdf.swaplevel(0, 1), psdf.swaplevel(0, 1))
689
- self.assert_eq(pdf.swaplevel(0, 2), psdf.swaplevel(0, 2))
690
- self.assert_eq(pdf.swaplevel(1, 2), psdf.swaplevel(1, 2))
691
- self.assert_eq(pdf.swaplevel(1, 1), psdf.swaplevel(1, 1))
692
- self.assert_eq(pdf.swaplevel(-1, -2), psdf.swaplevel(-1, -2))
693
- self.assert_eq(pdf.swaplevel("number", "color"), psdf.swaplevel("number", "color"))
694
- self.assert_eq(pdf.swaplevel("number", "size"), psdf.swaplevel("number", "size"))
695
- self.assert_eq(pdf.swaplevel("color", "size"), psdf.swaplevel("color", "size"))
696
- self.assert_eq(
697
- pdf.swaplevel("color", "size", axis="index"),
698
- psdf.swaplevel("color", "size", axis="index"),
699
- )
700
- self.assert_eq(
701
- pdf.swaplevel("color", "size", axis=0), psdf.swaplevel("color", "size", axis=0)
702
- )
703
-
704
- pdf = pd.DataFrame(
705
- {
706
- "x1": ["a", "b", "c", "d"],
707
- "x2": ["a", "b", "c", "d"],
708
- "x3": ["a", "b", "c", "d"],
709
- "x4": ["a", "b", "c", "d"],
710
- }
711
- )
712
- pidx = pd.MultiIndex.from_arrays(arrays, names=("number", "color", "size"))
713
- pdf.columns = pidx
714
- psdf = ps.from_pandas(pdf)
715
- self.assert_eq(pdf.swaplevel(axis=1), psdf.swaplevel(axis=1))
716
- self.assert_eq(pdf.swaplevel(0, 1, axis=1), psdf.swaplevel(0, 1, axis=1))
717
- self.assert_eq(pdf.swaplevel(0, 2, axis=1), psdf.swaplevel(0, 2, axis=1))
718
- self.assert_eq(pdf.swaplevel(1, 2, axis=1), psdf.swaplevel(1, 2, axis=1))
719
- self.assert_eq(pdf.swaplevel(1, 1, axis=1), psdf.swaplevel(1, 1, axis=1))
720
- self.assert_eq(pdf.swaplevel(-1, -2, axis=1), psdf.swaplevel(-1, -2, axis=1))
721
- self.assert_eq(
722
- pdf.swaplevel("number", "color", axis=1), psdf.swaplevel("number", "color", axis=1)
723
- )
724
- self.assert_eq(
725
- pdf.swaplevel("number", "size", axis=1), psdf.swaplevel("number", "size", axis=1)
726
- )
727
- self.assert_eq(
728
- pdf.swaplevel("color", "size", axis=1), psdf.swaplevel("color", "size", axis=1)
729
- )
730
- self.assert_eq(
731
- pdf.swaplevel("color", "size", axis="columns"),
732
- psdf.swaplevel("color", "size", axis="columns"),
733
- )
734
-
735
- # Error conditions
736
- self.assertRaises(AssertionError, lambda: ps.DataFrame([1, 2]).swaplevel())
737
- self.assertRaises(IndexError, lambda: psdf.swaplevel(0, 9, axis=1))
738
- self.assertRaises(KeyError, lambda: psdf.swaplevel("not_number", "color", axis=1))
739
- self.assertRaises(ValueError, lambda: psdf.swaplevel(axis=2))
740
-
741
- def test_swapaxes(self):
742
- pdf = pd.DataFrame(
743
- [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["x", "y", "z"], columns=["a", "b", "c"]
744
- )
745
- psdf = ps.from_pandas(pdf)
746
-
747
- self.assert_eq(psdf.swapaxes(0, 1), pdf.swapaxes(0, 1))
748
- self.assert_eq(psdf.swapaxes(1, 0), pdf.swapaxes(1, 0))
749
- self.assert_eq(psdf.swapaxes("index", "columns"), pdf.swapaxes("index", "columns"))
750
- self.assert_eq(psdf.swapaxes("columns", "index"), pdf.swapaxes("columns", "index"))
751
- self.assert_eq((psdf + 1).swapaxes(0, 1), (pdf + 1).swapaxes(0, 1))
752
-
753
- self.assertRaises(AssertionError, lambda: psdf.swapaxes(0, 1, copy=False))
754
- self.assertRaises(ValueError, lambda: psdf.swapaxes(0, -1))
755
-
756
- def test_isin(self):
757
- pdf = pd.DataFrame(
758
- {
759
- "a": [4, 2, 3, 4, 8, 6],
760
- "b": [1, 2, 9, 4, 2, 4],
761
- "c": ["one", "three", "six", "seven", "one", "5"],
762
- },
763
- index=np.random.rand(6),
764
- )
765
- psdf = ps.from_pandas(pdf)
766
-
767
- self.assert_eq(psdf.isin([4, "six"]), pdf.isin([4, "six"]))
768
- # Seems like pandas has a bug when passing `np.array` as parameter
769
- self.assert_eq(psdf.isin(np.array([4, "six"])), pdf.isin([4, "six"]))
770
- self.assert_eq(
771
- psdf.isin({"a": [2, 8], "c": ["three", "one"]}),
772
- pdf.isin({"a": [2, 8], "c": ["three", "one"]}),
773
- )
774
- self.assert_eq(
775
- psdf.isin({"a": np.array([2, 8]), "c": ["three", "one"]}),
776
- pdf.isin({"a": np.array([2, 8]), "c": ["three", "one"]}),
777
- )
778
-
779
- msg = "'DataFrame' object has no attribute {'e'}"
780
- with self.assertRaisesRegex(AttributeError, msg):
781
- psdf.isin({"e": [5, 7], "a": [1, 6]})
782
-
783
- msg = "DataFrame and Series are not supported"
784
- with self.assertRaisesRegex(NotImplementedError, msg):
785
- psdf.isin(pdf)
786
-
787
- msg = "Values should be iterable, Series, DataFrame or dict."
788
- with self.assertRaisesRegex(TypeError, msg):
789
- psdf.isin(1)
790
-
791
- pdf = pd.DataFrame(
792
- {
793
- "a": [4, 2, 3, 4, 8, 6],
794
- "b": [1, None, 9, 4, None, 4],
795
- "c": [None, 5, None, 3, 2, 1],
796
- },
797
- )
798
- psdf = ps.from_pandas(pdf)
799
-
800
- if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
801
- self.assert_eq(psdf.isin([4, 3, 1, 1, None]), pdf.isin([4, 3, 1, 1, None]))
802
- else:
803
- expected = pd.DataFrame(
804
- {
805
- "a": [True, False, True, True, False, False],
806
- "b": [True, False, False, True, False, True],
807
- "c": [False, False, False, True, False, True],
808
- }
809
- )
810
- self.assert_eq(psdf.isin([4, 3, 1, 1, None]), expected)
811
-
812
- if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
813
- self.assert_eq(
814
- psdf.isin({"b": [4, 3, 1, 1, None]}), pdf.isin({"b": [4, 3, 1, 1, None]})
815
- )
816
- else:
817
- expected = pd.DataFrame(
818
- {
819
- "a": [False, False, False, False, False, False],
820
- "b": [True, False, False, True, False, True],
821
- "c": [False, False, False, False, False, False],
822
- }
823
- )
824
- self.assert_eq(psdf.isin({"b": [4, 3, 1, 1, None]}), expected)
825
-
826
- def test_sample(self):
827
- psdf = ps.DataFrame({"A": [0, 2, 4]}, index=["x", "y", "z"])
828
-
829
- # Make sure the tests run, but we can't check the result because they are non-deterministic.
830
- psdf.sample(frac=0.1)
831
- psdf.sample(frac=0.2, replace=True)
832
- psdf.sample(frac=0.2, random_state=5)
833
- psdf["A"].sample(frac=0.2)
834
- psdf["A"].sample(frac=0.2, replace=True)
835
- psdf["A"].sample(frac=0.2, random_state=5)
836
-
837
- self.assert_eq(psdf.sample(frac=0.1, ignore_index=True).index.dtype, np.int64)
838
- self.assert_eq(psdf.sample(frac=0.2, replace=True, ignore_index=True).index.dtype, np.int64)
839
- self.assert_eq(
840
- psdf.sample(frac=0.2, random_state=5, ignore_index=True).index.dtype, np.int64
841
- )
842
- self.assert_eq(psdf["A"].sample(frac=0.2, ignore_index=True).index.dtype, np.int64)
843
- self.assert_eq(
844
- psdf["A"].sample(frac=0.2, replace=True, ignore_index=True).index.dtype, np.int64
845
- )
846
- self.assert_eq(
847
- psdf["A"].sample(frac=0.2, random_state=5, ignore_index=True).index.dtype, np.int64
848
- )
849
-
850
- with self.assertRaises(ValueError):
851
- psdf.sample()
852
- with self.assertRaises(NotImplementedError):
853
- psdf.sample(n=1)
854
-
855
-
856
- class FrameReidexingTests(FrameReindexingMixin, ComparisonTestBase, SQLTestUtils):
857
- pass
858
-
859
-
860
- if __name__ == "__main__":
861
- from pyspark.pandas.tests.frame.test_reindexing import * # noqa: F401
862
-
863
- try:
864
- import xmlrunner
865
-
866
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
867
- except ImportError:
868
- testRunner = None
869
- unittest.main(testRunner=testRunner, verbosity=2)