snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (474) hide show
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  17. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  18. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  19. snowflake/snowpark_connect/relation/map_join.py +258 -62
  20. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  22. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  23. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  24. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  25. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  26. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  27. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  28. snowflake/snowpark_connect/resources_initializer.py +29 -1
  29. snowflake/snowpark_connect/server.py +18 -3
  30. snowflake/snowpark_connect/type_mapping.py +29 -25
  31. snowflake/snowpark_connect/typed_column.py +14 -0
  32. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  33. snowflake/snowpark_connect/utils/context.py +6 -1
  34. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  35. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  36. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  37. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  38. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  39. snowflake/snowpark_connect/version.py +1 -1
  40. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  41. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  42. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  46. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  47. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  48. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  49. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  50. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  51. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  52. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  53. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  93. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  94. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  95. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  96. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  97. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  98. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  100. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  101. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  102. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  103. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  104. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  105. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  360. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  361. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  362. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  439. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  440. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  444. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  445. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  466. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  467. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  468. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  469. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  470. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  471. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  472. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  473. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  474. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,741 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import unittest
19
- from distutils.version import LooseVersion
20
-
21
- import numpy as np
22
- import pandas as pd
23
- from pandas.api.types import CategoricalDtype
24
-
25
- import pyspark.pandas as ps
26
- from pyspark.testing.pandasutils import ComparisonTestBase, TestUtils
27
-
28
-
29
- class CategoricalTestsMixin:
30
- @property
31
- def pdf(self):
32
- return pd.DataFrame(
33
- {
34
- "a": pd.Categorical([1, 2, 3, 1, 2, 3]),
35
- "b": pd.Categorical(
36
- ["b", "a", "c", "c", "b", "a"], categories=["c", "b", "d", "a"]
37
- ),
38
- },
39
- )
40
-
41
- @property
42
- def df_pair(self):
43
- return self.pdf, self.psdf
44
-
45
- def test_categorical_frame(self):
46
- pdf, psdf = self.df_pair
47
-
48
- self.assert_eq(psdf, pdf)
49
- self.assert_eq(psdf.a, pdf.a)
50
- self.assert_eq(psdf.b, pdf.b)
51
- self.assert_eq(psdf.index, pdf.index)
52
-
53
- self.assert_eq(psdf.sort_index(), pdf.sort_index())
54
- self.assert_eq(psdf.sort_values("b"), pdf.sort_values("b"))
55
-
56
- def test_categorical_series(self):
57
- pser = pd.Series([1, 2, 3], dtype="category")
58
- psser = ps.Series([1, 2, 3], dtype="category")
59
-
60
- self.assert_eq(psser, pser)
61
- self.assert_eq(psser.cat.categories, pser.cat.categories)
62
- self.assert_eq(psser.cat.codes, pser.cat.codes)
63
- self.assert_eq(psser.cat.ordered, pser.cat.ordered)
64
-
65
- with self.assertRaisesRegex(ValueError, "Cannot call CategoricalAccessor on type int64"):
66
- ps.Series([1, 2, 3]).cat
67
-
68
- @unittest.skipIf(
69
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
70
- "TODO(SPARK-43566): Enable CategoricalTests.test_categories_setter for pandas 2.0.0.",
71
- )
72
- def test_categories_setter(self):
73
- pdf, psdf = self.df_pair
74
-
75
- pser = pdf.a
76
- psser = psdf.a
77
-
78
- pser.cat.categories = ["z", "y", "x"]
79
- psser.cat.categories = ["z", "y", "x"]
80
- if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
81
- # Bug in pandas 1.3. dtype is not updated properly with `inplace` argument.
82
- pser = pser.astype(CategoricalDtype(categories=["x", "y", "z"]))
83
-
84
- self.assert_eq(pser, psser)
85
- self.assert_eq(pdf, psdf)
86
-
87
- with self.assertRaises(ValueError):
88
- psser.cat.categories = [1, 2, 3, 4]
89
-
90
- def test_add_categories(self):
91
- pdf, psdf = self.df_pair
92
-
93
- pser = pdf.a
94
- psser = psdf.a
95
-
96
- self.assert_eq(pser.cat.add_categories(4), psser.cat.add_categories(4))
97
- self.assert_eq(pser.cat.add_categories([4, 5]), psser.cat.add_categories([4, 5]))
98
- self.assert_eq(pser.cat.add_categories([]), psser.cat.add_categories([]))
99
-
100
- pser = pser.cat.add_categories(4)
101
- psser = psser.cat.add_categories(4)
102
-
103
- self.assertRaises(ValueError, lambda: psser.cat.add_categories(4))
104
- self.assertRaises(ValueError, lambda: psser.cat.add_categories([5, 5]))
105
-
106
- @unittest.skipIf(
107
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
108
- "TODO(SPARK-43605): Enable CategoricalTests.test_remove_categories for pandas 2.0.0.",
109
- )
110
- def test_remove_categories(self):
111
- pdf, psdf = self.df_pair
112
-
113
- pser = pdf.a
114
- psser = psdf.a
115
-
116
- self.assert_eq(pser.cat.remove_categories(2), psser.cat.remove_categories(2))
117
- self.assert_eq(pser.cat.remove_categories([1, 3]), psser.cat.remove_categories([1, 3]))
118
- self.assert_eq(pser.cat.remove_categories([]), psser.cat.remove_categories([]))
119
- self.assert_eq(pser.cat.remove_categories([2, 2]), psser.cat.remove_categories([2, 2]))
120
- self.assert_eq(
121
- pser.cat.remove_categories([1, 2, 3]), psser.cat.remove_categories([1, 2, 3])
122
- )
123
- self.assert_eq(pser.cat.remove_categories(None), psser.cat.remove_categories(None))
124
- self.assert_eq(pser.cat.remove_categories([None]), psser.cat.remove_categories([None]))
125
-
126
- self.assertRaises(ValueError, lambda: psser.cat.remove_categories(4))
127
- self.assertRaises(ValueError, lambda: psser.cat.remove_categories([4, None]))
128
-
129
- def test_remove_unused_categories(self):
130
- pdf, psdf = self.df_pair
131
-
132
- pser = pdf.a
133
- psser = psdf.a
134
-
135
- self.assert_eq(pser.cat.remove_unused_categories(), psser.cat.remove_unused_categories())
136
-
137
- pser = pser.cat.add_categories(4)
138
- pser = pser.cat.remove_categories(2)
139
- psser = psser.cat.add_categories(4)
140
- psser = psser.cat.remove_categories(2)
141
-
142
- self.assert_eq(pser.cat.remove_unused_categories(), psser.cat.remove_unused_categories())
143
-
144
- def test_reorder_categories(self):
145
- pdf, psdf = self.df_pair
146
-
147
- pser = pdf.a
148
- psser = psdf.a
149
-
150
- self.assert_eq(
151
- pser.cat.reorder_categories([1, 2, 3]), psser.cat.reorder_categories([1, 2, 3])
152
- )
153
- self.assert_eq(
154
- pser.cat.reorder_categories([1, 2, 3], ordered=True),
155
- psser.cat.reorder_categories([1, 2, 3], ordered=True),
156
- )
157
- self.assert_eq(
158
- pser.cat.reorder_categories([3, 2, 1]), psser.cat.reorder_categories([3, 2, 1])
159
- )
160
- self.assert_eq(
161
- pser.cat.reorder_categories([3, 2, 1], ordered=True),
162
- psser.cat.reorder_categories([3, 2, 1], ordered=True),
163
- )
164
-
165
- self.assertRaises(ValueError, lambda: psser.cat.reorder_categories([1, 2]))
166
- self.assertRaises(ValueError, lambda: psser.cat.reorder_categories([1, 2, 4]))
167
- self.assertRaises(ValueError, lambda: psser.cat.reorder_categories([1, 2, 2]))
168
- self.assertRaises(TypeError, lambda: psser.cat.reorder_categories(1))
169
- self.assertRaises(TypeError, lambda: psdf.b.cat.reorder_categories("abcd"))
170
-
171
- @unittest.skipIf(
172
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
173
- "TODO(SPARK-43565): Enable CategoricalTests.test_as_ordered_unordered for pandas 2.0.0.",
174
- )
175
- def test_as_ordered_unordered(self):
176
- pdf, psdf = self.df_pair
177
-
178
- pser = pdf.a
179
- psser = psdf.a
180
-
181
- # as_ordered
182
- self.assert_eq(pser.cat.as_ordered(), psser.cat.as_ordered())
183
-
184
- pser.cat.as_ordered(inplace=True)
185
- psser.cat.as_ordered(inplace=True)
186
- if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
187
- # Bug in pandas 1.3. dtype is not updated properly with `inplace` argument.
188
- pser = pser.astype(CategoricalDtype(categories=[1, 2, 3], ordered=True))
189
-
190
- self.assert_eq(pser, psser)
191
- self.assert_eq(pdf, psdf)
192
-
193
- # as_unordered
194
- self.assert_eq(pser.cat.as_unordered(), psser.cat.as_unordered())
195
-
196
- pser.cat.as_unordered(inplace=True)
197
- psser.cat.as_unordered(inplace=True)
198
- if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
199
- # Bug in pandas 1.3. dtype is not updated properly with `inplace` argument.
200
- pser = pser.astype(CategoricalDtype(categories=[1, 2, 3], ordered=False))
201
- pdf.a = pser
202
-
203
- self.assert_eq(pser, psser)
204
- self.assert_eq(pdf, psdf)
205
-
206
- def test_astype(self):
207
- pser = pd.Series(["a", "b", "c"])
208
- psser = ps.from_pandas(pser)
209
-
210
- self.assert_eq(psser.astype("category"), pser.astype("category"))
211
- self.assert_eq(
212
- psser.astype(CategoricalDtype(["c", "a", "b"])),
213
- pser.astype(CategoricalDtype(["c", "a", "b"])),
214
- )
215
-
216
- pcser = pser.astype(CategoricalDtype(["c", "a", "b"]))
217
- pscser = psser.astype(CategoricalDtype(["c", "a", "b"]))
218
-
219
- self.assert_eq(pscser.astype("category"), pcser.astype("category"))
220
-
221
- # CategoricalDtype is not updated if the dtype is same from pandas 1.3.
222
- if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
223
- self.assert_eq(
224
- pscser.astype(CategoricalDtype(["b", "c", "a"])),
225
- pcser.astype(CategoricalDtype(["b", "c", "a"])),
226
- )
227
- else:
228
- self.assert_eq(
229
- pscser.astype(CategoricalDtype(["b", "c", "a"])),
230
- pcser,
231
- )
232
-
233
- self.assert_eq(pscser.astype(str), pcser.astype(str))
234
-
235
- @unittest.skipIf(
236
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
237
- "TODO(SPARK-43564): Enable CategoricalTests.test_factorize for pandas 2.0.0.",
238
- )
239
- def test_factorize(self):
240
- pser = pd.Series(["a", "b", "c", None], dtype=CategoricalDtype(["c", "a", "d", "b"]))
241
- psser = ps.from_pandas(pser)
242
-
243
- pcodes, puniques = pser.factorize()
244
- kcodes, kuniques = psser.factorize()
245
-
246
- self.assert_eq(kcodes.tolist(), pcodes.tolist())
247
- self.assert_eq(kuniques, puniques)
248
-
249
- pcodes, puniques = pser.factorize(na_sentinel=-2)
250
- kcodes, kuniques = psser.factorize(na_sentinel=-2)
251
-
252
- self.assert_eq(kcodes.tolist(), pcodes.tolist())
253
- self.assert_eq(kuniques, puniques)
254
-
255
- def test_frame_apply(self):
256
- pdf, psdf = self.df_pair
257
-
258
- self.assert_eq(psdf.apply(lambda x: x).sort_index(), pdf.apply(lambda x: x).sort_index())
259
- self.assert_eq(
260
- psdf.apply(lambda x: x, axis=1).sort_index(),
261
- pdf.apply(lambda x: x, axis=1).sort_index(),
262
- )
263
-
264
- def test_frame_apply_without_shortcut(self):
265
- with ps.option_context("compute.shortcut_limit", 0):
266
- self.test_frame_apply()
267
-
268
- pdf = pd.DataFrame(
269
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
270
- )
271
- psdf = ps.from_pandas(pdf)
272
-
273
- dtype = CategoricalDtype(categories=["a", "b", "c"])
274
-
275
- def categorize(ser) -> ps.Series[dtype]:
276
- return ser.astype(dtype)
277
-
278
- self.assert_eq(
279
- psdf.apply(categorize).sort_values(["a", "b"]).reset_index(drop=True),
280
- pdf.apply(categorize).sort_values(["a", "b"]).reset_index(drop=True),
281
- )
282
-
283
- def test_frame_transform(self):
284
- pdf, psdf = self.df_pair
285
-
286
- self.assert_eq(psdf.transform(lambda x: x), pdf.transform(lambda x: x))
287
- self.assert_eq(psdf.transform(lambda x: x.cat.codes), pdf.transform(lambda x: x.cat.codes))
288
-
289
- pdf = pd.DataFrame(
290
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
291
- )
292
- psdf = ps.from_pandas(pdf)
293
-
294
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
295
-
296
- self.assert_eq(
297
- psdf.transform(lambda x: x.astype(dtype)).sort_index(),
298
- pdf.transform(lambda x: x.astype(dtype)).sort_index(),
299
- )
300
-
301
- def test_frame_transform_without_shortcut(self):
302
- with ps.option_context("compute.shortcut_limit", 0):
303
- self.test_frame_transform()
304
-
305
- pdf, psdf = self.df_pair
306
-
307
- def codes(pser) -> ps.Series[np.int8]:
308
- return pser.cat.codes
309
-
310
- self.assert_eq(psdf.transform(codes), pdf.transform(codes))
311
-
312
- pdf = pd.DataFrame(
313
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
314
- )
315
- psdf = ps.from_pandas(pdf)
316
-
317
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
318
-
319
- def to_category(pser) -> ps.Series[dtype]:
320
- return pser.astype(dtype)
321
-
322
- self.assert_eq(
323
- psdf.transform(to_category).sort_index(), pdf.transform(to_category).sort_index()
324
- )
325
-
326
- def test_series_apply(self):
327
- pdf, psdf = self.df_pair
328
-
329
- self.assert_eq(
330
- psdf.a.apply(lambda x: x).sort_index(), pdf.a.apply(lambda x: x).sort_index()
331
- )
332
-
333
- def test_series_apply_without_shortcut(self):
334
- with ps.option_context("compute.shortcut_limit", 0):
335
- self.test_series_apply()
336
-
337
- pdf, psdf = self.df_pair
338
- ret = psdf.a.dtype
339
-
340
- def identity(pser) -> ret:
341
- return pser
342
-
343
- self.assert_eq(psdf.a.apply(identity).sort_index(), pdf.a.apply(identity).sort_index())
344
-
345
- # TODO: The return type is still category.
346
- # def to_str(x) -> str:
347
- # return str(x)
348
- #
349
- # self.assert_eq(
350
- # psdf.a.apply(to_str).sort_index(), pdf.a.apply(to_str).sort_index()
351
- # )
352
-
353
- def test_groupby_apply(self):
354
- pdf, psdf = self.df_pair
355
-
356
- self.assert_eq(
357
- psdf.groupby("a").apply(lambda df: df).sort_index(),
358
- pdf.groupby("a").apply(lambda df: df).sort_index(),
359
- )
360
- self.assert_eq(
361
- psdf.groupby("b").apply(lambda df: df[["a"]]).sort_index(),
362
- pdf.groupby("b").apply(lambda df: df[["a"]]).sort_index(),
363
- )
364
- self.assert_eq(
365
- psdf.groupby(["a", "b"]).apply(lambda df: df).sort_index(),
366
- pdf.groupby(["a", "b"]).apply(lambda df: df).sort_index(),
367
- )
368
- self.assert_eq(
369
- psdf.groupby("a").apply(lambda df: df.b.cat.codes).sort_index(),
370
- pdf.groupby("a").apply(lambda df: df.b.cat.codes).sort_index(),
371
- )
372
- self.assert_eq(
373
- psdf.groupby("a")["b"].apply(lambda b: b.cat.codes).sort_index(),
374
- pdf.groupby("a")["b"].apply(lambda b: b.cat.codes).sort_index(),
375
- )
376
-
377
- # TODO: grouping by a categorical type sometimes preserves unused categories.
378
- # self.assert_eq(
379
- # psdf.groupby("a").apply(len).sort_index(), pdf.groupby("a").apply(len).sort_index(),
380
- # )
381
-
382
- @unittest.skipIf(
383
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
384
- "TODO(SPARK-43813): Enable CategoricalTests.test_groupby_apply_without_shortcut "
385
- "for pandas 2.0.0.",
386
- )
387
- def test_groupby_apply_without_shortcut(self):
388
- with ps.option_context("compute.shortcut_limit", 0):
389
- self.test_groupby_apply()
390
-
391
- pdf, psdf = self.df_pair
392
-
393
- def identity(df) -> ps.DataFrame[zip(psdf.columns, psdf.dtypes)]:
394
- return df
395
-
396
- self.assert_eq(
397
- psdf.groupby("a").apply(identity).sort_values(["a", "b"]).reset_index(drop=True),
398
- pdf.groupby("a").apply(identity).sort_values(["a", "b"]).reset_index(drop=True),
399
- )
400
-
401
- def test_groupby_transform(self):
402
- pdf, psdf = self.df_pair
403
-
404
- self.assert_eq(
405
- psdf.groupby("a").transform(lambda x: x).sort_index(),
406
- pdf.groupby("a").transform(lambda x: x).sort_index(),
407
- )
408
-
409
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
410
-
411
- self.assert_eq(
412
- psdf.groupby("a").transform(lambda x: x.astype(dtype)).sort_index(),
413
- pdf.groupby("a").transform(lambda x: x.astype(dtype)).sort_index(),
414
- )
415
-
416
- def test_groupby_transform_without_shortcut(self):
417
- with ps.option_context("compute.shortcut_limit", 0):
418
- self.test_groupby_transform()
419
-
420
- pdf, psdf = self.df_pair
421
-
422
- def identity(x) -> ps.Series[psdf.b.dtype]:
423
- return x
424
-
425
- self.assert_eq(
426
- psdf.groupby("a").transform(identity).sort_values("b").reset_index(drop=True),
427
- pdf.groupby("a").transform(identity).sort_values("b").reset_index(drop=True),
428
- )
429
-
430
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
431
-
432
- # The behavior for CategoricalDtype is changed from pandas 1.3
433
- if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
434
- ret_dtype = pdf.b.dtype
435
- else:
436
- ret_dtype = dtype
437
-
438
- def astype(x) -> ps.Series[ret_dtype]:
439
- return x.astype(dtype)
440
-
441
- if LooseVersion(pd.__version__) >= LooseVersion("1.2"):
442
- self.assert_eq(
443
- psdf.groupby("a").transform(astype).sort_values("b").reset_index(drop=True),
444
- pdf.groupby("a").transform(astype).sort_values("b").reset_index(drop=True),
445
- )
446
- else:
447
- expected = pdf.groupby("a").transform(astype)
448
- expected["b"] = dtype.categories.take(expected["b"].cat.codes).astype(dtype)
449
- self.assert_eq(
450
- psdf.groupby("a").transform(astype).sort_values("b").reset_index(drop=True),
451
- expected.sort_values("b").reset_index(drop=True),
452
- )
453
-
454
- def test_frame_apply_batch(self):
455
- pdf, psdf = self.df_pair
456
-
457
- self.assert_eq(
458
- psdf.pandas_on_spark.apply_batch(lambda pdf: pdf.astype(str)).sort_index(),
459
- pdf.astype(str).sort_index(),
460
- )
461
-
462
- pdf = pd.DataFrame(
463
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
464
- )
465
- psdf = ps.from_pandas(pdf)
466
-
467
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
468
-
469
- self.assert_eq(
470
- psdf.pandas_on_spark.apply_batch(lambda pdf: pdf.astype(dtype)).sort_index(),
471
- pdf.astype(dtype).sort_index(),
472
- )
473
-
474
- def test_frame_apply_batch_without_shortcut(self):
475
- with ps.option_context("compute.shortcut_limit", 0):
476
- self.test_frame_apply_batch()
477
-
478
- pdf, psdf = self.df_pair
479
-
480
- def to_str(pdf) -> 'ps.DataFrame["a":str, "b":str]': # noqa: F405
481
- return pdf.astype(str)
482
-
483
- self.assert_eq(
484
- psdf.pandas_on_spark.apply_batch(to_str).sort_values(["a", "b"]).reset_index(drop=True),
485
- to_str(pdf).sort_values(["a", "b"]).reset_index(drop=True),
486
- )
487
-
488
- pdf = pd.DataFrame(
489
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
490
- )
491
- psdf = ps.from_pandas(pdf)
492
-
493
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
494
- ret = ps.DataFrame["a":dtype, "b":dtype]
495
-
496
- def to_category(pdf) -> ret:
497
- return pdf.astype(dtype)
498
-
499
- self.assert_eq(
500
- psdf.pandas_on_spark.apply_batch(to_category)
501
- .sort_values(["a", "b"])
502
- .reset_index(drop=True),
503
- to_category(pdf).sort_values(["a", "b"]).reset_index(drop=True),
504
- )
505
-
506
- def test_frame_transform_batch(self):
507
- pdf, psdf = self.df_pair
508
-
509
- self.assert_eq(
510
- psdf.pandas_on_spark.transform_batch(lambda pdf: pdf.astype(str)).sort_index(),
511
- pdf.astype(str).sort_index(),
512
- )
513
- self.assert_eq(
514
- psdf.pandas_on_spark.transform_batch(lambda pdf: pdf.b.cat.codes).sort_index(),
515
- pdf.b.cat.codes.sort_index(),
516
- )
517
-
518
- pdf = pd.DataFrame(
519
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
520
- )
521
- psdf = ps.from_pandas(pdf)
522
-
523
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
524
-
525
- self.assert_eq(
526
- psdf.pandas_on_spark.transform_batch(lambda pdf: pdf.astype(dtype)).sort_index(),
527
- pdf.astype(dtype).sort_index(),
528
- )
529
- self.assert_eq(
530
- psdf.pandas_on_spark.transform_batch(lambda pdf: pdf.b.astype(dtype)).sort_index(),
531
- pdf.b.astype(dtype).sort_index(),
532
- )
533
-
534
- def test_frame_transform_batch_without_shortcut(self):
535
- with ps.option_context("compute.shortcut_limit", 0):
536
- self.test_frame_transform_batch()
537
-
538
- pdf, psdf = self.df_pair
539
-
540
- def to_str(pdf) -> 'ps.DataFrame["a":str, "b":str]': # noqa: F405
541
- return pdf.astype(str)
542
-
543
- self.assert_eq(
544
- psdf.pandas_on_spark.transform_batch(to_str).sort_index(),
545
- to_str(pdf).sort_index(),
546
- )
547
-
548
- def to_codes(pdf) -> ps.Series[np.int8]:
549
- return pdf.b.cat.codes
550
-
551
- self.assert_eq(
552
- psdf.pandas_on_spark.transform_batch(to_codes).sort_index(),
553
- to_codes(pdf).sort_index(),
554
- )
555
-
556
- pdf = pd.DataFrame(
557
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
558
- )
559
- psdf = ps.from_pandas(pdf)
560
-
561
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
562
- ret = ps.DataFrame["a":dtype, "b":dtype]
563
-
564
- def to_category(pdf) -> ret:
565
- return pdf.astype(dtype)
566
-
567
- self.assert_eq(
568
- psdf.pandas_on_spark.transform_batch(to_category).sort_index(),
569
- to_category(pdf).sort_index(),
570
- )
571
-
572
- def to_category(pdf) -> ps.Series[dtype]:
573
- return pdf.b.astype(dtype)
574
-
575
- self.assert_eq(
576
- psdf.pandas_on_spark.transform_batch(to_category).sort_index(),
577
- to_category(pdf).rename().sort_index(),
578
- )
579
-
580
- def test_series_transform_batch(self):
581
- pdf, psdf = self.df_pair
582
-
583
- self.assert_eq(
584
- psdf.a.pandas_on_spark.transform_batch(lambda pser: pser.astype(str)).sort_index(),
585
- pdf.a.astype(str).sort_index(),
586
- )
587
-
588
- pdf = pd.DataFrame(
589
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
590
- )
591
- psdf = ps.from_pandas(pdf)
592
-
593
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
594
-
595
- self.assert_eq(
596
- psdf.a.pandas_on_spark.transform_batch(lambda pser: pser.astype(dtype)).sort_index(),
597
- pdf.a.astype(dtype).sort_index(),
598
- )
599
-
600
- def test_series_transform_batch_without_shortcut(self):
601
- with ps.option_context("compute.shortcut_limit", 0):
602
- self.test_series_transform_batch()
603
-
604
- pdf, psdf = self.df_pair
605
-
606
- def to_str(pser) -> ps.Series[str]:
607
- return pser.astype(str)
608
-
609
- self.assert_eq(
610
- psdf.a.pandas_on_spark.transform_batch(to_str).sort_index(), to_str(pdf.a).sort_index()
611
- )
612
-
613
- pdf = pd.DataFrame(
614
- {"a": ["a", "b", "c", "a", "b", "c"], "b": ["b", "a", "c", "c", "b", "a"]}
615
- )
616
- psdf = ps.from_pandas(pdf)
617
-
618
- dtype = CategoricalDtype(categories=["a", "b", "c", "d"])
619
-
620
- def to_category(pser) -> ps.Series[dtype]:
621
- return pser.astype(dtype)
622
-
623
- self.assert_eq(
624
- psdf.a.pandas_on_spark.transform_batch(to_category).sort_index(),
625
- to_category(pdf.a).sort_index(),
626
- )
627
-
628
- def test_unstack(self):
629
- pdf = self.pdf
630
- index = pd.MultiIndex.from_tuples(
631
- [("x", "a"), ("x", "b"), ("x", "c"), ("y", "a"), ("y", "b"), ("y", "d")]
632
- )
633
- pdf.index = index
634
- psdf = ps.from_pandas(pdf)
635
-
636
- self.assert_eq(psdf.a.unstack().sort_index(), pdf.a.unstack().sort_index())
637
- self.assert_eq(psdf.b.unstack().sort_index(), pdf.b.unstack().sort_index())
638
-
639
- def test_rename_categories(self):
640
- pdf, psdf = self.df_pair
641
-
642
- pser = pdf.b
643
- psser = psdf.b
644
-
645
- self.assert_eq(
646
- pser.cat.rename_categories([0, 1, 3, 2]), psser.cat.rename_categories([0, 1, 3, 2])
647
- )
648
- self.assert_eq(
649
- pser.cat.rename_categories({"a": "A", "c": "C"}),
650
- psser.cat.rename_categories({"a": "A", "c": "C"}),
651
- )
652
- self.assert_eq(
653
- pser.cat.rename_categories(lambda x: x.upper()),
654
- psser.cat.rename_categories(lambda x: x.upper()),
655
- )
656
-
657
- self.assertRaisesRegex(
658
- ValueError,
659
- "new categories need to have the same number of items as the old categories",
660
- lambda: psser.cat.rename_categories([0, 1, 2]),
661
- )
662
- self.assertRaises(
663
- TypeError,
664
- lambda: psser.cat.rename_categories(None),
665
- )
666
- self.assertRaises(
667
- TypeError,
668
- lambda: psser.cat.rename_categories(1),
669
- )
670
- self.assertRaises(
671
- TypeError,
672
- lambda: psser.cat.rename_categories("x"),
673
- )
674
-
675
- def test_set_categories(self):
676
- pdf, psdf = self.df_pair
677
-
678
- pser = pdf.b
679
- psser = psdf.b
680
-
681
- self.assert_eq(
682
- pser.cat.set_categories(["a", "c", "b", "o"]),
683
- psser.cat.set_categories(["a", "c", "b", "o"]),
684
- )
685
- self.assert_eq(
686
- pser.cat.set_categories(["a", "c", "b"]),
687
- psser.cat.set_categories(["a", "c", "b"]),
688
- )
689
- self.assert_eq(
690
- pser.cat.set_categories(["a", "c", "b", "d", "e"]),
691
- psser.cat.set_categories(["a", "c", "b", "d", "e"]),
692
- )
693
-
694
- self.assert_eq(
695
- pser.cat.set_categories([0, 1, 3, 2], rename=True),
696
- psser.cat.set_categories([0, 1, 3, 2], rename=True),
697
- )
698
- self.assert_eq(
699
- pser.cat.set_categories([0, 1, 3], rename=True),
700
- psser.cat.set_categories([0, 1, 3], rename=True),
701
- )
702
- self.assert_eq(
703
- pser.cat.set_categories([0, 1, 3, 2, 4], rename=True),
704
- psser.cat.set_categories([0, 1, 3, 2, 4], rename=True),
705
- )
706
-
707
- self.assert_eq(
708
- pser.cat.set_categories(["a", "c", "b", "o"], ordered=True),
709
- psser.cat.set_categories(["a", "c", "b", "o"], ordered=True),
710
- )
711
- self.assert_eq(
712
- pser.cat.set_categories(["a", "c", "b"], ordered=True),
713
- psser.cat.set_categories(["a", "c", "b"], ordered=True),
714
- )
715
- self.assert_eq(
716
- pser.cat.set_categories(["a", "c", "b", "d", "e"], ordered=True),
717
- psser.cat.set_categories(["a", "c", "b", "d", "e"], ordered=True),
718
- )
719
-
720
- self.assertRaisesRegex(
721
- TypeError,
722
- "Parameter 'new_categories' must be list-like, was",
723
- lambda: psser.cat.set_categories(None),
724
- )
725
-
726
-
727
- class CategoricalTests(CategoricalTestsMixin, ComparisonTestBase, TestUtils):
728
- pass
729
-
730
-
731
- if __name__ == "__main__":
732
- import unittest
733
- from pyspark.pandas.tests.test_categorical import * # noqa: F401
734
-
735
- try:
736
- import xmlrunner
737
-
738
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
739
- except ImportError:
740
- testRunner = None
741
- unittest.main(testRunner=testRunner, verbosity=2)