snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (474) hide show
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  17. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  18. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  19. snowflake/snowpark_connect/relation/map_join.py +258 -62
  20. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  22. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  23. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  24. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  25. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  26. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  27. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  28. snowflake/snowpark_connect/resources_initializer.py +29 -1
  29. snowflake/snowpark_connect/server.py +18 -3
  30. snowflake/snowpark_connect/type_mapping.py +29 -25
  31. snowflake/snowpark_connect/typed_column.py +14 -0
  32. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  33. snowflake/snowpark_connect/utils/context.py +6 -1
  34. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  35. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  36. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  37. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  38. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  39. snowflake/snowpark_connect/version.py +1 -1
  40. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  41. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  42. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  46. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  47. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  48. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  49. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  50. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  51. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  52. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  53. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  93. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  94. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  95. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  96. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  97. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  98. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  100. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  101. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  102. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  103. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  104. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  105. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  360. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  361. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  362. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  439. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  440. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  444. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  445. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  466. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  467. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  468. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  469. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  470. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  471. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  472. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  473. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  474. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,581 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import unittest
19
- from distutils.version import LooseVersion
20
- import numpy as np
21
- import pandas as pd
22
-
23
- try:
24
- from pandas._testing import makeMissingDataframe
25
- except ImportError:
26
- from pandas.util.testing import makeMissingDataframe
27
-
28
- from pyspark import pandas as ps
29
- from pyspark.pandas.config import option_context
30
- from pyspark.testing.pandasutils import PandasOnSparkTestCase, SPARK_CONF_ARROW_ENABLED
31
- from pyspark.testing.sqlutils import SQLTestUtils
32
-
33
-
34
- class StatsTestsMixin:
35
- def _test_stat_functions(self, pdf_or_pser, psdf_or_psser):
36
- functions = ["max", "min", "mean", "sum", "count"]
37
- for funcname in functions:
38
- self.assert_eq(getattr(psdf_or_psser, funcname)(), getattr(pdf_or_pser, funcname)())
39
-
40
- functions = ["std", "var", "product", "sem"]
41
- for funcname in functions:
42
- self.assert_eq(
43
- getattr(psdf_or_psser, funcname)(),
44
- getattr(pdf_or_pser, funcname)(),
45
- check_exact=False,
46
- )
47
-
48
- functions = ["std", "var", "sem"]
49
- for funcname in functions:
50
- self.assert_eq(
51
- getattr(psdf_or_psser, funcname)(ddof=0),
52
- getattr(pdf_or_pser, funcname)(ddof=0),
53
- check_exact=False,
54
- )
55
-
56
- # NOTE: To test skew, kurt, and median, just make sure they run.
57
- # The numbers are different in spark and pandas.
58
- functions = ["skew", "kurt", "median"]
59
- for funcname in functions:
60
- getattr(psdf_or_psser, funcname)()
61
-
62
- def test_stat_functions(self):
63
- pdf = pd.DataFrame({"A": [1, 2, 3, 4], "B": [1, 2, 3, 4], "C": [1, np.nan, 3, np.nan]})
64
- psdf = ps.from_pandas(pdf)
65
- self._test_stat_functions(pdf.A, psdf.A)
66
- self._test_stat_functions(pdf, psdf)
67
-
68
- # empty
69
- self._test_stat_functions(pdf.A.loc[[]], psdf.A.loc[[]])
70
- self._test_stat_functions(pdf.loc[[]], psdf.loc[[]])
71
-
72
- def test_stat_functions_multiindex_column(self):
73
- arrays = [np.array(["A", "A", "B", "B"]), np.array(["one", "two", "one", "two"])]
74
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
75
- psdf = ps.from_pandas(pdf)
76
- self._test_stat_functions(pdf.A, psdf.A)
77
- self._test_stat_functions(pdf, psdf)
78
-
79
- @unittest.skipIf(
80
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
81
- "TODO(SPARK-43499): Enable SeriesTests.test_stat_functions_with_no_numeric_columns "
82
- "for pandas 2.0.0.",
83
- )
84
- def test_stat_functions_with_no_numeric_columns(self):
85
- pdf = pd.DataFrame(
86
- {
87
- "A": ["a", None, "c", "d", None, "f", "g"],
88
- "B": ["A", "B", "C", None, "E", "F", None],
89
- }
90
- )
91
- psdf = ps.from_pandas(pdf)
92
-
93
- self._test_stat_functions(pdf, psdf)
94
-
95
- def test_sum(self):
96
- pdf = pd.DataFrame({"a": [1, 2, 3, np.nan], "b": [0.1, np.nan, 0.3, np.nan]})
97
- psdf = ps.from_pandas(pdf)
98
-
99
- self.assert_eq(psdf.sum(), pdf.sum())
100
- self.assert_eq(psdf.sum(axis=1), pdf.sum(axis=1))
101
- self.assert_eq(psdf.sum(min_count=3), pdf.sum(min_count=3))
102
- self.assert_eq(psdf.sum(axis=1, min_count=1), pdf.sum(axis=1, min_count=1))
103
- self.assert_eq(psdf.loc[[]].sum(), pdf.loc[[]].sum())
104
- self.assert_eq(psdf.loc[[]].sum(min_count=1), pdf.loc[[]].sum(min_count=1))
105
-
106
- self.assert_eq(psdf["a"].sum(), pdf["a"].sum())
107
- self.assert_eq(psdf["a"].sum(min_count=3), pdf["a"].sum(min_count=3))
108
- self.assert_eq(psdf["b"].sum(min_count=3), pdf["b"].sum(min_count=3))
109
- self.assert_eq(psdf["a"].loc[[]].sum(), pdf["a"].loc[[]].sum())
110
- self.assert_eq(psdf["a"].loc[[]].sum(min_count=1), pdf["a"].loc[[]].sum(min_count=1))
111
-
112
- def test_product(self):
113
- pdf = pd.DataFrame(
114
- {"a": [1, -2, -3, np.nan], "b": [0.1, np.nan, -0.3, np.nan], "c": [10, 20, 0, -10]}
115
- )
116
- psdf = ps.from_pandas(pdf)
117
-
118
- self.assert_eq(psdf.product(), pdf.product(), check_exact=False)
119
- self.assert_eq(psdf.product(axis=1), pdf.product(axis=1))
120
- self.assert_eq(psdf.product(min_count=3), pdf.product(min_count=3), check_exact=False)
121
- self.assert_eq(psdf.product(axis=1, min_count=1), pdf.product(axis=1, min_count=1))
122
- self.assert_eq(psdf.loc[[]].product(), pdf.loc[[]].product())
123
- self.assert_eq(psdf.loc[[]].product(min_count=1), pdf.loc[[]].product(min_count=1))
124
-
125
- self.assert_eq(psdf["a"].product(), pdf["a"].product(), check_exact=False)
126
- self.assert_eq(
127
- psdf["a"].product(min_count=3), pdf["a"].product(min_count=3), check_exact=False
128
- )
129
- self.assert_eq(psdf["b"].product(min_count=3), pdf["b"].product(min_count=3))
130
- self.assert_eq(psdf["c"].product(min_count=3), pdf["c"].product(min_count=3))
131
- self.assert_eq(psdf["a"].loc[[]].product(), pdf["a"].loc[[]].product())
132
- self.assert_eq(
133
- psdf["a"].loc[[]].product(min_count=1), pdf["a"].loc[[]].product(min_count=1)
134
- )
135
-
136
- def test_abs(self):
137
- pdf = pd.DataFrame(
138
- {
139
- "A": [1, -2, np.nan, -4, 5],
140
- "B": [1.0, -2, np.nan, -4, 5],
141
- "C": [-6.0, -7, -8, np.nan, 10],
142
- "D": ["a", "b", "c", "d", np.nan],
143
- "E": [True, np.nan, False, True, True],
144
- }
145
- )
146
- psdf = ps.from_pandas(pdf)
147
- self.assert_eq(psdf.A.abs(), pdf.A.abs())
148
- self.assert_eq(psdf.B.abs(), pdf.B.abs())
149
- self.assert_eq(psdf.E.abs(), pdf.E.abs())
150
- # pandas' bug?
151
- # self.assert_eq(psdf[["B", "C", "E"]].abs(), pdf[["B", "C", "E"]].abs())
152
- self.assert_eq(psdf[["B", "C"]].abs(), pdf[["B", "C"]].abs())
153
- self.assert_eq(psdf[["E"]].abs(), pdf[["E"]].abs())
154
-
155
- with self.assertRaisesRegex(
156
- TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
157
- ):
158
- psdf.abs()
159
- with self.assertRaisesRegex(
160
- TypeError, "bad operand type for abs\\(\\): object \\(string\\)"
161
- ):
162
- psdf.D.abs()
163
-
164
- @unittest.skipIf(
165
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
166
- "TODO(SPARK-43498): Enable SeriesTests.test_axis_on_dataframe for pandas 2.0.0.",
167
- )
168
- def test_axis_on_dataframe(self):
169
- # The number of each count is intentionally big
170
- # because when data is small, it executes a shortcut.
171
- # Less than 'compute.shortcut_limit' will execute a shortcut
172
- # by using collected pandas dataframe directly.
173
- # now we set the 'compute.shortcut_limit' as 1000 explicitly
174
- with option_context("compute.shortcut_limit", 1000):
175
- pdf = pd.DataFrame(
176
- {
177
- "A": [1, -2, 3, -4, 5] * 300,
178
- "B": [1.0, -2, 3, -4, 5] * 300,
179
- "C": [-6.0, -7, -8, -9, 10] * 300,
180
- "D": [True, False, True, False, False] * 300,
181
- },
182
- index=range(10, 15001, 10),
183
- )
184
- psdf = ps.from_pandas(pdf)
185
- self.assert_eq(psdf.count(axis=1), pdf.count(axis=1))
186
- self.assert_eq(psdf.var(axis=1), pdf.var(axis=1))
187
- self.assert_eq(psdf.var(axis=1, ddof=0), pdf.var(axis=1, ddof=0))
188
- self.assert_eq(psdf.std(axis=1), pdf.std(axis=1))
189
- self.assert_eq(psdf.std(axis=1, ddof=0), pdf.std(axis=1, ddof=0))
190
- self.assert_eq(psdf.max(axis=1), pdf.max(axis=1))
191
- self.assert_eq(psdf.min(axis=1), pdf.min(axis=1))
192
- self.assert_eq(psdf.sum(axis=1), pdf.sum(axis=1))
193
- self.assert_eq(psdf.product(axis=1), pdf.product(axis=1))
194
- self.assert_eq(psdf.kurtosis(axis=0), pdf.kurtosis(axis=0), almost=True)
195
- self.assert_eq(psdf.kurtosis(axis=1), pdf.kurtosis(axis=1))
196
- self.assert_eq(psdf.skew(axis=0), pdf.skew(axis=0), almost=True)
197
- self.assert_eq(psdf.skew(axis=1), pdf.skew(axis=1))
198
- self.assert_eq(psdf.mean(axis=1), pdf.mean(axis=1))
199
- self.assert_eq(psdf.sem(axis=1), pdf.sem(axis=1))
200
- self.assert_eq(psdf.sem(axis=1, ddof=0), pdf.sem(axis=1, ddof=0))
201
-
202
- self.assert_eq(
203
- psdf.count(axis=1, numeric_only=True), pdf.count(axis=1, numeric_only=True)
204
- )
205
- self.assert_eq(psdf.var(axis=1, numeric_only=True), pdf.var(axis=1, numeric_only=True))
206
- self.assert_eq(
207
- psdf.var(axis=1, ddof=0, numeric_only=True),
208
- pdf.var(axis=1, ddof=0, numeric_only=True),
209
- )
210
- self.assert_eq(psdf.std(axis=1, numeric_only=True), pdf.std(axis=1, numeric_only=True))
211
- self.assert_eq(
212
- psdf.std(axis=1, ddof=0, numeric_only=True),
213
- pdf.std(axis=1, ddof=0, numeric_only=True),
214
- )
215
- self.assert_eq(
216
- psdf.max(axis=1, numeric_only=True),
217
- pdf.max(axis=1, numeric_only=True).astype(float),
218
- )
219
- self.assert_eq(
220
- psdf.min(axis=1, numeric_only=True),
221
- pdf.min(axis=1, numeric_only=True).astype(float),
222
- )
223
- self.assert_eq(
224
- psdf.sum(axis=1, numeric_only=True),
225
- pdf.sum(axis=1, numeric_only=True).astype(float),
226
- )
227
- self.assert_eq(
228
- psdf.product(axis=1, numeric_only=True),
229
- pdf.product(axis=1, numeric_only=True).astype(float),
230
- )
231
- self.assert_eq(
232
- psdf.kurtosis(axis=0, numeric_only=True),
233
- pdf.kurtosis(axis=0, numeric_only=True),
234
- almost=True,
235
- )
236
- self.assert_eq(
237
- psdf.kurtosis(axis=1, numeric_only=True), pdf.kurtosis(axis=1, numeric_only=True)
238
- )
239
- self.assert_eq(
240
- psdf.skew(axis=0, numeric_only=True),
241
- pdf.skew(axis=0, numeric_only=True),
242
- almost=True,
243
- )
244
- self.assert_eq(
245
- psdf.skew(axis=1, numeric_only=True), pdf.skew(axis=1, numeric_only=True)
246
- )
247
- self.assert_eq(
248
- psdf.mean(axis=1, numeric_only=True), pdf.mean(axis=1, numeric_only=True)
249
- )
250
- self.assert_eq(psdf.sem(axis=1, numeric_only=True), pdf.sem(axis=1, numeric_only=True))
251
- self.assert_eq(
252
- psdf.sem(axis=1, ddof=0, numeric_only=True),
253
- pdf.sem(axis=1, ddof=0, numeric_only=True),
254
- )
255
-
256
- def test_skew_kurt_numerical_stability(self):
257
- pdf = pd.DataFrame(
258
- {
259
- "A": [1, 1, 1, 1, 1],
260
- "B": [1.0, np.nan, 4, 2, 5],
261
- "C": [-6.0, -7, np.nan, np.nan, 10],
262
- "D": [1.2, np.nan, np.nan, 9.8, np.nan],
263
- "E": [1, np.nan, np.nan, np.nan, np.nan],
264
- "F": [np.nan, np.nan, np.nan, np.nan, np.nan],
265
- }
266
- )
267
- psdf = ps.from_pandas(pdf)
268
- self.assert_eq(psdf.skew(), pdf.skew(), almost=True)
269
- self.assert_eq(psdf.kurt(), pdf.kurt(), almost=True)
270
-
271
- def test_dataframe_corr(self):
272
- pdf = makeMissingDataframe(0.3, 42)
273
- psdf = ps.from_pandas(pdf)
274
-
275
- with self.assertRaisesRegex(ValueError, "Invalid method"):
276
- psdf.corr("std")
277
- with self.assertRaisesRegex(TypeError, "Invalid min_periods type"):
278
- psdf.corr(min_periods="3")
279
-
280
- for method in ["pearson", "spearman", "kendall"]:
281
- self.assert_eq(psdf.corr(method=method), pdf.corr(method=method), check_exact=False)
282
- self.assert_eq(
283
- psdf.corr(method=method, min_periods=1),
284
- pdf.corr(method=method, min_periods=1),
285
- check_exact=False,
286
- )
287
- self.assert_eq(
288
- psdf.corr(method=method, min_periods=3),
289
- pdf.corr(method=method, min_periods=3),
290
- check_exact=False,
291
- )
292
- self.assert_eq(
293
- (psdf + 1).corr(method=method, min_periods=2),
294
- (pdf + 1).corr(method=method, min_periods=2),
295
- check_exact=False,
296
- )
297
-
298
- # multi-index columns
299
- columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B"), ("Y", "C"), ("Z", "D")])
300
- pdf.columns = columns
301
- psdf.columns = columns
302
-
303
- for method in ["pearson", "spearman", "kendall"]:
304
- self.assert_eq(psdf.corr(method=method), pdf.corr(method=method), check_exact=False)
305
- self.assert_eq(
306
- psdf.corr(method=method, min_periods=1),
307
- pdf.corr(method=method, min_periods=1),
308
- check_exact=False,
309
- )
310
- self.assert_eq(
311
- psdf.corr(method=method, min_periods=3),
312
- pdf.corr(method=method, min_periods=3),
313
- check_exact=False,
314
- )
315
- self.assert_eq(
316
- (psdf + 1).corr(method=method, min_periods=2),
317
- (pdf + 1).corr(method=method, min_periods=2),
318
- check_exact=False,
319
- )
320
-
321
- # test with identical values
322
- pdf = pd.DataFrame(
323
- {
324
- "a": [0, 1, 1, 1, 0],
325
- "b": [2, 2, -1, 1, np.nan],
326
- "c": [3, 3, 3, 3, 3],
327
- "d": [np.nan, np.nan, np.nan, np.nan, np.nan],
328
- }
329
- )
330
- psdf = ps.from_pandas(pdf)
331
-
332
- for method in ["pearson", "spearman", "kendall"]:
333
- self.assert_eq(psdf.corr(method=method), pdf.corr(method=method), check_exact=False)
334
- self.assert_eq(
335
- psdf.corr(method=method, min_periods=1),
336
- pdf.corr(method=method, min_periods=1),
337
- check_exact=False,
338
- )
339
- self.assert_eq(
340
- psdf.corr(method=method, min_periods=3),
341
- pdf.corr(method=method, min_periods=3),
342
- check_exact=False,
343
- )
344
-
345
- def test_series_corr(self):
346
- pdf = makeMissingDataframe(0.3, 42)
347
- pser1 = pdf.A
348
- pser2 = pdf.B
349
- psdf = ps.from_pandas(pdf)
350
- psser1 = psdf.A
351
- psser2 = psdf.B
352
-
353
- with self.assertRaisesRegex(ValueError, "Invalid method"):
354
- psser1.corr(psser2, method="std")
355
- with self.assertRaisesRegex(TypeError, "Invalid min_periods type"):
356
- psser1.corr(psser2, min_periods="3")
357
-
358
- for method in ["pearson", "spearman", "kendall"]:
359
- self.assert_eq(
360
- psser1.corr(psser2, method=method),
361
- pser1.corr(pser2, method=method),
362
- almost=True,
363
- )
364
- self.assert_eq(
365
- psser1.corr(psser2, method=method, min_periods=1),
366
- pser1.corr(pser2, method=method, min_periods=1),
367
- almost=True,
368
- )
369
- self.assert_eq(
370
- psser1.corr(psser2, method=method, min_periods=3),
371
- pser1.corr(pser2, method=method, min_periods=3),
372
- almost=True,
373
- )
374
- self.assert_eq(
375
- (psser1 + 1).corr(psser2 - 2, method=method, min_periods=2),
376
- (pser1 + 1).corr(pser2 - 2, method=method, min_periods=2),
377
- almost=True,
378
- )
379
-
380
- # different anchors
381
- psser1 = ps.from_pandas(pser1)
382
- psser2 = ps.from_pandas(pser2)
383
-
384
- with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
385
- psser1.corr(psser2)
386
-
387
- for method in ["pearson", "spearman", "kendall"]:
388
- with ps.option_context("compute.ops_on_diff_frames", True):
389
- self.assert_eq(
390
- psser1.corr(psser2, method=method),
391
- pser1.corr(pser2, method=method),
392
- almost=True,
393
- )
394
- self.assert_eq(
395
- psser1.corr(psser2, method=method, min_periods=1),
396
- pser1.corr(pser2, method=method, min_periods=1),
397
- almost=True,
398
- )
399
- self.assert_eq(
400
- psser1.corr(psser2, method=method, min_periods=3),
401
- pser1.corr(pser2, method=method, min_periods=3),
402
- almost=True,
403
- )
404
- self.assert_eq(
405
- (psser1 + 1).corr(psser2 - 2, method=method, min_periods=2),
406
- (pser1 + 1).corr(pser2 - 2, method=method, min_periods=2),
407
- almost=True,
408
- )
409
-
410
- @unittest.skipIf(
411
- LooseVersion(pd.__version__) >= LooseVersion("2.0.0"),
412
- "TODO(SPARK-43497): Enable SeriesTests.test_cov_corr_meta for pandas 2.0.0.",
413
- )
414
- def test_cov_corr_meta(self):
415
- # Disable arrow execution since corr() is using UDT internally which is not supported.
416
- with self.sql_conf({SPARK_CONF_ARROW_ENABLED: False}):
417
- pdf = pd.DataFrame(
418
- {
419
- "a": np.array([1, 2, 3], dtype="i1"),
420
- "b": np.array([1, 2, 3], dtype="i2"),
421
- "c": np.array([1, 2, 3], dtype="i4"),
422
- "d": np.array([1, 2, 3]),
423
- "e": np.array([1.0, 2.0, 3.0], dtype="f4"),
424
- "f": np.array([1.0, 2.0, 3.0]),
425
- "g": np.array([True, False, True]),
426
- "h": np.array(list("abc")),
427
- },
428
- index=pd.Index([1, 2, 3], name="myindex"),
429
- )
430
- psdf = ps.from_pandas(pdf)
431
- self.assert_eq(psdf.corr(), pdf.corr(), check_exact=False)
432
-
433
- def test_stats_on_boolean_dataframe(self):
434
- pdf = pd.DataFrame({"A": [True, False, True], "B": [False, False, True]})
435
- psdf = ps.from_pandas(pdf)
436
-
437
- self.assert_eq(psdf.min(), pdf.min())
438
- self.assert_eq(psdf.max(), pdf.max())
439
- self.assert_eq(psdf.count(), pdf.count())
440
-
441
- self.assert_eq(psdf.sum(), pdf.sum())
442
- self.assert_eq(psdf.product(), pdf.product())
443
- self.assert_eq(psdf.mean(), pdf.mean())
444
-
445
- self.assert_eq(psdf.var(), pdf.var(), check_exact=False)
446
- self.assert_eq(psdf.var(ddof=0), pdf.var(ddof=0), check_exact=False)
447
- self.assert_eq(psdf.std(), pdf.std(), check_exact=False)
448
- self.assert_eq(psdf.std(ddof=0), pdf.std(ddof=0), check_exact=False)
449
- self.assert_eq(psdf.sem(), pdf.sem(), check_exact=False)
450
- self.assert_eq(psdf.sem(ddof=0), pdf.sem(ddof=0), check_exact=False)
451
-
452
- def test_stats_on_boolean_series(self):
453
- pser = pd.Series([True, False, True])
454
- psser = ps.from_pandas(pser)
455
-
456
- self.assert_eq(psser.min(), pser.min())
457
- self.assert_eq(psser.max(), pser.max())
458
- self.assert_eq(psser.count(), pser.count())
459
-
460
- self.assert_eq(psser.sum(), pser.sum())
461
- self.assert_eq(psser.product(), pser.product())
462
- self.assert_eq(psser.mean(), pser.mean())
463
-
464
- self.assert_eq(psser.var(), pser.var(), almost=True)
465
- self.assert_eq(psser.var(ddof=0), pser.var(ddof=0), almost=True)
466
- self.assert_eq(psser.var(ddof=2), pser.var(ddof=2), almost=True)
467
- self.assert_eq(psser.std(), pser.std(), almost=True)
468
- self.assert_eq(psser.std(ddof=0), pser.std(ddof=0), almost=True)
469
- self.assert_eq(psser.std(ddof=2), pser.std(ddof=2), almost=True)
470
- self.assert_eq(psser.sem(), pser.sem(), almost=True)
471
- self.assert_eq(psser.sem(ddof=0), pser.sem(ddof=0), almost=True)
472
- self.assert_eq(psser.sem(ddof=2), pser.sem(ddof=2), almost=True)
473
-
474
- def test_stats_on_non_numeric_columns_should_be_discarded_if_numeric_only_is_true(self):
475
- pdf = pd.DataFrame({"i": [0, 1, 2], "b": [False, False, True], "s": ["x", "y", "z"]})
476
- psdf = ps.from_pandas(pdf)
477
-
478
- self.assert_eq(
479
- psdf[["i", "s"]].max(numeric_only=True), pdf[["i", "s"]].max(numeric_only=True)
480
- )
481
- self.assert_eq(
482
- psdf[["b", "s"]].max(numeric_only=True), pdf[["b", "s"]].max(numeric_only=True)
483
- )
484
- self.assert_eq(
485
- psdf[["i", "s"]].min(numeric_only=True), pdf[["i", "s"]].min(numeric_only=True)
486
- )
487
- self.assert_eq(
488
- psdf[["b", "s"]].min(numeric_only=True), pdf[["b", "s"]].min(numeric_only=True)
489
- )
490
- self.assert_eq(psdf.count(numeric_only=True), pdf.count(numeric_only=True))
491
-
492
- self.assert_eq(psdf.sum(numeric_only=True), pdf.sum(numeric_only=True))
493
- self.assert_eq(psdf.product(numeric_only=True), pdf.product(numeric_only=True))
494
-
495
- self.assert_eq(psdf.mean(numeric_only=True), pdf.mean(numeric_only=True))
496
-
497
- self.assert_eq(psdf.var(numeric_only=True), pdf.var(numeric_only=True), check_exact=False)
498
- self.assert_eq(
499
- psdf.var(ddof=0, numeric_only=True),
500
- pdf.var(ddof=0, numeric_only=True),
501
- check_exact=False,
502
- )
503
- self.assert_eq(
504
- psdf.var(ddof=2, numeric_only=True),
505
- pdf.var(ddof=2, numeric_only=True),
506
- check_exact=False,
507
- )
508
- self.assert_eq(psdf.std(numeric_only=True), pdf.std(numeric_only=True), check_exact=False)
509
- self.assert_eq(
510
- psdf.std(ddof=0, numeric_only=True),
511
- pdf.std(ddof=0, numeric_only=True),
512
- check_exact=False,
513
- )
514
- self.assert_eq(
515
- psdf.std(ddof=2, numeric_only=True),
516
- pdf.std(ddof=2, numeric_only=True),
517
- check_exact=False,
518
- )
519
- self.assert_eq(psdf.sem(numeric_only=True), pdf.sem(numeric_only=True), check_exact=False)
520
- self.assert_eq(
521
- psdf.sem(ddof=0, numeric_only=True),
522
- pdf.sem(ddof=0, numeric_only=True),
523
- check_exact=False,
524
- )
525
- self.assert_eq(
526
- psdf.sem(ddof=2, numeric_only=True),
527
- pdf.sem(ddof=2, numeric_only=True),
528
- check_exact=False,
529
- )
530
-
531
- self.assert_eq(len(psdf.median(numeric_only=True)), len(pdf.median(numeric_only=True)))
532
- self.assert_eq(len(psdf.kurtosis(numeric_only=True)), len(pdf.kurtosis(numeric_only=True)))
533
- self.assert_eq(len(psdf.skew(numeric_only=True)), len(pdf.skew(numeric_only=True)))
534
-
535
- # Boolean was excluded because of a behavior change in NumPy
536
- # https://github.com/numpy/numpy/pull/16273#discussion_r641264085 which pandas inherits
537
- # but this behavior is inconsistent in pandas context.
538
- # Boolean column in quantile tests are excluded for now.
539
- # TODO(SPARK-35555): track and match the behavior of quantile to pandas'
540
- pdf = pd.DataFrame({"i": [0, 1, 2], "s": ["x", "y", "z"]})
541
- psdf = ps.from_pandas(pdf)
542
- self.assert_eq(
543
- len(psdf.quantile(q=0.5, numeric_only=True)),
544
- len(pdf.quantile(q=0.5, numeric_only=True)),
545
- )
546
- self.assert_eq(
547
- len(psdf.quantile(q=[0.25, 0.5, 0.75], numeric_only=True)),
548
- len(pdf.quantile(q=[0.25, 0.5, 0.75], numeric_only=True)),
549
- )
550
-
551
- def test_numeric_only_unsupported(self):
552
- pdf = pd.DataFrame({"i": [0, 1, 2], "b": [False, False, True], "s": ["x", "y", "z"]})
553
- psdf = ps.from_pandas(pdf)
554
-
555
- self.assert_eq(psdf.sum(numeric_only=True), pdf.sum(numeric_only=True))
556
- self.assert_eq(
557
- psdf[["i", "b"]].sum(numeric_only=False), pdf[["i", "b"]].sum(numeric_only=False)
558
- )
559
-
560
- with self.assertRaisesRegex(TypeError, "Could not convert object \\(string\\) to numeric"):
561
- psdf.sum(numeric_only=False)
562
-
563
- with self.assertRaisesRegex(TypeError, "Could not convert object \\(string\\) to numeric"):
564
- psdf.s.sum()
565
-
566
-
567
- class StatsTests(StatsTestsMixin, PandasOnSparkTestCase, SQLTestUtils):
568
- pass
569
-
570
-
571
- if __name__ == "__main__":
572
- import unittest
573
- from pyspark.pandas.tests.test_stats import * # noqa: F401
574
-
575
- try:
576
- import xmlrunner
577
-
578
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
579
- except ImportError:
580
- testRunner = None
581
- unittest.main(testRunner=testRunner, verbosity=2)