snowpark-connect 0.24.0__py3-none-any.whl → 0.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. See the package registry's advisory page for more details.

Files changed (484)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +23 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +22 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +88 -29
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +840 -367
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  17. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  18. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  21. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_map_partitions.py +9 -4
  24. snowflake/snowpark_connect/relation/map_relation.py +12 -1
  25. snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
  26. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  27. snowflake/snowpark_connect/relation/map_udtf.py +100 -46
  28. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  29. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  30. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  31. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  32. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  33. snowflake/snowpark_connect/relation/utils.py +44 -0
  34. snowflake/snowpark_connect/relation/write/map_write.py +175 -75
  35. snowflake/snowpark_connect/resources_initializer.py +47 -6
  36. snowflake/snowpark_connect/server.py +26 -4
  37. snowflake/snowpark_connect/type_mapping.py +29 -25
  38. snowflake/snowpark_connect/typed_column.py +14 -0
  39. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  40. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  41. snowflake/snowpark_connect/utils/context.py +6 -1
  42. snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
  43. snowflake/snowpark_connect/utils/scala_udf_utils.py +596 -0
  44. snowflake/snowpark_connect/utils/session.py +4 -0
  45. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  46. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  47. snowflake/snowpark_connect/utils/udf_utils.py +22 -1
  48. snowflake/snowpark_connect/utils/udtf_utils.py +1 -0
  49. snowflake/snowpark_connect/version.py +1 -1
  50. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/METADATA +1 -1
  51. snowpark_connect-0.26.0.dist-info/RECORD +481 -0
  52. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  56. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  57. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  99. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  101. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  102. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  106. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  107. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  108. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  109. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  370. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  371. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  441. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  442. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  443. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  444. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  445. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  446. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  447. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  448. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  449. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  450. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  451. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  452. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  453. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  468. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  469. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  470. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  471. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  472. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  473. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  474. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  475. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  476. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  477. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-connect +0 -0
  478. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-session +0 -0
  479. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-submit +0 -0
  480. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/WHEEL +0 -0
  481. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE-binary +0 -0
  482. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE.txt +0 -0
  483. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/NOTICE-binary +0 -0
  484. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/top_level.txt +0 -0
@@ -1,1686 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- #
3
- # Licensed to the Apache Software Foundation (ASF) under one or more
4
- # contributor license agreements. See the NOTICE file distributed with
5
- # this work for additional information regarding copyright ownership.
6
- # The ASF licenses this file to You under the Apache License, Version 2.0
7
- # (the "License"); you may not use this file except in compliance with
8
- # the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
- import unittest
19
- import difflib
20
- from itertools import zip_longest
21
-
22
- from pyspark.sql.functions import sha2, to_timestamp
23
- from pyspark.errors import (
24
- AnalysisException,
25
- ParseException,
26
- PySparkAssertionError,
27
- IllegalArgumentException,
28
- SparkUpgradeException,
29
- )
30
- from pyspark.testing.utils import assertDataFrameEqual, assertSchemaEqual, _context_diff, have_numpy
31
- from pyspark.testing.sqlutils import ReusedSQLTestCase
32
- from pyspark.sql import Row
33
- import pyspark.sql.functions as F
34
- from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime
35
- from pyspark.sql.types import (
36
- StringType,
37
- ArrayType,
38
- LongType,
39
- StructType,
40
- MapType,
41
- FloatType,
42
- DoubleType,
43
- StructField,
44
- IntegerType,
45
- BooleanType,
46
- )
47
- from pyspark.testing.sqlutils import have_pandas
48
-
49
-
50
- class UtilsTestsMixin:
51
- def test_assert_equal_inttype(self):
52
- df1 = self.spark.createDataFrame(
53
- data=[
54
- ("1", 1000),
55
- ("2", 3000),
56
- ],
57
- schema=["id", "amount"],
58
- )
59
- df2 = self.spark.createDataFrame(
60
- data=[
61
- ("1", 1000),
62
- ("2", 3000),
63
- ],
64
- schema=["id", "amount"],
65
- )
66
-
67
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
68
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
69
-
70
- def test_assert_equal_arraytype(self):
71
- df1 = self.spark.createDataFrame(
72
- data=[
73
- ("john", ["Python", "Java"]),
74
- ("jane", ["Scala", "SQL", "Java"]),
75
- ],
76
- schema=StructType(
77
- [
78
- StructField("name", StringType(), True),
79
- StructField("languages", ArrayType(StringType()), True),
80
- ]
81
- ),
82
- )
83
- df2 = self.spark.createDataFrame(
84
- data=[
85
- ("john", ["Python", "Java"]),
86
- ("jane", ["Scala", "SQL", "Java"]),
87
- ],
88
- schema=StructType(
89
- [
90
- StructField("name", StringType(), True),
91
- StructField("languages", ArrayType(StringType()), True),
92
- ]
93
- ),
94
- )
95
-
96
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
97
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
98
-
99
- def test_assert_approx_equal_arraytype_float(self):
100
- df1 = self.spark.createDataFrame(
101
- data=[
102
- ("student1", [97.01, 89.23]),
103
- ("student2", [91.86, 84.34]),
104
- ],
105
- schema=StructType(
106
- [
107
- StructField("student", StringType(), True),
108
- StructField("grades", ArrayType(FloatType()), True),
109
- ]
110
- ),
111
- )
112
- df2 = self.spark.createDataFrame(
113
- data=[
114
- ("student1", [97.01, 89.23]),
115
- ("student2", [91.86, 84.339999]),
116
- ],
117
- schema=StructType(
118
- [
119
- StructField("student", StringType(), True),
120
- StructField("grades", ArrayType(FloatType()), True),
121
- ]
122
- ),
123
- )
124
-
125
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
126
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
127
-
128
- def test_assert_approx_equal_arraytype_float_default_rtol_fail(self):
129
- # fails with default rtol, 1e-5
130
- df1 = self.spark.createDataFrame(
131
- data=[
132
- ("student1", [97.01, 89.23]),
133
- ("student2", [91.86, 84.34]),
134
- ],
135
- schema=StructType(
136
- [
137
- StructField("student", StringType(), True),
138
- StructField("grades", ArrayType(FloatType()), True),
139
- ]
140
- ),
141
- )
142
- df2 = self.spark.createDataFrame(
143
- data=[
144
- ("student1", [97.01, 89.23]),
145
- ("student2", [91.86, 84.341]),
146
- ],
147
- schema=StructType(
148
- [
149
- StructField("student", StringType(), True),
150
- StructField("grades", ArrayType(FloatType()), True),
151
- ]
152
- ),
153
- )
154
-
155
- rows_str1 = ""
156
- rows_str2 = ""
157
-
158
- # count different rows
159
- for r1, r2 in list(zip_longest(df1.collect(), df2.collect())):
160
- rows_str1 += str(r1) + "\n"
161
- rows_str2 += str(r2) + "\n"
162
-
163
- generated_diff = _context_diff(
164
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=2
165
- )
166
-
167
- error_msg = "Results do not match: "
168
- percent_diff = (1 / 2) * 100
169
- error_msg += "( %.5f %% )" % percent_diff
170
- error_msg += "\n" + "\n".join(generated_diff)
171
-
172
- with self.assertRaises(PySparkAssertionError) as pe:
173
- assertDataFrameEqual(df1, df2)
174
-
175
- self.check_error(
176
- exception=pe.exception,
177
- error_class="DIFFERENT_ROWS",
178
- message_parameters={"error_msg": error_msg},
179
- )
180
-
181
- with self.assertRaises(PySparkAssertionError) as pe:
182
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
183
-
184
- self.check_error(
185
- exception=pe.exception,
186
- error_class="DIFFERENT_ROWS",
187
- message_parameters={"error_msg": error_msg},
188
- )
189
-
190
- def test_assert_approx_equal_arraytype_float_custom_rtol_pass(self):
191
- # passes with custom rtol, 1e-2
192
- df1 = self.spark.createDataFrame(
193
- data=[
194
- ("student1", [97.01, 89.23]),
195
- ("student2", [91.86, 84.34]),
196
- ],
197
- schema=StructType(
198
- [
199
- StructField("student", StringType(), True),
200
- StructField("grades", ArrayType(FloatType()), True),
201
- ]
202
- ),
203
- )
204
- df2 = self.spark.createDataFrame(
205
- data=[
206
- ("student1", [97.01, 89.23]),
207
- ("student2", [91.86, 84.341]),
208
- ],
209
- schema=StructType(
210
- [
211
- StructField("student", StringType(), True),
212
- StructField("grades", ArrayType(FloatType()), True),
213
- ]
214
- ),
215
- )
216
-
217
- assertDataFrameEqual(df1, df2, rtol=1e-2)
218
-
219
- def test_assert_approx_equal_doubletype_custom_rtol_pass(self):
220
- # passes with custom rtol, 1e-2
221
- df1 = self.spark.createDataFrame(
222
- data=[
223
- ("student1", 97.01),
224
- ("student2", 84.34),
225
- ],
226
- schema=StructType(
227
- [
228
- StructField("student", StringType(), True),
229
- StructField("grade", DoubleType(), True),
230
- ]
231
- ),
232
- )
233
- df2 = self.spark.createDataFrame(
234
- data=[
235
- ("student1", 97.01),
236
- ("student2", 84.341),
237
- ],
238
- schema=StructType(
239
- [
240
- StructField("student", StringType(), True),
241
- StructField("grade", DoubleType(), True),
242
- ]
243
- ),
244
- )
245
-
246
- assertDataFrameEqual(df1, df2, rtol=1e-2)
247
-
248
- def test_assert_approx_equal_decimaltype_custom_rtol_pass(self):
249
- # passes with custom rtol, 1e-2
250
- df1 = self.spark.createDataFrame(
251
- data=[
252
- ("student1", 83.14),
253
- ("student2", 97.12),
254
- ],
255
- schema=StructType(
256
- [
257
- StructField("student", StringType(), True),
258
- StructField("grade", DoubleType(), True),
259
- ]
260
- ),
261
- )
262
- df2 = self.spark.createDataFrame(
263
- data=[
264
- ("student1", 83.14),
265
- ("student2", 97.111),
266
- ],
267
- schema=StructType(
268
- [
269
- StructField("student", StringType(), True),
270
- StructField("grade", DoubleType(), True),
271
- ]
272
- ),
273
- )
274
-
275
- # cast to DecimalType
276
- df1 = df1.withColumn("col_1", F.col("grade").cast("decimal(4,3)"))
277
- df2 = df2.withColumn("col_1", F.col("grade").cast("decimal(4,3)"))
278
-
279
- assertDataFrameEqual(df1, df2, rtol=1e-1)
280
-
281
- def test_assert_notequal_arraytype(self):
282
- df1 = self.spark.createDataFrame(
283
- data=[
284
- ("Amy", ["C++", "Rust"]),
285
- ("John", ["Python", "Java"]),
286
- ("Jane", ["Scala", "SQL", "Java"]),
287
- ],
288
- schema=StructType(
289
- [
290
- StructField("name", StringType(), True),
291
- StructField("languages", ArrayType(StringType()), True),
292
- ]
293
- ),
294
- )
295
- df2 = self.spark.createDataFrame(
296
- data=[
297
- ("Amy", ["C++", "Rust"]),
298
- ("John", ["Python", "Java"]),
299
- ("Jane", ["Scala", "Java"]),
300
- ],
301
- schema=StructType(
302
- [
303
- StructField("name", StringType(), True),
304
- StructField("languages", ArrayType(StringType()), True),
305
- ]
306
- ),
307
- )
308
-
309
- rows_str1 = ""
310
- rows_str2 = ""
311
-
312
- sorted_list1 = sorted(df1.collect(), key=lambda x: str(x))
313
- sorted_list2 = sorted(df2.collect(), key=lambda x: str(x))
314
-
315
- # count different rows
316
- for r1, r2 in list(zip_longest(sorted_list1, sorted_list2)):
317
- rows_str1 += str(r1) + "\n"
318
- rows_str2 += str(r2) + "\n"
319
-
320
- generated_diff = _context_diff(
321
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=3
322
- )
323
-
324
- error_msg = "Results do not match: "
325
- percent_diff = (1 / 3) * 100
326
- error_msg += "( %.5f %% )" % percent_diff
327
- error_msg += "\n" + "\n".join(generated_diff)
328
-
329
- with self.assertRaises(PySparkAssertionError) as pe:
330
- assertDataFrameEqual(df1, df2)
331
-
332
- self.check_error(
333
- exception=pe.exception,
334
- error_class="DIFFERENT_ROWS",
335
- message_parameters={"error_msg": error_msg},
336
- )
337
-
338
- rows_str1 = ""
339
- rows_str2 = ""
340
-
341
- # count different rows
342
- for r1, r2 in list(zip_longest(df1.collect(), df2.collect())):
343
- rows_str1 += str(r1) + "\n"
344
- rows_str2 += str(r2) + "\n"
345
-
346
- generated_diff = _context_diff(
347
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=3
348
- )
349
-
350
- error_msg = "Results do not match: "
351
- percent_diff = (1 / 3) * 100
352
- error_msg += "( %.5f %% )" % percent_diff
353
- error_msg += "\n" + "\n".join(generated_diff)
354
-
355
- with self.assertRaises(PySparkAssertionError) as pe:
356
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
357
-
358
- self.check_error(
359
- exception=pe.exception,
360
- error_class="DIFFERENT_ROWS",
361
- message_parameters={"error_msg": error_msg},
362
- )
363
-
364
- def test_assert_equal_maptype(self):
365
- df1 = self.spark.createDataFrame(
366
- data=[
367
- ("student1", {"id": 222342203655477580}),
368
- ("student2", {"id": 422322203155477692}),
369
- ],
370
- schema=StructType(
371
- [
372
- StructField("student", StringType(), True),
373
- StructField("properties", MapType(StringType(), LongType()), True),
374
- ]
375
- ),
376
- )
377
- df2 = self.spark.createDataFrame(
378
- data=[
379
- ("student1", {"id": 222342203655477580}),
380
- ("student2", {"id": 422322203155477692}),
381
- ],
382
- schema=StructType(
383
- [
384
- StructField("student", StringType(), True),
385
- StructField("properties", MapType(StringType(), LongType()), True),
386
- ]
387
- ),
388
- )
389
-
390
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
391
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
392
-
393
- def test_assert_approx_equal_maptype_double(self):
394
- df1 = self.spark.createDataFrame(
395
- data=[
396
- ("student1", {"math": 76.23, "english": 92.64}),
397
- ("student2", {"math": 87.89, "english": 84.48}),
398
- ],
399
- schema=StructType(
400
- [
401
- StructField("student", StringType(), True),
402
- StructField("grades", MapType(StringType(), DoubleType()), True),
403
- ]
404
- ),
405
- )
406
- df2 = self.spark.createDataFrame(
407
- data=[
408
- ("student1", {"math": 76.23, "english": 92.63999999}),
409
- ("student2", {"math": 87.89, "english": 84.48}),
410
- ],
411
- schema=StructType(
412
- [
413
- StructField("student", StringType(), True),
414
- StructField("grades", MapType(StringType(), DoubleType()), True),
415
- ]
416
- ),
417
- )
418
-
419
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
420
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
421
-
422
- def test_assert_approx_equal_nested_struct_double(self):
423
- df1 = self.spark.createDataFrame(
424
- data=[
425
- ("jane", (64.57, 76.63, 97.81)),
426
- ("john", (93.92, 91.57, 84.36)),
427
- ],
428
- schema=StructType(
429
- [
430
- StructField("name", StringType(), True),
431
- StructField(
432
- "grades",
433
- StructType(
434
- [
435
- StructField("math", DoubleType(), True),
436
- StructField("english", DoubleType(), True),
437
- StructField("biology", DoubleType(), True),
438
- ]
439
- ),
440
- ),
441
- ]
442
- ),
443
- )
444
-
445
- df2 = self.spark.createDataFrame(
446
- data=[
447
- ("jane", (64.57, 76.63, 97.81000001)),
448
- ("john", (93.92, 91.57, 84.36)),
449
- ],
450
- schema=StructType(
451
- [
452
- StructField("name", StringType(), True),
453
- StructField(
454
- "grades",
455
- StructType(
456
- [
457
- StructField("math", DoubleType(), True),
458
- StructField("english", DoubleType(), True),
459
- StructField("biology", DoubleType(), True),
460
- ]
461
- ),
462
- ),
463
- ]
464
- ),
465
- )
466
-
467
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
468
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
469
-
470
- def test_assert_equal_nested_struct_str(self):
471
- df1 = self.spark.createDataFrame(
472
- data=[
473
- (1, ("jane", "anne", "doe")),
474
- (2, ("john", "bob", "smith")),
475
- ],
476
- schema=StructType(
477
- [
478
- StructField("id", IntegerType(), True),
479
- StructField(
480
- "name",
481
- StructType(
482
- [
483
- StructField("first", StringType(), True),
484
- StructField("middle", StringType(), True),
485
- StructField("last", StringType(), True),
486
- ]
487
- ),
488
- ),
489
- ]
490
- ),
491
- )
492
-
493
- df2 = self.spark.createDataFrame(
494
- data=[
495
- (1, ("jane", "anne", "doe")),
496
- (2, ("john", "bob", "smith")),
497
- ],
498
- schema=StructType(
499
- [
500
- StructField("id", IntegerType(), True),
501
- StructField(
502
- "name",
503
- StructType(
504
- [
505
- StructField("first", StringType(), True),
506
- StructField("middle", StringType(), True),
507
- StructField("last", StringType(), True),
508
- ]
509
- ),
510
- ),
511
- ]
512
- ),
513
- )
514
-
515
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
516
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
517
-
518
- def test_assert_equal_nested_struct_str_duplicate(self):
519
- df1 = self.spark.createDataFrame(
520
- data=[
521
- (1, ("jane doe", "jane doe")),
522
- (2, ("john smith", "john smith")),
523
- ],
524
- schema=StructType(
525
- [
526
- StructField("id", IntegerType(), True),
527
- StructField(
528
- "full name",
529
- StructType(
530
- [
531
- StructField("name", StringType(), True),
532
- StructField("name", StringType(), True),
533
- ]
534
- ),
535
- ),
536
- ]
537
- ),
538
- )
539
-
540
- df2 = self.spark.createDataFrame(
541
- data=[
542
- (1, ("jane doe", "jane doe")),
543
- (2, ("john smith", "john smith")),
544
- ],
545
- schema=StructType(
546
- [
547
- StructField("id", IntegerType(), True),
548
- StructField(
549
- "full name",
550
- StructType(
551
- [
552
- StructField("name", StringType(), True),
553
- StructField("name", StringType(), True),
554
- ]
555
- ),
556
- ),
557
- ]
558
- ),
559
- )
560
-
561
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
562
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
563
-
564
- def test_assert_equal_duplicate_col(self):
565
- df1 = self.spark.createDataFrame(
566
- data=[
567
- (1, "Python", 1, 1),
568
- (2, "Scala", 2, 2),
569
- ],
570
- schema=["number", "language", "number", "number"],
571
- )
572
- df2 = self.spark.createDataFrame(
573
- data=[
574
- (1, "Python", 1, 1),
575
- (2, "Scala", 2, 2),
576
- ],
577
- schema=["number", "language", "number", "number"],
578
- )
579
-
580
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
581
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
582
-
583
- def test_assert_equal_timestamp(self):
584
- df1 = self.spark.createDataFrame(
585
- data=[("1", "2023-01-01 12:01:01.000")], schema=["id", "timestamp"]
586
- )
587
-
588
- df2 = self.spark.createDataFrame(
589
- data=[("1", "2023-01-01 12:01:01.000")], schema=["id", "timestamp"]
590
- )
591
-
592
- df1 = df1.withColumn("timestamp", to_timestamp("timestamp"))
593
- df2 = df2.withColumn("timestamp", to_timestamp("timestamp"))
594
-
595
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
596
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
597
-
598
- def test_assert_equal_nullrow(self):
599
- df1 = self.spark.createDataFrame(
600
- data=[
601
- ("1", 1000),
602
- (None, None),
603
- ],
604
- schema=["id", "amount"],
605
- )
606
- df2 = self.spark.createDataFrame(
607
- data=[
608
- ("1", 1000),
609
- (None, None),
610
- ],
611
- schema=["id", "amount"],
612
- )
613
-
614
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
615
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
616
-
617
- def test_assert_notequal_nullval(self):
618
- df1 = self.spark.createDataFrame(
619
- data=[
620
- ("1", 1000),
621
- ("2", 2000),
622
- ],
623
- schema=["id", "amount"],
624
- )
625
- df2 = self.spark.createDataFrame(
626
- data=[
627
- ("1", 1000),
628
- ("2", None),
629
- ],
630
- schema=["id", "amount"],
631
- )
632
-
633
- rows_str1 = ""
634
- rows_str2 = ""
635
-
636
- # count different rows
637
- for r1, r2 in list(zip_longest(df1.collect(), df2.collect())):
638
- rows_str1 += str(r1) + "\n"
639
- rows_str2 += str(r2) + "\n"
640
-
641
- generated_diff = _context_diff(
642
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=2
643
- )
644
-
645
- error_msg = "Results do not match: "
646
- percent_diff = (1 / 2) * 100
647
- error_msg += "( %.5f %% )" % percent_diff
648
- error_msg += "\n" + "\n".join(generated_diff)
649
-
650
- with self.assertRaises(PySparkAssertionError) as pe:
651
- assertDataFrameEqual(df1, df2)
652
-
653
- self.check_error(
654
- exception=pe.exception,
655
- error_class="DIFFERENT_ROWS",
656
- message_parameters={"error_msg": error_msg},
657
- )
658
-
659
- with self.assertRaises(PySparkAssertionError) as pe:
660
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
661
-
662
- self.check_error(
663
- exception=pe.exception,
664
- error_class="DIFFERENT_ROWS",
665
- message_parameters={"error_msg": error_msg},
666
- )
667
-
668
- def test_assert_equal_nulldf(self):
669
- df1 = None
670
- df2 = None
671
-
672
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
673
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
674
-
675
- def test_assert_unequal_null_actual(self):
676
- df1 = None
677
- df2 = self.spark.createDataFrame(
678
- data=[
679
- ("1", 1000),
680
- ("2", 3000),
681
- ],
682
- schema=["id", "amount"],
683
- )
684
-
685
- with self.assertRaises(PySparkAssertionError) as pe:
686
- assertDataFrameEqual(df1, df2)
687
-
688
- self.check_error(
689
- exception=pe.exception,
690
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
691
- message_parameters={
692
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
693
- "arg_name": "actual",
694
- "actual_type": None,
695
- },
696
- )
697
-
698
- with self.assertRaises(PySparkAssertionError) as pe:
699
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
700
-
701
- self.check_error(
702
- exception=pe.exception,
703
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
704
- message_parameters={
705
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
706
- "arg_name": "actual",
707
- "actual_type": None,
708
- },
709
- )
710
-
711
- def test_assert_unequal_null_expected(self):
712
- df1 = self.spark.createDataFrame(
713
- data=[
714
- ("1", 1000),
715
- ("2", 3000),
716
- ],
717
- schema=["id", "amount"],
718
- )
719
- df2 = None
720
-
721
- with self.assertRaises(PySparkAssertionError) as pe:
722
- assertDataFrameEqual(df1, df2)
723
-
724
- self.check_error(
725
- exception=pe.exception,
726
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
727
- message_parameters={
728
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
729
- "arg_name": "expected",
730
- "actual_type": None,
731
- },
732
- )
733
-
734
- with self.assertRaises(PySparkAssertionError) as pe:
735
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
736
-
737
- self.check_error(
738
- exception=pe.exception,
739
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
740
- message_parameters={
741
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
742
- "arg_name": "expected",
743
- "actual_type": None,
744
- },
745
- )
746
-
747
- @unittest.skipIf(not have_pandas or not have_numpy, "no pandas or numpy dependency")
748
- def test_assert_equal_exact_pandas_df(self):
749
- import pandas as pd
750
- import numpy as np
751
-
752
- df1 = pd.DataFrame(
753
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)]), columns=["a", "b", "c"]
754
- )
755
- df2 = pd.DataFrame(
756
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)]), columns=["a", "b", "c"]
757
- )
758
-
759
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
760
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
761
-
762
- @unittest.skipIf(not have_pandas or not have_numpy, "no pandas or numpy dependency")
763
- def test_assert_approx_equal_pandas_df(self):
764
- import pandas as pd
765
- import numpy as np
766
-
767
- # test that asserts close enough equality for pandas df
768
- df1 = pd.DataFrame(
769
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 59)]), columns=["a", "b", "c"]
770
- )
771
- df2 = pd.DataFrame(
772
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 59.0001)]), columns=["a", "b", "c"]
773
- )
774
-
775
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
776
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
777
-
778
- @unittest.skipIf(not have_pandas or not have_numpy, "no pandas or numpy dependency")
779
- def test_assert_approx_equal_fail_exact_pandas_df(self):
780
- import pandas as pd
781
- import numpy as np
782
-
783
- # test that asserts close enough equality for pandas df
784
- df1 = pd.DataFrame(
785
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 59)]), columns=["a", "b", "c"]
786
- )
787
- df2 = pd.DataFrame(
788
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 59.0001)]), columns=["a", "b", "c"]
789
- )
790
-
791
- with self.assertRaises(PySparkAssertionError) as pe:
792
- assertDataFrameEqual(df1, df2, checkRowOrder=False, rtol=0, atol=0)
793
-
794
- self.check_error(
795
- exception=pe.exception,
796
- error_class="DIFFERENT_PANDAS_DATAFRAME",
797
- message_parameters={
798
- "left": df1.to_string(),
799
- "left_dtype": str(df1.dtypes),
800
- "right": df2.to_string(),
801
- "right_dtype": str(df2.dtypes),
802
- },
803
- )
804
-
805
- with self.assertRaises(PySparkAssertionError) as pe:
806
- assertDataFrameEqual(df1, df2, checkRowOrder=True, rtol=0, atol=0)
807
-
808
- self.check_error(
809
- exception=pe.exception,
810
- error_class="DIFFERENT_PANDAS_DATAFRAME",
811
- message_parameters={
812
- "left": df1.to_string(),
813
- "left_dtype": str(df1.dtypes),
814
- "right": df2.to_string(),
815
- "right_dtype": str(df2.dtypes),
816
- },
817
- )
818
-
819
- @unittest.skipIf(not have_pandas or not have_numpy, "no pandas or numpy dependency")
820
- def test_assert_unequal_pandas_df(self):
821
- import pandas as pd
822
- import numpy as np
823
-
824
- df1 = pd.DataFrame(
825
- data=np.array([(1, 2, 3), (4, 5, 6), (6, 5, 4)]), columns=["a", "b", "c"]
826
- )
827
- df2 = pd.DataFrame(
828
- data=np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)]), columns=["a", "b", "c"]
829
- )
830
-
831
- with self.assertRaises(PySparkAssertionError) as pe:
832
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
833
-
834
- self.check_error(
835
- exception=pe.exception,
836
- error_class="DIFFERENT_PANDAS_DATAFRAME",
837
- message_parameters={
838
- "left": df1.to_string(),
839
- "left_dtype": str(df1.dtypes),
840
- "right": df2.to_string(),
841
- "right_dtype": str(df2.dtypes),
842
- },
843
- )
844
-
845
- with self.assertRaises(PySparkAssertionError) as pe:
846
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
847
-
848
- self.check_error(
849
- exception=pe.exception,
850
- error_class="DIFFERENT_PANDAS_DATAFRAME",
851
- message_parameters={
852
- "left": df1.to_string(),
853
- "left_dtype": str(df1.dtypes),
854
- "right": df2.to_string(),
855
- "right_dtype": str(df2.dtypes),
856
- },
857
- )
858
-
859
- @unittest.skipIf(not have_pandas or not have_numpy, "no pandas or numpy dependency")
860
- def test_assert_type_error_pandas_df(self):
861
- import pyspark.pandas as ps
862
- import pandas as pd
863
- import numpy as np
864
-
865
- df1 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
866
- df2 = pd.DataFrame(
867
- data=np.array([(1, 2, 3), (4, 5, 6), (6, 5, 4)]), columns=["a", "b", "c"]
868
- )
869
-
870
- with self.assertRaises(PySparkAssertionError) as pe:
871
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
872
-
873
- self.check_error(
874
- exception=pe.exception,
875
- error_class="DIFFERENT_PANDAS_DATAFRAME",
876
- message_parameters={
877
- "left": df1.to_string(),
878
- "left_dtype": str(df1.dtypes),
879
- "right": df2.to_string(),
880
- "right_dtype": str(df2.dtypes),
881
- },
882
- )
883
-
884
- with self.assertRaises(PySparkAssertionError) as pe:
885
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
886
-
887
- self.check_error(
888
- exception=pe.exception,
889
- error_class="DIFFERENT_PANDAS_DATAFRAME",
890
- message_parameters={
891
- "left": df1.to_string(),
892
- "left_dtype": str(df1.dtypes),
893
- "right": df2.to_string(),
894
- "right_dtype": str(df2.dtypes),
895
- },
896
- )
897
-
898
- @unittest.skipIf(not have_pandas, "no pandas dependency")
899
- def test_assert_equal_exact_pandas_on_spark_df(self):
900
- import pyspark.pandas as ps
901
-
902
- df1 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
903
- df2 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
904
-
905
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
906
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
907
-
908
- @unittest.skipIf(not have_pandas, "no pandas dependency")
909
- def test_assert_equal_exact_pandas_on_spark_df(self):
910
- import pyspark.pandas as ps
911
-
912
- df1 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
913
- df2 = ps.DataFrame(data=[30, 20, 10], columns=["Numbers"])
914
-
915
- assertDataFrameEqual(df1, df2)
916
-
917
- @unittest.skipIf(not have_pandas, "no pandas dependency")
918
- def test_assert_equal_approx_pandas_on_spark_df(self):
919
- import pyspark.pandas as ps
920
-
921
- df1 = ps.DataFrame(data=[10.0001, 20.32, 30.1], columns=["Numbers"])
922
- df2 = ps.DataFrame(data=[10.0, 20.32, 30.1], columns=["Numbers"])
923
-
924
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
925
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
926
-
927
- @unittest.skipIf(not have_pandas, "no pandas dependency")
928
- def test_assert_error_pandas_pyspark_df(self):
929
- import pyspark.pandas as ps
930
- import pandas as pd
931
-
932
- df1 = ps.DataFrame(data=[10, 20, 30], columns=["Numbers"])
933
- df2 = self.spark.createDataFrame([(10,), (11,), (13,)], ["Numbers"])
934
-
935
- with self.assertRaises(PySparkAssertionError) as pe:
936
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
937
-
938
- self.check_error(
939
- exception=pe.exception,
940
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
941
- message_parameters={
942
- "expected_type": f"{ps.DataFrame.__name__}, "
943
- f"{pd.DataFrame.__name__}, "
944
- f"{ps.Series.__name__}, "
945
- f"{pd.Series.__name__}, "
946
- f"{ps.Index.__name__}"
947
- f"{pd.Index.__name__}, ",
948
- "arg_name": "expected",
949
- "actual_type": type(df2),
950
- },
951
- )
952
-
953
- with self.assertRaises(PySparkAssertionError) as pe:
954
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
955
-
956
- self.check_error(
957
- exception=pe.exception,
958
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
959
- message_parameters={
960
- "expected_type": f"{ps.DataFrame.__name__}, "
961
- f"{pd.DataFrame.__name__}, "
962
- f"{ps.Series.__name__}, "
963
- f"{pd.Series.__name__}, "
964
- f"{ps.Index.__name__}"
965
- f"{pd.Index.__name__}, ",
966
- "arg_name": "expected",
967
- "actual_type": type(df2),
968
- },
969
- )
970
-
971
- def test_assert_error_non_pyspark_df(self):
972
- dict1 = {"a": 1, "b": 2}
973
- dict2 = {"a": 1, "b": 2}
974
-
975
- with self.assertRaises(PySparkAssertionError) as pe:
976
- assertDataFrameEqual(dict1, dict2)
977
-
978
- self.check_error(
979
- exception=pe.exception,
980
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
981
- message_parameters={
982
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
983
- "arg_name": "actual",
984
- "actual_type": type(dict1),
985
- },
986
- )
987
-
988
- with self.assertRaises(PySparkAssertionError) as pe:
989
- assertDataFrameEqual(dict1, dict2, checkRowOrder=True)
990
-
991
- self.check_error(
992
- exception=pe.exception,
993
- error_class="INVALID_TYPE_DF_EQUALITY_ARG",
994
- message_parameters={
995
- "expected_type": "Union[DataFrame, ps.DataFrame, List[Row]]",
996
- "arg_name": "actual",
997
- "actual_type": type(dict1),
998
- },
999
- )
1000
-
1001
- def test_row_order_ignored(self):
1002
- # test that row order is ignored (not checked) by default
1003
- df1 = self.spark.createDataFrame(
1004
- data=[
1005
- ("2", 3000.00),
1006
- ("1", 1000.00),
1007
- ],
1008
- schema=["id", "amount"],
1009
- )
1010
- df2 = self.spark.createDataFrame(
1011
- data=[
1012
- ("1", 1000.00),
1013
- ("2", 3000.00),
1014
- ],
1015
- schema=["id", "amount"],
1016
- )
1017
-
1018
- assertDataFrameEqual(df1, df2)
1019
-
1020
- def test_check_row_order_error(self):
1021
- # test checkRowOrder=True
1022
- df1 = self.spark.createDataFrame(
1023
- data=[
1024
- ("2", 3000.00),
1025
- ("1", 1000.00),
1026
- ],
1027
- schema=["id", "amount"],
1028
- )
1029
- df2 = self.spark.createDataFrame(
1030
- data=[
1031
- ("1", 1000.00),
1032
- ("2", 3000.00),
1033
- ],
1034
- schema=["id", "amount"],
1035
- )
1036
-
1037
- rows_str1 = ""
1038
- rows_str2 = ""
1039
-
1040
- # count different rows
1041
- for r1, r2 in list(zip_longest(df1.collect(), df2.collect())):
1042
- rows_str1 += str(r1) + "\n"
1043
- rows_str2 += str(r2) + "\n"
1044
-
1045
- generated_diff = _context_diff(
1046
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=2
1047
- )
1048
-
1049
- error_msg = "Results do not match: "
1050
- percent_diff = (2 / 2) * 100
1051
- error_msg += "( %.5f %% )" % percent_diff
1052
- error_msg += "\n" + "\n".join(generated_diff)
1053
-
1054
- with self.assertRaises(PySparkAssertionError) as pe:
1055
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1056
-
1057
- self.check_error(
1058
- exception=pe.exception,
1059
- error_class="DIFFERENT_ROWS",
1060
- message_parameters={"error_msg": error_msg},
1061
- )
1062
-
1063
- def test_remove_non_word_characters_long(self):
1064
- def remove_non_word_characters(col):
1065
- return F.regexp_replace(col, "[^\\w\\s]+", "")
1066
-
1067
- source_data = [("jo&&se",), ("**li**",), ("#::luisa",), (None,)]
1068
- source_df = self.spark.createDataFrame(source_data, ["name"])
1069
-
1070
- actual_df = source_df.withColumn("clean_name", remove_non_word_characters(F.col("name")))
1071
-
1072
- expected_data = [("jo&&se", "jose"), ("**li**", "li"), ("#::luisa", "luisa"), (None, None)]
1073
- expected_df = self.spark.createDataFrame(expected_data, ["name", "clean_name"])
1074
-
1075
- assertDataFrameEqual(actual_df, expected_df)
1076
-
1077
- def test_assert_pyspark_approx_equal(self):
1078
- df1 = self.spark.createDataFrame(
1079
- data=[
1080
- ("1", 1000.00),
1081
- ("2", 3000.00),
1082
- ],
1083
- schema=["id", "amount"],
1084
- )
1085
- df2 = self.spark.createDataFrame(
1086
- data=[
1087
- ("1", 1000.0000001),
1088
- ("2", 3000.00),
1089
- ],
1090
- schema=["id", "amount"],
1091
- )
1092
-
1093
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1094
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1095
-
1096
- def test_assert_pyspark_approx_equal_custom_rtol(self):
1097
- df1 = self.spark.createDataFrame(
1098
- data=[
1099
- ("1", 1000.00),
1100
- ("2", 3000.00),
1101
- ],
1102
- schema=["id", "amount"],
1103
- )
1104
- df2 = self.spark.createDataFrame(
1105
- data=[
1106
- ("1", 1000.01),
1107
- ("2", 3000.00),
1108
- ],
1109
- schema=["id", "amount"],
1110
- )
1111
-
1112
- assertDataFrameEqual(df1, df2, rtol=1e-2)
1113
-
1114
- def test_assert_pyspark_df_not_equal(self):
1115
- df1 = self.spark.createDataFrame(
1116
- data=[
1117
- ("1", 1000.00),
1118
- ("2", 3000.00),
1119
- ("3", 2000.00),
1120
- ],
1121
- schema=["id", "amount"],
1122
- )
1123
- df2 = self.spark.createDataFrame(
1124
- data=[
1125
- ("1", 1001.00),
1126
- ("2", 3000.00),
1127
- ("3", 2003.00),
1128
- ],
1129
- schema=["id", "amount"],
1130
- )
1131
-
1132
- rows_str1 = ""
1133
- rows_str2 = ""
1134
-
1135
- # count different rows
1136
- for r1, r2 in list(zip_longest(df1.collect(), df2.collect())):
1137
- rows_str1 += str(r1) + "\n"
1138
- rows_str2 += str(r2) + "\n"
1139
-
1140
- generated_diff = _context_diff(
1141
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=3
1142
- )
1143
-
1144
- error_msg = "Results do not match: "
1145
- percent_diff = (2 / 3) * 100
1146
- error_msg += "( %.5f %% )" % percent_diff
1147
- error_msg += "\n" + "\n".join(generated_diff)
1148
-
1149
- with self.assertRaises(PySparkAssertionError) as pe:
1150
- assertDataFrameEqual(df1, df2)
1151
-
1152
- self.check_error(
1153
- exception=pe.exception,
1154
- error_class="DIFFERENT_ROWS",
1155
- message_parameters={"error_msg": error_msg},
1156
- )
1157
-
1158
- with self.assertRaises(PySparkAssertionError) as pe:
1159
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1160
-
1161
- self.check_error(
1162
- exception=pe.exception,
1163
- error_class="DIFFERENT_ROWS",
1164
- message_parameters={"error_msg": error_msg},
1165
- )
1166
-
1167
- def test_assert_notequal_schema(self):
1168
- df1 = self.spark.createDataFrame(
1169
- data=[
1170
- (1, 1000),
1171
- (2, 3000),
1172
- ],
1173
- schema=["id", "number"],
1174
- )
1175
- df2 = self.spark.createDataFrame(
1176
- data=[
1177
- ("1", 1000),
1178
- ("2", 5000),
1179
- ],
1180
- schema=["id", "amount"],
1181
- )
1182
-
1183
- generated_diff = difflib.ndiff(str(df1.schema).splitlines(), str(df2.schema).splitlines())
1184
-
1185
- expected_error_msg = "\n".join(generated_diff)
1186
-
1187
- with self.assertRaises(PySparkAssertionError) as pe:
1188
- assertDataFrameEqual(df1, df2)
1189
-
1190
- self.check_error(
1191
- exception=pe.exception,
1192
- error_class="DIFFERENT_SCHEMA",
1193
- message_parameters={"error_msg": expected_error_msg},
1194
- )
1195
-
1196
- def test_diff_schema_lens(self):
1197
- df1 = self.spark.createDataFrame(
1198
- data=[
1199
- (1, 3000),
1200
- (2, 1000),
1201
- ],
1202
- schema=["id", "amount"],
1203
- )
1204
-
1205
- df2 = self.spark.createDataFrame(
1206
- data=[
1207
- (1, 3000, "a"),
1208
- (2, 1000, "b"),
1209
- ],
1210
- schema=["id", "amount", "letter"],
1211
- )
1212
-
1213
- generated_diff = difflib.ndiff(str(df1.schema).splitlines(), str(df2.schema).splitlines())
1214
-
1215
- expected_error_msg = "\n".join(generated_diff)
1216
-
1217
- with self.assertRaises(PySparkAssertionError) as pe:
1218
- assertDataFrameEqual(df1, df2)
1219
-
1220
- self.check_error(
1221
- exception=pe.exception,
1222
- error_class="DIFFERENT_SCHEMA",
1223
- message_parameters={"error_msg": expected_error_msg},
1224
- )
1225
-
1226
- def test_schema_ignore_nullable(self):
1227
- s1 = StructType(
1228
- [StructField("id", IntegerType(), True), StructField("name", StringType(), True)]
1229
- )
1230
-
1231
- df1 = self.spark.createDataFrame([(1, "jane"), (2, "john")], s1)
1232
-
1233
- s2 = StructType(
1234
- [StructField("id", IntegerType(), True), StructField("name", StringType(), False)]
1235
- )
1236
-
1237
- df2 = self.spark.createDataFrame([(1, "jane"), (2, "john")], s2)
1238
-
1239
- assertDataFrameEqual(df1, df2)
1240
-
1241
- def test_schema_ignore_nullable_array_equal(self):
1242
- s1 = StructType([StructField("names", ArrayType(DoubleType(), True), True)])
1243
- s2 = StructType([StructField("names", ArrayType(DoubleType(), False), False)])
1244
-
1245
- assertSchemaEqual(s1, s2)
1246
-
1247
- def test_schema_ignore_nullable_struct_equal(self):
1248
- s1 = StructType(
1249
- [StructField("names", StructType([StructField("age", IntegerType(), True)]), True)]
1250
- )
1251
- s2 = StructType(
1252
- [StructField("names", StructType([StructField("age", IntegerType(), False)]), False)]
1253
- )
1254
- assertSchemaEqual(s1, s2)
1255
-
1256
- def test_schema_array_unequal(self):
1257
- s1 = StructType([StructField("names", ArrayType(IntegerType(), True), True)])
1258
- s2 = StructType([StructField("names", ArrayType(DoubleType(), False), False)])
1259
-
1260
- generated_diff = difflib.ndiff(str(s1).splitlines(), str(s2).splitlines())
1261
-
1262
- expected_error_msg = "\n".join(generated_diff)
1263
-
1264
- with self.assertRaises(PySparkAssertionError) as pe:
1265
- assertSchemaEqual(s1, s2)
1266
-
1267
- self.check_error(
1268
- exception=pe.exception,
1269
- error_class="DIFFERENT_SCHEMA",
1270
- message_parameters={"error_msg": expected_error_msg},
1271
- )
1272
-
1273
- def test_schema_struct_unequal(self):
1274
- s1 = StructType(
1275
- [StructField("names", StructType([StructField("age", DoubleType(), True)]), True)]
1276
- )
1277
- s2 = StructType(
1278
- [StructField("names", StructType([StructField("age", IntegerType(), True)]), True)]
1279
- )
1280
-
1281
- generated_diff = difflib.ndiff(str(s1).splitlines(), str(s2).splitlines())
1282
-
1283
- expected_error_msg = "\n".join(generated_diff)
1284
-
1285
- with self.assertRaises(PySparkAssertionError) as pe:
1286
- assertSchemaEqual(s1, s2)
1287
-
1288
- self.check_error(
1289
- exception=pe.exception,
1290
- error_class="DIFFERENT_SCHEMA",
1291
- message_parameters={"error_msg": expected_error_msg},
1292
- )
1293
-
1294
- def test_schema_more_nested_struct_unequal(self):
1295
- s1 = StructType(
1296
- [
1297
- StructField(
1298
- "name",
1299
- StructType(
1300
- [
1301
- StructField("firstname", StringType(), True),
1302
- StructField("middlename", StringType(), True),
1303
- StructField("lastname", StringType(), True),
1304
- ]
1305
- ),
1306
- ),
1307
- ]
1308
- )
1309
-
1310
- s2 = StructType(
1311
- [
1312
- StructField(
1313
- "name",
1314
- StructType(
1315
- [
1316
- StructField("firstname", StringType(), True),
1317
- StructField("middlename", BooleanType(), True),
1318
- StructField("lastname", StringType(), True),
1319
- ]
1320
- ),
1321
- ),
1322
- ]
1323
- )
1324
-
1325
- generated_diff = difflib.ndiff(str(s1).splitlines(), str(s2).splitlines())
1326
-
1327
- expected_error_msg = "\n".join(generated_diff)
1328
-
1329
- with self.assertRaises(PySparkAssertionError) as pe:
1330
- assertSchemaEqual(s1, s2)
1331
-
1332
- self.check_error(
1333
- exception=pe.exception,
1334
- error_class="DIFFERENT_SCHEMA",
1335
- message_parameters={"error_msg": expected_error_msg},
1336
- )
1337
-
1338
- def test_schema_unsupported_type(self):
1339
- s1 = "names: int"
1340
- s2 = "names: int"
1341
-
1342
- with self.assertRaises(PySparkAssertionError) as pe:
1343
- assertSchemaEqual(s1, s2)
1344
-
1345
- self.check_error(
1346
- exception=pe.exception,
1347
- error_class="UNSUPPORTED_DATA_TYPE",
1348
- message_parameters={"data_type": type(s1)},
1349
- )
1350
-
1351
- def test_spark_sql(self):
1352
- assertDataFrameEqual(self.spark.sql("select 1 + 2 AS x"), self.spark.sql("select 3 AS x"))
1353
- assertDataFrameEqual(
1354
- self.spark.sql("select 1 + 2 AS x"),
1355
- self.spark.sql("select 3 AS x"),
1356
- checkRowOrder=True,
1357
- )
1358
-
1359
- def test_spark_sql_sort_rows(self):
1360
- df1 = self.spark.createDataFrame(
1361
- data=[
1362
- (1, 3000),
1363
- (2, 1000),
1364
- ],
1365
- schema=["id", "amount"],
1366
- )
1367
-
1368
- df2 = self.spark.createDataFrame(
1369
- data=[
1370
- (2, 1000),
1371
- (1, 3000),
1372
- ],
1373
- schema=["id", "amount"],
1374
- )
1375
-
1376
- df1.createOrReplaceTempView("df1")
1377
- df2.createOrReplaceTempView("df2")
1378
-
1379
- assertDataFrameEqual(
1380
- self.spark.sql("select * from df1 order by amount"), self.spark.sql("select * from df2")
1381
- )
1382
-
1383
- assertDataFrameEqual(
1384
- self.spark.sql("select * from df1 order by amount"),
1385
- self.spark.sql("select * from df2"),
1386
- checkRowOrder=True,
1387
- )
1388
-
1389
- def test_empty_dataset(self):
1390
- df1 = self.spark.range(0, 10).limit(0)
1391
-
1392
- df2 = self.spark.range(0, 10).limit(0)
1393
-
1394
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1395
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1396
-
1397
- def test_no_column(self):
1398
- df1 = self.spark.range(0, 10).drop("id")
1399
-
1400
- df2 = self.spark.range(0, 10).drop("id")
1401
-
1402
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1403
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1404
-
1405
- def test_empty_no_column(self):
1406
- df1 = self.spark.range(0, 10).drop("id").limit(0)
1407
-
1408
- df2 = self.spark.range(0, 10).drop("id").limit(0)
1409
-
1410
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1411
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1412
-
1413
- def test_empty_expected_list(self):
1414
- df1 = self.spark.range(0, 5).drop("id")
1415
-
1416
- df2 = [Row(), Row(), Row(), Row(), Row()]
1417
-
1418
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1419
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1420
-
1421
- def test_no_column_expected_list(self):
1422
- df1 = self.spark.range(0, 10).limit(0)
1423
-
1424
- df2 = []
1425
-
1426
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1427
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1428
-
1429
- def test_empty_no_column_expected_list(self):
1430
- df1 = self.spark.range(0, 10).drop("id").limit(0)
1431
-
1432
- df2 = []
1433
-
1434
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1435
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1436
-
1437
- def test_special_vals(self):
1438
- df1 = self.spark.createDataFrame(
1439
- data=[
1440
- (1, float("nan")),
1441
- (2, float("inf")),
1442
- (2, float("-inf")),
1443
- ],
1444
- schema=["id", "amount"],
1445
- )
1446
-
1447
- df2 = self.spark.createDataFrame(
1448
- data=[
1449
- (1, float("nan")),
1450
- (2, float("inf")),
1451
- (2, float("-inf")),
1452
- ],
1453
- schema=["id", "amount"],
1454
- )
1455
-
1456
- assertDataFrameEqual(df1, df2, checkRowOrder=False)
1457
- assertDataFrameEqual(df1, df2, checkRowOrder=True)
1458
-
1459
- def test_df_list_row_equal(self):
1460
- df1 = self.spark.createDataFrame(
1461
- data=[
1462
- (1, 3000),
1463
- (2, 1000),
1464
- ],
1465
- schema=["id", "amount"],
1466
- )
1467
-
1468
- list_of_rows = [Row(1, 3000), Row(2, 1000)]
1469
-
1470
- assertDataFrameEqual(df1, list_of_rows, checkRowOrder=False)
1471
- assertDataFrameEqual(df1, list_of_rows, checkRowOrder=True)
1472
-
1473
- def test_list_rows_equal(self):
1474
- list_of_rows1 = [Row(1, "abc", 5000), Row(2, "def", 1000)]
1475
- list_of_rows2 = [Row(1, "abc", 5000), Row(2, "def", 1000)]
1476
-
1477
- assertDataFrameEqual(list_of_rows1, list_of_rows2, checkRowOrder=False)
1478
- assertDataFrameEqual(list_of_rows1, list_of_rows2, checkRowOrder=True)
1479
-
1480
- def test_list_rows_unequal(self):
1481
- list_of_rows1 = [Row(1, "abc", 5000), Row(2, "def", 1000)]
1482
- list_of_rows2 = [Row(1, "abc", 5000), Row(2, "defg", 1000)]
1483
-
1484
- rows_str1 = ""
1485
- rows_str2 = ""
1486
-
1487
- # count different rows
1488
- for r1, r2 in list(zip_longest(list_of_rows1, list_of_rows2)):
1489
- rows_str1 += str(r1) + "\n"
1490
- rows_str2 += str(r2) + "\n"
1491
-
1492
- generated_diff = _context_diff(
1493
- actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=2
1494
- )
1495
-
1496
- error_msg = "Results do not match: "
1497
- percent_diff = (1 / 2) * 100
1498
- error_msg += "( %.5f %% )" % percent_diff
1499
- error_msg += "\n" + "\n".join(generated_diff)
1500
-
1501
- with self.assertRaises(PySparkAssertionError) as pe:
1502
- assertDataFrameEqual(list_of_rows1, list_of_rows2)
1503
-
1504
- self.check_error(
1505
- exception=pe.exception,
1506
- error_class="DIFFERENT_ROWS",
1507
- message_parameters={"error_msg": error_msg},
1508
- )
1509
-
1510
- with self.assertRaises(PySparkAssertionError) as pe:
1511
- assertDataFrameEqual(list_of_rows1, list_of_rows2, checkRowOrder=True)
1512
-
1513
- self.check_error(
1514
- exception=pe.exception,
1515
- error_class="DIFFERENT_ROWS",
1516
- message_parameters={"error_msg": error_msg},
1517
- )
1518
-
1519
def test_df_list_of_rows_unequal(self):
    """DataFrame vs. list-of-Rows with differing values raises DIFFERENT_ROWS.

    NOTE(review): this method was previously also named
    ``test_list_row_unequal_schema``, which collided with the later
    definition of the same name, so unittest discovery silently dropped
    this one.  Renamed so both tests actually run.
    """
    df1 = self.spark.createDataFrame(
        data=[
            (1, 3000),
            (2, 1000),
            (3, 10),
        ],
        schema=["id", "amount"],
    )

    list_of_rows = [Row(id=1, amount=300), Row(id=2, amount=100), Row(id=3, amount=10)]

    rows_str1 = ""
    rows_str2 = ""

    # Build the diff text the same way assertDataFrameEqual does.
    # A DataFrame is not row-iterable -- collect() is required here
    # (the previous code zipped over df1 directly, which does not yield Rows).
    for r1, r2 in zip_longest(df1.collect(), list_of_rows):
        rows_str1 += str(r1) + "\n"
        rows_str2 += str(r2) + "\n"

    generated_diff = _context_diff(
        actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=3
    )

    error_msg = "Results do not match: "
    percent_diff = (2 / 3) * 100  # 2 of the 3 rows differ
    error_msg += "( %.5f %% )" % percent_diff
    error_msg += "\n" + "\n".join(generated_diff)

    # Unordered comparison must report the mismatching rows.
    with self.assertRaises(PySparkAssertionError) as pe:
        assertDataFrameEqual(df1, list_of_rows)

    self.check_error(
        exception=pe.exception,
        error_class="DIFFERENT_ROWS",
        message_parameters={"error_msg": error_msg},
    )

    # Ordered comparison must report the same mismatches.
    with self.assertRaises(PySparkAssertionError) as pe:
        assertDataFrameEqual(df1, list_of_rows, checkRowOrder=True)

    self.check_error(
        exception=pe.exception,
        error_class="DIFFERENT_ROWS",
        message_parameters={"error_msg": error_msg},
    )
1566
def test_list_row_unequal_schema(self):
    """A type mismatch (int vs. str column) surfaces as DIFFERENT_ROWS,
    both with and without row-order checking."""
    from pyspark.sql import Row

    df1 = self.spark.createDataFrame(
        data=[
            (1, 3000),
            (2, 1000),
        ],
        schema=["id", "amount"],
    )

    expected_rows = [Row(1, "3000"), Row(2, "1000")]

    # Render each row pair as text, padding with None via zip_longest,
    # exactly as assertDataFrameEqual does when building its diff.
    actual_lines = []
    expected_lines = []
    for actual_row, expected_row in zip_longest(df1.collect(), expected_rows):
        actual_lines.append(str(actual_row))
        expected_lines.append(str(expected_row))

    generated_diff = _context_diff(actual=actual_lines, expected=expected_lines, n=2)

    percent_diff = (2 / 2) * 100  # every row differs
    error_msg = "Results do not match: "
    error_msg += "( %.5f %% )" % percent_diff
    error_msg += "\n" + "\n".join(generated_diff)

    # Unordered comparison.
    with self.assertRaises(PySparkAssertionError) as pe:
        assertDataFrameEqual(df1, expected_rows)

    self.check_error(
        exception=pe.exception,
        error_class="DIFFERENT_ROWS",
        message_parameters={"error_msg": error_msg},
    )

    # Ordered comparison yields the identical error.
    with self.assertRaises(PySparkAssertionError) as pe:
        assertDataFrameEqual(df1, expected_rows, checkRowOrder=True)

    self.check_error(
        exception=pe.exception,
        error_class="DIFFERENT_ROWS",
        message_parameters={"error_msg": error_msg},
    )
1614
def test_assert_data_frame_equal_not_support_streaming(self):
    """assertDataFrameEqual rejects streaming DataFrames with
    UNSUPPORTED_OPERATION."""
    df1 = self.spark.readStream.format("rate").load()
    df2 = self.spark.readStream.format("rate").load()

    # assertRaises both checks the exception type and fails the test when
    # nothing is raised, replacing the manual ``exception_thrown`` flag
    # plus try/except of the previous version.
    with self.assertRaises(PySparkAssertionError) as ctx:
        assertDataFrameEqual(df1, df2)
    self.assertEqual(ctx.exception.getErrorClass(), "UNSUPPORTED_OPERATION")
1626
-
1627
class UtilsTests(ReusedSQLTestCase, UtilsTestsMixin):
    """Exception-surfacing tests: verify that server-side errors arrive as
    the correct user-facing PySpark exception types with useful details."""

    def test_capture_analysis_exception(self):
        """Invalid column references raise AnalysisException."""
        self.assertRaises(AnalysisException, lambda: self.spark.sql("select abc"))
        self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))

    def test_capture_user_friendly_exception(self):
        """Unresolved non-ASCII column names appear verbatim in the message."""
        try:
            self.spark.sql("select `中文字段`")
        except AnalysisException as e:
            self.assertRegex(str(e), ".*UNRESOLVED_COLUMN.*`中文字段`.*")

    def test_spark_upgrade_exception(self):
        # SPARK-32161 : Test case to Handle SparkUpgradeException in pythonic way
        df = self.spark.createDataFrame([("2014-31-12",)], ["date_str"])
        df2 = df.select(
            "date_str", to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa")))
        )
        self.assertRaises(SparkUpgradeException, df2.collect)

    def test_capture_parse_exception(self):
        """Unparseable SQL raises ParseException."""
        self.assertRaises(ParseException, lambda: self.spark.sql("abc"))

    def test_capture_illegalargument_exception(self):
        """Invalid argument values raise IllegalArgumentException with
        descriptive text and a JVM stack trace."""
        self.assertRaisesRegex(
            IllegalArgumentException,
            "Setting negative mapred.reduce.tasks",
            lambda: self.spark.sql("SET mapred.reduce.tasks=-1"),
        )
        df = self.spark.createDataFrame([(1, 2)], ["a", "b"])
        self.assertRaisesRegex(
            IllegalArgumentException,
            "1024 is not in the permitted values",
            lambda: df.select(sha2(df.a, 1024)).collect(),
        )
        try:
            df.select(sha2(df.a, 1024)).collect()
        except IllegalArgumentException as e:
            self.assertRegex(e.desc, "1024 is not in the permitted values")
            self.assertRegex(e.stackTrace, "org.apache.spark.sql.functions")

    def test_get_error_class_state(self):
        # SPARK-36953: test CapturedException.getErrorClass and getSqlState (from SparkThrowable)
        try:
            self.spark.sql("""SELECT a""")
        except AnalysisException as e:
            # assertEquals is a deprecated alias removed in Python 3.12;
            # use assertEqual.
            self.assertEqual(e.getErrorClass(), "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION")
            self.assertEqual(e.getSqlState(), "42703")
1675
-
1676
- if __name__ == "__main__":
1677
- import unittest
1678
- from pyspark.sql.tests.test_utils import * # noqa: F401
1679
-
1680
- try:
1681
- import xmlrunner
1682
-
1683
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
1684
- except ImportError:
1685
- testRunner = None
1686
- unittest.main(testRunner=testRunner, verbosity=2)