snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (476)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/function_defaults.py +207 -0
  7. snowflake/snowpark_connect/expression/literal.py +18 -2
  8. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  9. snowflake/snowpark_connect/expression/map_expression.py +10 -1
  10. snowflake/snowpark_connect/expression/map_extension.py +12 -2
  11. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  12. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  13. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  14. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  15. snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
  16. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  17. snowflake/snowpark_connect/hidden_column.py +39 -0
  18. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
  21. snowflake/snowpark_connect/relation/map_extension.py +56 -15
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
  24. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  25. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  26. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  27. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  28. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  29. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  30. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  31. snowflake/snowpark_connect/relation/read/utils.py +6 -7
  32. snowflake/snowpark_connect/relation/utils.py +1 -170
  33. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  34. snowflake/snowpark_connect/resources_initializer.py +29 -1
  35. snowflake/snowpark_connect/server.py +18 -3
  36. snowflake/snowpark_connect/type_mapping.py +29 -25
  37. snowflake/snowpark_connect/typed_column.py +14 -0
  38. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  39. snowflake/snowpark_connect/utils/context.py +6 -1
  40. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  41. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  42. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  43. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  44. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  45. snowflake/snowpark_connect/version.py +1 -1
  46. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  47. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  48. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  52. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  53. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  101. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  102. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  362. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  444. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  445. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  468. snowpark_connect-0.23.0.dist-info/RECORD +0 -893
  469. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  470. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  471. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  472. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  473. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  474. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  475. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  476. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,1042 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import decimal
19
- import datetime
20
-
21
- from pyspark.sql.types import (
22
- Row,
23
- StructField,
24
- StructType,
25
- MapType,
26
- NullType,
27
- DateType,
28
- TimestampType,
29
- TimestampNTZType,
30
- ByteType,
31
- BinaryType,
32
- ShortType,
33
- IntegerType,
34
- FloatType,
35
- DayTimeIntervalType,
36
- StringType,
37
- DoubleType,
38
- LongType,
39
- DecimalType,
40
- BooleanType,
41
- )
42
- from pyspark.errors import PySparkTypeError, PySparkValueError
43
- from pyspark.errors.exceptions.connect import SparkConnectException
44
- from pyspark.testing.connectutils import should_test_connect
45
- from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
46
-
47
-
48
- if should_test_connect:
49
- import pandas as pd
50
- from pyspark.sql import functions as SF
51
- from pyspark.sql.connect import functions as CF
52
- from pyspark.sql.connect.column import Column
53
- from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression
54
- from pyspark.sql.connect.types import (
55
- JVM_BYTE_MIN,
56
- JVM_BYTE_MAX,
57
- JVM_SHORT_MIN,
58
- JVM_SHORT_MAX,
59
- JVM_INT_MIN,
60
- JVM_INT_MAX,
61
- JVM_LONG_MIN,
62
- JVM_LONG_MAX,
63
- )
64
-
65
-
66
- class SparkConnectColumnTests(SparkConnectSQLTestCase):
67
- def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20):
68
- from pyspark.sql.dataframe import DataFrame as SDF
69
- from pyspark.sql.connect.dataframe import DataFrame as CDF
70
-
71
- assert isinstance(df1, (SDF, CDF))
72
- if isinstance(df1, SDF):
73
- str1 = df1._jdf.showString(n, truncate, False)
74
- else:
75
- str1 = df1._show_string(n, truncate, False)
76
-
77
- assert isinstance(df2, (SDF, CDF))
78
- if isinstance(df2, SDF):
79
- str2 = df2._jdf.showString(n, truncate, False)
80
- else:
81
- str2 = df2._show_string(n, truncate, False)
82
-
83
- self.assertEqual(str1, str2)
84
-
85
- def test_column_operator(self):
86
- # SPARK-41351: Column needs to support !=
87
- df = self.connect.range(10)
88
- self.assertEqual(9, len(df.filter(df.id != CF.lit(1)).collect()))
89
-
90
- def test_columns(self):
91
- # SPARK-41036: test `columns` API for python client.
92
- df = self.connect.read.table(self.tbl_name)
93
- df2 = self.spark.read.table(self.tbl_name)
94
- self.assertEqual(["id", "name"], df.columns)
95
-
96
- self.assert_eq(
97
- df.filter(df.name.rlike("20")).toPandas(), df2.filter(df2.name.rlike("20")).toPandas()
98
- )
99
- self.assert_eq(
100
- df.filter(df.name.like("20")).toPandas(), df2.filter(df2.name.like("20")).toPandas()
101
- )
102
- self.assert_eq(
103
- df.filter(df.name.ilike("20")).toPandas(), df2.filter(df2.name.ilike("20")).toPandas()
104
- )
105
- self.assert_eq(
106
- df.filter(df.name.contains("20")).toPandas(),
107
- df2.filter(df2.name.contains("20")).toPandas(),
108
- )
109
- self.assert_eq(
110
- df.filter(df.name.startswith("2")).toPandas(),
111
- df2.filter(df2.name.startswith("2")).toPandas(),
112
- )
113
- self.assert_eq(
114
- df.filter(df.name.endswith("0")).toPandas(),
115
- df2.filter(df2.name.endswith("0")).toPandas(),
116
- )
117
- self.assert_eq(
118
- df.select(df.name.substr(0, 1).alias("col")).toPandas(),
119
- df2.select(df2.name.substr(0, 1).alias("col")).toPandas(),
120
- )
121
- self.assert_eq(
122
- df.select(df.name.substr(0, 1).name("col")).toPandas(),
123
- df2.select(df2.name.substr(0, 1).name("col")).toPandas(),
124
- )
125
- df3 = self.connect.sql("SELECT cast(null as int) as name")
126
- df4 = self.spark.sql("SELECT cast(null as int) as name")
127
- self.assert_eq(
128
- df3.filter(df3.name.isNull()).toPandas(),
129
- df4.filter(df4.name.isNull()).toPandas(),
130
- )
131
- self.assert_eq(
132
- df3.filter(df3.name.isNotNull()).toPandas(),
133
- df4.filter(df4.name.isNotNull()).toPandas(),
134
- )
135
-
136
- # check error
137
- with self.assertRaises(PySparkTypeError) as pe:
138
- df.name.substr(df.id, 10)
139
-
140
- self.check_error(
141
- exception=pe.exception,
142
- error_class="NOT_SAME_TYPE",
143
- message_parameters={
144
- "arg_name1": "startPos",
145
- "arg_name2": "length",
146
- "arg_type1": "Column",
147
- "arg_type2": "int",
148
- },
149
- )
150
-
151
- with self.assertRaises(PySparkTypeError) as pe:
152
- df.name.substr(10.5, 10.5)
153
-
154
- self.check_error(
155
- exception=pe.exception,
156
- error_class="NOT_COLUMN_OR_INT",
157
- message_parameters={
158
- "arg_name": "length",
159
- "arg_type": "float",
160
- },
161
- )
162
-
163
- def test_column_with_null(self):
164
- # SPARK-41751: test isNull, isNotNull, eqNullSafe
165
-
166
- query = """
167
- SELECT * FROM VALUES
168
- (1, 1, NULL), (2, NULL, NULL), (3, 3, 1)
169
- AS tab(a, b, c)
170
- """
171
-
172
- # +---+----+----+
173
- # | a| b| c|
174
- # +---+----+----+
175
- # | 1| 1|NULL|
176
- # | 2|NULL|NULL|
177
- # | 3| 3| 1|
178
- # +---+----+----+
179
-
180
- cdf = self.connect.sql(query)
181
- sdf = self.spark.sql(query)
182
-
183
- # test isNull
184
- self.assert_eq(
185
- cdf.select(cdf.a.isNull(), cdf["b"].isNull(), CF.col("c").isNull()).toPandas(),
186
- sdf.select(sdf.a.isNull(), sdf["b"].isNull(), SF.col("c").isNull()).toPandas(),
187
- )
188
-
189
- # test isNotNull
190
- self.assert_eq(
191
- cdf.select(cdf.a.isNotNull(), cdf["b"].isNotNull(), CF.col("c").isNotNull()).toPandas(),
192
- sdf.select(sdf.a.isNotNull(), sdf["b"].isNotNull(), SF.col("c").isNotNull()).toPandas(),
193
- )
194
-
195
- # test eqNullSafe
196
- self.assert_eq(
197
- cdf.select(cdf.a.eqNullSafe(cdf.b), cdf["b"].eqNullSafe(CF.col("c"))).toPandas(),
198
- sdf.select(sdf.a.eqNullSafe(sdf.b), sdf["b"].eqNullSafe(SF.col("c"))).toPandas(),
199
- )
200
-
201
- def test_invalid_ops(self):
202
- query = """
203
- SELECT * FROM VALUES
204
- (1, 1, 0, NULL), (2, NULL, 1, 2.0), (3, 3, 4, 3.5)
205
- AS tab(a, b, c, d)
206
- """
207
- cdf = self.connect.sql(query)
208
-
209
- with self.assertRaisesRegex(
210
- ValueError,
211
- "Cannot apply 'in' operator against a column",
212
- ):
213
- 1 in cdf.a
214
-
215
- with self.assertRaisesRegex(
216
- ValueError,
217
- "Cannot convert column into bool",
218
- ):
219
- cdf.a > 2 and cdf.b < 1
220
-
221
- with self.assertRaisesRegex(
222
- ValueError,
223
- "Cannot convert column into bool",
224
- ):
225
- cdf.a > 2 or cdf.b < 1
226
-
227
- with self.assertRaisesRegex(
228
- ValueError,
229
- "Cannot convert column into bool",
230
- ):
231
- not (cdf.a > 2)
232
-
233
- with self.assertRaisesRegex(
234
- TypeError,
235
- "Column is not iterable",
236
- ):
237
- for x in cdf.a:
238
- pass
239
-
240
- def test_datetime(self):
241
- query = """
242
- SELECT * FROM VALUES
243
- (TIMESTAMP('2022-12-22 15:50:00'), DATE('2022-12-25'), 1.1),
244
- (TIMESTAMP('2022-12-22 18:50:00'), NULL, 2.2),
245
- (TIMESTAMP('2022-12-23 15:50:00'), DATE('2022-12-24'), 3.3),
246
- (NULL, DATE('2022-12-22'), NULL)
247
- AS tab(a, b, c)
248
- """
249
- # +-------------------+----------+----+
250
- # | a| b| c|
251
- # +-------------------+----------+----+
252
- # |2022-12-22 15:50:00|2022-12-25| 1.1|
253
- # |2022-12-22 18:50:00| NULL| 2.2|
254
- # |2022-12-23 15:50:00|2022-12-24| 3.3|
255
- # | NULL|2022-12-22|NULL|
256
- # +-------------------+----------+----+
257
-
258
- cdf = self.connect.sql(query)
259
- sdf = self.spark.sql(query)
260
-
261
- # datetime.date
262
- self.assert_eq(
263
- cdf.select(cdf.a < datetime.date(2022, 12, 23)).toPandas(),
264
- sdf.select(sdf.a < datetime.date(2022, 12, 23)).toPandas(),
265
- )
266
- self.assert_eq(
267
- cdf.select(cdf.a != datetime.date(2022, 12, 23)).toPandas(),
268
- sdf.select(sdf.a != datetime.date(2022, 12, 23)).toPandas(),
269
- )
270
- self.assert_eq(
271
- cdf.select(cdf.a == datetime.date(2022, 12, 22)).toPandas(),
272
- sdf.select(sdf.a == datetime.date(2022, 12, 22)).toPandas(),
273
- )
274
- self.assert_eq(
275
- cdf.select(cdf.b < datetime.date(2022, 12, 23)).toPandas(),
276
- sdf.select(sdf.b < datetime.date(2022, 12, 23)).toPandas(),
277
- )
278
- self.assert_eq(
279
- cdf.select(cdf.b >= datetime.date(2022, 12, 23)).toPandas(),
280
- sdf.select(sdf.b >= datetime.date(2022, 12, 23)).toPandas(),
281
- )
282
-
283
- # datetime.datetime
284
- self.assert_eq(
285
- cdf.select(cdf.a < datetime.datetime(2022, 12, 22, 17, 0, 0)).toPandas(),
286
- sdf.select(sdf.a < datetime.datetime(2022, 12, 22, 17, 0, 0)).toPandas(),
287
- )
288
- self.assert_eq(
289
- cdf.select(cdf.a > datetime.datetime(2022, 12, 22, 17, 0, 0)).toPandas(),
290
- sdf.select(sdf.a > datetime.datetime(2022, 12, 22, 17, 0, 0)).toPandas(),
291
- )
292
- self.assert_eq(
293
- cdf.select(cdf.b >= datetime.datetime(2022, 12, 23, 17, 0, 0)).toPandas(),
294
- sdf.select(sdf.b >= datetime.datetime(2022, 12, 23, 17, 0, 0)).toPandas(),
295
- )
296
- self.assert_eq(
297
- cdf.select(cdf.b < datetime.datetime(2022, 12, 23, 17, 0, 0)).toPandas(),
298
- sdf.select(sdf.b < datetime.datetime(2022, 12, 23, 17, 0, 0)).toPandas(),
299
- )
300
-
301
- def test_decimal(self):
302
- # SPARK-41701: test decimal
303
- query = """
304
- SELECT * FROM VALUES
305
- (1, 1, 0, NULL), (2, NULL, 1, 2.0), (3, 3, 4, 3.5)
306
- AS tab(a, b, c, d)
307
- """
308
- # +---+----+---+----+
309
- # | a| b| c| d|
310
- # +---+----+---+----+
311
- # | 1| 1| 0|NULL|
312
- # | 2|NULL| 1| 2.0|
313
- # | 3| 3| 4| 3.5|
314
- # +---+----+---+----+
315
-
316
- cdf = self.connect.sql(query)
317
- sdf = self.spark.sql(query)
318
-
319
- self.assert_eq(
320
- cdf.select(cdf.a < decimal.Decimal(3)).toPandas(),
321
- sdf.select(sdf.a < decimal.Decimal(3)).toPandas(),
322
- )
323
- self.assert_eq(
324
- cdf.select(cdf.a != decimal.Decimal(2)).toPandas(),
325
- sdf.select(sdf.a != decimal.Decimal(2)).toPandas(),
326
- )
327
- self.assert_eq(
328
- cdf.select(cdf.a == decimal.Decimal(2)).toPandas(),
329
- sdf.select(sdf.a == decimal.Decimal(2)).toPandas(),
330
- )
331
- self.assert_eq(
332
- cdf.select(cdf.b < decimal.Decimal(2.5)).toPandas(),
333
- sdf.select(sdf.b < decimal.Decimal(2.5)).toPandas(),
334
- )
335
- self.assert_eq(
336
- cdf.select(cdf.d >= decimal.Decimal(3.0)).toPandas(),
337
- sdf.select(sdf.d >= decimal.Decimal(3.0)).toPandas(),
338
- )
339
-
340
- def test_none(self):
341
- # SPARK-41783: test none
342
-
343
- query = """
344
- SELECT * FROM VALUES
345
- (1, 1, NULL), (2, NULL, 1), (NULL, 3, 4)
346
- AS tab(a, b, c)
347
- """
348
-
349
- # +----+----+----+
350
- # | a| b| c|
351
- # +----+----+----+
352
- # | 1| 1|NULL|
353
- # | 2|NULL| 1|
354
- # |NULL| 3| 4|
355
- # +----+----+----+
356
-
357
- cdf = self.connect.sql(query)
358
- sdf = self.spark.sql(query)
359
-
360
- self.assert_eq(
361
- cdf.select(cdf.b > None, CF.col("c") >= None).toPandas(),
362
- sdf.select(sdf.b > None, SF.col("c") >= None).toPandas(),
363
- )
364
- self.assert_eq(
365
- cdf.select(cdf.b < None, CF.col("c") <= None).toPandas(),
366
- sdf.select(sdf.b < None, SF.col("c") <= None).toPandas(),
367
- )
368
- self.assert_eq(
369
- cdf.select(cdf.b.eqNullSafe(None), CF.col("c").eqNullSafe(None)).toPandas(),
370
- sdf.select(sdf.b.eqNullSafe(None), SF.col("c").eqNullSafe(None)).toPandas(),
371
- )
372
-
373
- def test_simple_binary_expressions(self):
374
- """Test complex expression"""
375
- cdf = self.connect.read.table(self.tbl_name)
376
- pdf = (
377
- cdf.select(cdf.id).where(cdf.id % CF.lit(30) == CF.lit(0)).sort(cdf.id.asc()).toPandas()
378
- )
379
- self.assertEqual(len(pdf.index), 4)
380
-
381
- res = pd.DataFrame(data={"id": [0, 30, 60, 90]})
382
- self.assert_(pdf.equals(res), f"{pdf.to_string()} != {res.to_string()}")
383
-
384
- def test_literal_with_acceptable_type(self):
385
- for value, dataType in [
386
- (b"binary\0\0asas", BinaryType()),
387
- (True, BooleanType()),
388
- (False, BooleanType()),
389
- (0, ByteType()),
390
- (JVM_BYTE_MIN, ByteType()),
391
- (JVM_BYTE_MAX, ByteType()),
392
- (0, ShortType()),
393
- (JVM_SHORT_MIN, ShortType()),
394
- (JVM_SHORT_MAX, ShortType()),
395
- (0, IntegerType()),
396
- (JVM_INT_MIN, IntegerType()),
397
- (JVM_INT_MAX, IntegerType()),
398
- (0, LongType()),
399
- (JVM_LONG_MIN, LongType()),
400
- (JVM_LONG_MAX, LongType()),
401
- (0.0, FloatType()),
402
- (1.234567, FloatType()),
403
- (float("nan"), FloatType()),
404
- (float("inf"), FloatType()),
405
- (float("-inf"), FloatType()),
406
- (0.0, DoubleType()),
407
- (1.234567, DoubleType()),
408
- (float("nan"), DoubleType()),
409
- (float("inf"), DoubleType()),
410
- (float("-inf"), DoubleType()),
411
- (decimal.Decimal(0.0), DecimalType()),
412
- (decimal.Decimal(1.234567), DecimalType()),
413
- ("sss", StringType()),
414
- (datetime.date(2022, 12, 13), DateType()),
415
- (datetime.datetime.now(), DateType()),
416
- (datetime.datetime.now(), TimestampType()),
417
- (datetime.datetime.now(), TimestampNTZType()),
418
- (datetime.timedelta(1, 2, 3), DayTimeIntervalType()),
419
- ]:
420
- lit = LiteralExpression(value=value, dataType=dataType)
421
- self.assertEqual(dataType, lit._dataType)
422
-
423
- def test_literal_with_unsupported_type(self):
424
- for value, dataType in [
425
- (b"binary\0\0asas", BooleanType()),
426
- (True, StringType()),
427
- (False, DoubleType()),
428
- (JVM_BYTE_MIN - 1, ByteType()),
429
- (JVM_BYTE_MAX + 1, ByteType()),
430
- (JVM_SHORT_MIN - 1, ShortType()),
431
- (JVM_SHORT_MAX + 1, ShortType()),
432
- (JVM_INT_MIN - 1, IntegerType()),
433
- (JVM_INT_MAX + 1, IntegerType()),
434
- (JVM_LONG_MIN - 1, LongType()),
435
- (JVM_LONG_MAX + 1, LongType()),
436
- (0.1, DecimalType()),
437
- (datetime.date(2022, 12, 13), TimestampType()),
438
- (datetime.timedelta(1, 2, 3), DateType()),
439
- ({1: 2}, MapType(IntegerType(), IntegerType())),
440
- (
441
- {"a": "xyz", "b": 1},
442
- StructType([StructField("a", StringType()), StructField("b", IntegerType())]),
443
- ),
444
- ]:
445
- with self.assertRaises(AssertionError):
446
- LiteralExpression(value=value, dataType=dataType)
447
-
448
- def test_literal_null(self):
449
- for dataType in [
450
- NullType(),
451
- BinaryType(),
452
- BooleanType(),
453
- ByteType(),
454
- ShortType(),
455
- IntegerType(),
456
- LongType(),
457
- FloatType(),
458
- DoubleType(),
459
- DecimalType(),
460
- DateType(),
461
- TimestampType(),
462
- TimestampNTZType(),
463
- DayTimeIntervalType(),
464
- ]:
465
- lit_null = LiteralExpression(value=None, dataType=dataType)
466
- self.assertTrue(lit_null._value is None)
467
- self.assertEqual(dataType, lit_null._dataType)
468
-
469
- cdf = self.connect.range(0, 1).select(Column(lit_null))
470
- self.assertEqual(dataType, cdf.schema.fields[0].dataType)
471
-
472
- for value, dataType in [
473
- ("123", NullType()),
474
- (123, NullType()),
475
- (None, MapType(IntegerType(), IntegerType())),
476
- (None, StructType([StructField("a", StringType())])),
477
- ]:
478
- with self.assertRaises(AssertionError):
479
- LiteralExpression(value=value, dataType=dataType)
480
-
481
- def test_literal_integers(self):
482
- cdf = self.connect.range(0, 1)
483
- sdf = self.spark.range(0, 1)
484
-
485
- cdf1 = cdf.select(
486
- CF.lit(0),
487
- CF.lit(1),
488
- CF.lit(-1),
489
- CF.lit(JVM_INT_MAX),
490
- CF.lit(JVM_INT_MIN),
491
- CF.lit(JVM_INT_MAX + 1),
492
- CF.lit(JVM_INT_MIN - 1),
493
- CF.lit(JVM_LONG_MAX),
494
- CF.lit(JVM_LONG_MIN),
495
- CF.lit(JVM_LONG_MAX - 1),
496
- CF.lit(JVM_LONG_MIN + 1),
497
- )
498
-
499
- sdf1 = sdf.select(
500
- SF.lit(0),
501
- SF.lit(1),
502
- SF.lit(-1),
503
- SF.lit(JVM_INT_MAX),
504
- SF.lit(JVM_INT_MIN),
505
- SF.lit(JVM_INT_MAX + 1),
506
- SF.lit(JVM_INT_MIN - 1),
507
- SF.lit(JVM_LONG_MAX),
508
- SF.lit(JVM_LONG_MIN),
509
- SF.lit(JVM_LONG_MAX - 1),
510
- SF.lit(JVM_LONG_MIN + 1),
511
- )
512
-
513
- self.assertEqual(cdf1.schema, sdf1.schema)
514
- self.assert_eq(cdf1.toPandas(), sdf1.toPandas())
515
-
516
- # negative test for incorrect type
517
- with self.assertRaises(PySparkValueError) as pe:
518
- cdf.select(CF.lit(JVM_LONG_MAX + 1)).show()
519
-
520
- self.check_error(
521
- exception=pe.exception,
522
- error_class="VALUE_NOT_BETWEEN",
523
- message_parameters={"arg_name": "value", "min": "-9223372036854775808", "max": "32767"},
524
- )
525
-
526
- with self.assertRaises(PySparkValueError) as pe:
527
- cdf.select(CF.lit(JVM_LONG_MIN - 1)).show()
528
-
529
- self.check_error(
530
- exception=pe.exception,
531
- error_class="VALUE_NOT_BETWEEN",
532
- message_parameters={"arg_name": "value", "min": "-9223372036854775808", "max": "32767"},
533
- )
534
-
535
- def test_cast(self):
536
- # SPARK-41412: test basic Column.cast
537
- df = self.connect.read.table(self.tbl_name)
538
- df2 = self.spark.read.table(self.tbl_name)
539
-
540
- self.assert_eq(
541
- df.select(df.id.cast("string")).toPandas(), df2.select(df2.id.cast("string")).toPandas()
542
- )
543
- self.assert_eq(
544
- df.select(df.id.astype("string")).toPandas(),
545
- df2.select(df2.id.astype("string")).toPandas(),
546
- )
547
-
548
- for x in [
549
- StringType(),
550
- ShortType(),
551
- IntegerType(),
552
- LongType(),
553
- FloatType(),
554
- DoubleType(),
555
- ByteType(),
556
- DecimalType(10, 2),
557
- BooleanType(),
558
- DayTimeIntervalType(),
559
- ]:
560
- self.assert_eq(
561
- df.select(df.id.cast(x)).toPandas(), df2.select(df2.id.cast(x)).toPandas()
562
- )
563
-
564
- with self.assertRaises(PySparkTypeError) as pe:
565
- df.id.cast(10)
566
-
567
- self.check_error(
568
- exception=pe.exception,
569
- error_class="NOT_DATATYPE_OR_STR",
570
- message_parameters={"arg_name": "dataType", "arg_type": "int"},
571
- )
572
-
573
- def test_isin(self):
574
- # SPARK-41526: test Column.isin
575
- query = """
576
- SELECT * FROM VALUES
577
- (1, 1, 0, NULL), (2, NULL, 1, 2.0), (3, 3, 4, 3.5)
578
- AS tab(a, b, c, d)
579
- """
580
- # +---+----+---+----+
581
- # | a| b| c| d|
582
- # +---+----+---+----+
583
- # | 1| 1| 0|NULL|
584
- # | 2|NULL| 1| 2.0|
585
- # | 3| 3| 4| 3.5|
586
- # +---+----+---+----+
587
-
588
- cdf = self.connect.sql(query)
589
- sdf = self.spark.sql(query)
590
-
591
- # test literals
592
- self.assert_eq(
593
- cdf.select(cdf.b.isin(1, 2, 3)).toPandas(),
594
- sdf.select(sdf.b.isin(1, 2, 3)).toPandas(),
595
- )
596
- self.assert_eq(
597
- cdf.select(cdf.b.isin([1, 2, 3])).toPandas(),
598
- sdf.select(sdf.b.isin([1, 2, 3])).toPandas(),
599
- )
600
- self.assert_eq(
601
- cdf.select(cdf.b.isin(set([1, 2, 3]))).toPandas(),
602
- sdf.select(sdf.b.isin(set([1, 2, 3]))).toPandas(),
603
- )
604
- self.assert_eq(
605
- cdf.select(cdf.d.isin([1.0, None, 3.5])).toPandas(),
606
- sdf.select(sdf.d.isin([1.0, None, 3.5])).toPandas(),
607
- )
608
-
609
- # test columns
610
- self.assert_eq(
611
- cdf.select(cdf.a.isin(cdf.b)).toPandas(),
612
- sdf.select(sdf.a.isin(sdf.b)).toPandas(),
613
- )
614
- self.assert_eq(
615
- cdf.select(cdf.a.isin(cdf.b, cdf.c)).toPandas(),
616
- sdf.select(sdf.a.isin(sdf.b, sdf.c)).toPandas(),
617
- )
618
-
619
- # test columns mixed with literals
620
- self.assert_eq(
621
- cdf.select(cdf.a.isin(cdf.b, 4, 5, 6)).toPandas(),
622
- sdf.select(sdf.a.isin(sdf.b, 4, 5, 6)).toPandas(),
623
- )
624
-
625
- def test_between(self):
626
- query = """
627
- SELECT * FROM VALUES
628
- (TIMESTAMP('2022-12-22 15:50:00'), DATE('2022-12-25'), 1.1),
629
- (TIMESTAMP('2022-12-22 18:50:00'), NULL, 2.2),
630
- (TIMESTAMP('2022-12-23 15:50:00'), DATE('2022-12-24'), 3.3),
631
- (NULL, DATE('2022-12-22'), NULL)
632
- AS tab(a, b, c)
633
- """
634
-
635
- # +-------------------+----------+----+
636
- # | a| b| c|
637
- # +-------------------+----------+----+
638
- # |2022-12-22 15:50:00|2022-12-25| 1.1|
639
- # |2022-12-22 18:50:00| NULL| 2.2|
640
- # |2022-12-23 15:50:00|2022-12-24| 3.3|
641
- # | NULL|2022-12-22|NULL|
642
- # +-------------------+----------+----+
643
-
644
- cdf = self.connect.sql(query)
645
- sdf = self.spark.sql(query)
646
-
647
- self.assert_eq(
648
- cdf.select(cdf.c.between(0, 2)).toPandas(),
649
- sdf.select(sdf.c.between(0, 2)).toPandas(),
650
- )
651
- self.assert_eq(
652
- cdf.select(cdf.c.between(1.1, 2.2)).toPandas(),
653
- sdf.select(sdf.c.between(1.1, 2.2)).toPandas(),
654
- )
655
-
656
- self.assert_eq(
657
- cdf.select(cdf.c.between(decimal.Decimal(0), decimal.Decimal(2))).toPandas(),
658
- sdf.select(sdf.c.between(decimal.Decimal(0), decimal.Decimal(2))).toPandas(),
659
- )
660
-
661
- self.assert_eq(
662
- cdf.select(
663
- cdf.a.between(
664
- datetime.datetime(2022, 12, 22, 17, 0, 0),
665
- datetime.datetime(2022, 12, 23, 6, 0, 0),
666
- )
667
- ).toPandas(),
668
- sdf.select(
669
- sdf.a.between(
670
- datetime.datetime(2022, 12, 22, 17, 0, 0),
671
- datetime.datetime(2022, 12, 23, 6, 0, 0),
672
- )
673
- ).toPandas(),
674
- )
675
- self.assert_eq(
676
- cdf.select(
677
- cdf.b.between(datetime.date(2022, 12, 23), datetime.date(2022, 12, 24))
678
- ).toPandas(),
679
- sdf.select(
680
- sdf.b.between(datetime.date(2022, 12, 23), datetime.date(2022, 12, 24))
681
- ).toPandas(),
682
- )
683
-
684
- def test_column_bitwise_ops(self):
685
- # SPARK-41751: test bitwiseAND, bitwiseOR, bitwiseXOR
686
- query = """
687
- SELECT * FROM VALUES
688
- (1, 1, 0), (2, NULL, 1), (3, 3, 4)
689
- AS tab(a, b, c)
690
- """
691
-
692
- # +---+----+---+
693
- # | a| b| c|
694
- # +---+----+---+
695
- # | 1| 1| 0|
696
- # | 2|NULL| 1|
697
- # | 3| 3| 4|
698
- # +---+----+---+
699
-
700
- cdf = self.connect.sql(query)
701
- sdf = self.spark.sql(query)
702
-
703
- # test bitwiseAND
704
- self.assert_eq(
705
- cdf.select(cdf.a.bitwiseAND(cdf.b), cdf["a"].bitwiseAND(CF.col("c"))).toPandas(),
706
- sdf.select(sdf.a.bitwiseAND(sdf.b), sdf["a"].bitwiseAND(SF.col("c"))).toPandas(),
707
- )
708
-
709
- # test bitwiseOR
710
- self.assert_eq(
711
- cdf.select(cdf.a.bitwiseOR(cdf.b), cdf["a"].bitwiseOR(CF.col("c"))).toPandas(),
712
- sdf.select(sdf.a.bitwiseOR(sdf.b), sdf["a"].bitwiseOR(SF.col("c"))).toPandas(),
713
- )
714
-
715
- # test bitwiseXOR
716
- self.assert_eq(
717
- cdf.select(cdf.a.bitwiseXOR(cdf.b), cdf["a"].bitwiseXOR(CF.col("c"))).toPandas(),
718
- sdf.select(sdf.a.bitwiseXOR(sdf.b), sdf["a"].bitwiseXOR(SF.col("c"))).toPandas(),
719
- )
720
-
721
- def test_column_accessor(self):
722
- query = """
723
- SELECT STRUCT(a, b, c) AS x, y, z, c FROM VALUES
724
- (float(1.0), double(1.0), '2022', MAP('b', '123', 'a', 'kk'), ARRAY(1, 2, 3)),
725
- (float(2.0), double(2.0), '2018', MAP('a', 'xy'), ARRAY(-1, -2, -3)),
726
- (float(3.0), double(3.0), NULL, MAP('a', 'ab'), ARRAY(-1, 0, 1))
727
- AS tab(a, b, c, y, z)
728
- """
729
-
730
- # +----------------+-------------------+------------+----+
731
- # | x| y| z| c|
732
- # +----------------+-------------------+------------+----+
733
- # |{1.0, 1.0, 2022}|{b -> 123, a -> kk}| [1, 2, 3]|2022|
734
- # |{2.0, 2.0, 2018}| {a -> xy}|[-1, -2, -3]|2018|
735
- # |{3.0, 3.0, null}| {a -> ab}| [-1, 0, 1]|NULL|
736
- # +----------------+-------------------+------------+----+
737
-
738
- cdf = self.connect.sql(query)
739
- sdf = self.spark.sql(query)
740
-
741
- # test struct
742
- self.assert_eq(
743
- cdf.select(cdf.x.a, cdf.x["b"], cdf["x"].c).toPandas(),
744
- sdf.select(sdf.x.a, sdf.x["b"], sdf["x"].c).toPandas(),
745
- )
746
- self.assert_eq(
747
- cdf.select(CF.col("x").a, cdf.x.b, CF.col("x")["c"]).toPandas(),
748
- sdf.select(SF.col("x").a, sdf.x.b, SF.col("x")["c"]).toPandas(),
749
- )
750
- self.assert_eq(
751
- cdf.select(cdf.x.getItem("a"), cdf.x.getItem("b"), cdf["x"].getField("c")).toPandas(),
752
- sdf.select(sdf.x.getItem("a"), sdf.x.getItem("b"), sdf["x"].getField("c")).toPandas(),
753
- )
754
-
755
- # test map
756
- self.assert_eq(
757
- cdf.select(cdf.y.a, cdf.y["b"], cdf["y"].c).toPandas(),
758
- sdf.select(sdf.y.a, sdf.y["b"], sdf["y"].c).toPandas(),
759
- )
760
- self.assert_eq(
761
- cdf.select(CF.col("y").a, cdf.y.b, CF.col("y")["c"]).toPandas(),
762
- sdf.select(SF.col("y").a, sdf.y.b, SF.col("y")["c"]).toPandas(),
763
- )
764
- self.assert_eq(
765
- cdf.select(cdf.y.getItem("a"), cdf.y.getItem("b"), cdf["y"].getField("c")).toPandas(),
766
- sdf.select(sdf.y.getItem("a"), sdf.y.getItem("b"), sdf["y"].getField("c")).toPandas(),
767
- )
768
-
769
- # test array
770
- self.assert_eq(
771
- cdf.select(cdf.z[0], cdf.z[1], cdf["z"][2]).toPandas(),
772
- sdf.select(sdf.z[0], sdf.z[1], sdf["z"][2]).toPandas(),
773
- )
774
- self.assert_eq(
775
- cdf.select(CF.col("z")[0], cdf.z[10], CF.col("z")[-10]).toPandas(),
776
- sdf.select(SF.col("z")[0], sdf.z[10], SF.col("z")[-10]).toPandas(),
777
- )
778
- self.assert_eq(
779
- cdf.select(cdf.z.getItem(0), cdf.z.getItem(1), cdf["z"].getField(2)).toPandas(),
780
- sdf.select(sdf.z.getItem(0), sdf.z.getItem(1), sdf["z"].getField(2)).toPandas(),
781
- )
782
-
783
- # test string with slice
784
- self.assert_eq(
785
- cdf.select(cdf.c[0:1], cdf["c"][2:10]).toPandas(),
786
- sdf.select(sdf.c[0:1], sdf["c"][2:10]).toPandas(),
787
- )
788
-
789
- def test_column_arithmetic_ops(self):
790
- # SPARK-41761: test arithmetic ops
791
- query = """
792
- SELECT * FROM VALUES
793
- (1, 1, 0, NULL), (2, NULL, 1, 2.0), (3, 3, 4, 3.5)
794
- AS tab(a, b, c, d)
795
- """
796
- # +---+----+---+----+
797
- # | a| b| c| d|
798
- # +---+----+---+----+
799
- # | 1| 1| 0|NULL|
800
- # | 2|NULL| 1| 2.0|
801
- # | 3| 3| 4| 3.5|
802
- # +---+----+---+----+
803
-
804
- cdf = self.connect.sql(query)
805
- sdf = self.spark.sql(query)
806
-
807
- self.assert_eq(
808
- cdf.select(
809
- cdf.a + cdf["b"] - 1, cdf.a - cdf["b"] * cdf["c"] / 2, cdf.d / cdf.b / 3
810
- ).toPandas(),
811
- sdf.select(
812
- sdf.a + sdf["b"] - 1, sdf.a - sdf["b"] * sdf["c"] / 2, sdf.d / sdf.b / 3
813
- ).toPandas(),
814
- )
815
-
816
- # TODO(SPARK-41762): make __neg__ return the correct column name
817
- # [left]: Index(['negative(a)'], dtype='object')
818
- # [right]: Index(['(- a)'], dtype='object')
819
- self.assert_eq(
820
- cdf.select((-cdf.a).alias("x")).toPandas(),
821
- sdf.select((-sdf.a).alias("x")).toPandas(),
822
- )
823
-
824
- self.assert_eq(
825
- cdf.select(3 - cdf.a + cdf["b"] * cdf["c"] - cdf.d / cdf.b).toPandas(),
826
- sdf.select(3 - sdf.a + sdf["b"] * sdf["c"] - sdf.d / sdf.b).toPandas(),
827
- )
828
-
829
- self.assert_eq(
830
- cdf.select(cdf.a % cdf["b"], cdf["a"] % 2, 12 % cdf.c).toPandas(),
831
- sdf.select(sdf.a % sdf["b"], sdf["a"] % 2, 12 % sdf.c).toPandas(),
832
- )
833
-
834
- self.assert_eq(
835
- cdf.select(cdf.a ** cdf["b"], cdf.d**2, 2**cdf.c).toPandas(),
836
- sdf.select(sdf.a ** sdf["b"], sdf.d**2, 2**sdf.c).toPandas(),
837
- )
838
-
839
- def test_column_field_ops(self):
840
- # SPARK-41767: test withField, dropFields
841
- query = """
842
- SELECT STRUCT(a, b, c, d) AS x, e FROM VALUES
843
- (float(1.0), double(1.0), '2022', 1, 0),
844
- (float(2.0), double(2.0), '2018', NULL, 2),
845
- (float(3.0), double(3.0), NULL, 3, NULL)
846
- AS tab(a, b, c, d, e)
847
- """
848
-
849
- # +----------------------+----+
850
- # | x| e|
851
- # +----------------------+----+
852
- # | {1.0, 1.0, 2022, 1}| 0|
853
- # |{2.0, 2.0, 2018, null}| 2|
854
- # | {3.0, 3.0, null, 3}|NULL|
855
- # +----------------------+----+
856
-
857
- cdf = self.connect.sql(query)
858
- sdf = self.spark.sql(query)
859
-
860
- # add field
861
- self.compare_by_show(
862
- cdf.select(cdf.x.withField("z", cdf.e)),
863
- sdf.select(sdf.x.withField("z", sdf.e)),
864
- truncate=100,
865
- )
866
- self.compare_by_show(
867
- cdf.select(cdf.x.withField("z", CF.col("e"))),
868
- sdf.select(sdf.x.withField("z", SF.col("e"))),
869
- truncate=100,
870
- )
871
- self.compare_by_show(
872
- cdf.select(cdf.x.withField("z", CF.lit("xyz"))),
873
- sdf.select(sdf.x.withField("z", SF.lit("xyz"))),
874
- truncate=100,
875
- )
876
-
877
- # replace field
878
- self.compare_by_show(
879
- cdf.select(cdf.x.withField("a", cdf.e)),
880
- sdf.select(sdf.x.withField("a", sdf.e)),
881
- truncate=100,
882
- )
883
- self.compare_by_show(
884
- cdf.select(cdf.x.withField("a", CF.col("e"))),
885
- sdf.select(sdf.x.withField("a", SF.col("e"))),
886
- truncate=100,
887
- )
888
- self.compare_by_show(
889
- cdf.select(cdf.x.withField("a", CF.lit("xyz"))),
890
- sdf.select(sdf.x.withField("a", SF.lit("xyz"))),
891
- truncate=100,
892
- )
893
-
894
- # drop field
895
- self.compare_by_show(
896
- cdf.select(cdf.x.dropFields("a")),
897
- sdf.select(sdf.x.dropFields("a")),
898
- truncate=100,
899
- )
900
- self.compare_by_show(
901
- cdf.select(cdf.x.dropFields("z")),
902
- sdf.select(sdf.x.dropFields("z")),
903
- truncate=100,
904
- )
905
- self.compare_by_show(
906
- cdf.select(cdf.x.dropFields("a", "b", "z")),
907
- sdf.select(sdf.x.dropFields("a", "b", "z")),
908
- truncate=100,
909
- )
910
-
911
- # check error
912
- # invalid column: not a struct column
913
- with self.assertRaises(SparkConnectException):
914
- cdf.select(cdf.e.withField("a", CF.lit(1))).show()
915
-
916
- # invalid column: not a struct column
917
- with self.assertRaises(SparkConnectException):
918
- cdf.select(cdf.e.dropFields("a")).show()
919
-
920
- # cannot drop all fields in struct
921
- with self.assertRaises(SparkConnectException):
922
- cdf.select(cdf.x.dropFields("a", "b", "c", "d")).show()
923
-
924
- with self.assertRaises(PySparkTypeError) as pe:
925
- cdf.select(cdf.x.withField(CF.col("a"), cdf.e)).show()
926
-
927
- self.check_error(
928
- exception=pe.exception,
929
- error_class="NOT_STR",
930
- message_parameters={"arg_name": "fieldName", "arg_type": "Column"},
931
- )
932
-
933
- with self.assertRaises(PySparkTypeError) as pe:
934
- cdf.select(cdf.x.withField("a", 2)).show()
935
-
936
- self.check_error(
937
- exception=pe.exception,
938
- error_class="NOT_COLUMN",
939
- message_parameters={"arg_name": "col", "arg_type": "int"},
940
- )
941
-
942
- with self.assertRaises(PySparkTypeError) as pe:
943
- cdf.select(cdf.x.dropFields("a", 1, 2)).show()
944
-
945
- self.check_error(
946
- exception=pe.exception,
947
- error_class="NOT_STR",
948
- message_parameters={"arg_name": "fieldName", "arg_type": "int"},
949
- )
950
-
951
- with self.assertRaises(PySparkValueError) as pe:
952
- cdf.select(cdf.x.dropFields()).show()
953
-
954
- self.check_error(
955
- exception=pe.exception,
956
- error_class="CANNOT_BE_EMPTY",
957
- message_parameters={"item": "dropFields"},
958
- )
959
-
960
- def test_column_string_ops(self):
961
- # SPARK-41764: test string ops
962
- query = """
963
- SELECT * FROM VALUES
964
- (1, 'abcdef', 'ghij', 'hello world', 'a'),
965
- (2, 'abcd', 'efghij', 'how are you', 'd')
966
- AS tab(a, b, c, d, e)
967
- """
968
-
969
- # +---+------+------+-----------+---+
970
- # | a| b| c| d| e|
971
- # +---+------+------+-----------+---+
972
- # | 1|abcdef| ghij|hello world| a|
973
- # | 2| abcd|efghij|how are you| d|
974
- # +---+------+------+-----------+---+
975
-
976
- cdf = self.connect.sql(query)
977
- sdf = self.spark.sql(query)
978
-
979
- self.assert_eq(
980
- cdf.select(
981
- cdf.b.startswith("a"), cdf["c"].startswith("g"), cdf["b"].startswith(cdf.e)
982
- ).toPandas(),
983
- sdf.select(
984
- sdf.b.startswith("a"), sdf["c"].startswith("g"), sdf["b"].startswith(sdf.e)
985
- ).toPandas(),
986
- )
987
-
988
- self.assert_eq(
989
- cdf.select(
990
- cdf.b.endswith("a"), cdf["c"].endswith("j"), cdf["b"].endswith(cdf.e)
991
- ).toPandas(),
992
- sdf.select(
993
- sdf.b.endswith("a"), sdf["c"].endswith("j"), sdf["b"].endswith(sdf.e)
994
- ).toPandas(),
995
- )
996
-
997
- self.assert_eq(
998
- cdf.select(
999
- cdf.b.contains("a"), cdf["c"].contains("j"), cdf["b"].contains(cdf.e)
1000
- ).toPandas(),
1001
- sdf.select(
1002
- sdf.b.contains("a"), sdf["c"].contains("j"), sdf["b"].contains(sdf.e)
1003
- ).toPandas(),
1004
- )
1005
-
1006
- def test_with_field_column_name(self):
1007
- data = [Row(a=Row(b=1, c=2))]
1008
-
1009
- cdf = self.connect.createDataFrame(data)
1010
- cdf1 = cdf.withColumn("a", cdf["a"].withField("b", CF.lit(3))).select("a.b")
1011
-
1012
- sdf = self.spark.createDataFrame(data)
1013
- sdf1 = sdf.withColumn("a", sdf["a"].withField("b", SF.lit(3))).select("a.b")
1014
-
1015
- self.assertEqual(cdf1.schema, sdf1.schema)
1016
- self.assertEqual(cdf1.collect(), sdf1.collect())
1017
-
1018
- def test_distributed_sequence_id(self):
1019
- cdf = self.connect.range(10)
1020
- expected = self.connect.range(0, 10).selectExpr("id as index", "id")
1021
- self.assertEqual(
1022
- cdf.select(Column(DistributedSequenceID()).alias("index"), "*").collect(),
1023
- expected.collect(),
1024
- )
1025
-
1026
-
1027
- if __name__ == "__main__":
1028
- import os
1029
- import unittest
1030
- from pyspark.sql.tests.connect.test_connect_column import * # noqa: F401
1031
-
1032
- # TODO(SPARK-41794): Enable ANSI mode in this file.
1033
- os.environ["SPARK_ANSI_SQL_MODE"] = "false"
1034
-
1035
- try:
1036
- import xmlrunner
1037
-
1038
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
1039
- except ImportError:
1040
- testRunner = None
1041
-
1042
- unittest.main(testRunner=testRunner, verbosity=2)