snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (474)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  17. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  18. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  19. snowflake/snowpark_connect/relation/map_join.py +258 -62
  20. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  22. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  23. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  24. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  25. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  26. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  27. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  28. snowflake/snowpark_connect/resources_initializer.py +29 -1
  29. snowflake/snowpark_connect/server.py +18 -3
  30. snowflake/snowpark_connect/type_mapping.py +29 -25
  31. snowflake/snowpark_connect/typed_column.py +14 -0
  32. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  33. snowflake/snowpark_connect/utils/context.py +6 -1
  34. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  35. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  36. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  37. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  38. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  39. snowflake/snowpark_connect/version.py +1 -1
  40. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  41. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  42. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  46. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  47. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  48. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  49. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  50. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  51. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  52. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  53. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  93. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  94. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  95. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  96. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  97. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  98. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  100. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  101. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  102. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  103. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  104. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  105. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  360. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  361. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  362. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  439. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  440. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  444. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  445. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  466. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  467. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  468. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  469. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  470. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  471. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  472. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  473. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  474. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,680 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import array as pyarray
19
- import unittest
20
-
21
- from numpy import array, array_equal, zeros, arange, tile, ones, inf
22
-
23
- import pyspark.ml.linalg as newlinalg
24
- from pyspark.serializers import CPickleSerializer
25
- from pyspark.mllib.linalg import (
26
- Vector,
27
- SparseVector,
28
- DenseVector,
29
- VectorUDT,
30
- _convert_to_vector,
31
- DenseMatrix,
32
- SparseMatrix,
33
- Vectors,
34
- Matrices,
35
- MatrixUDT,
36
- )
37
- from pyspark.mllib.linalg.distributed import RowMatrix, IndexedRowMatrix, IndexedRow
38
- from pyspark.mllib.regression import LabeledPoint
39
- from pyspark.sql import Row
40
- from pyspark.testing.mllibutils import MLlibTestCase
41
- from pyspark.testing.utils import have_scipy
42
-
43
-
44
- class VectorTests(MLlibTestCase):
45
- def _test_serialize(self, v):
46
- ser = CPickleSerializer()
47
- self.assertEqual(v, ser.loads(ser.dumps(v)))
48
- jvec = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(v)))
49
- nv = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jvec)))
50
- self.assertEqual(v, nv)
51
- vs = [v] * 100
52
- jvecs = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(vs)))
53
- nvs = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jvecs)))
54
- self.assertEqual(vs, nvs)
55
-
56
- def test_serialize(self):
57
- self._test_serialize(DenseVector(range(10)))
58
- self._test_serialize(DenseVector(array([1.0, 2.0, 3.0, 4.0])))
59
- self._test_serialize(DenseVector(pyarray.array("d", range(10))))
60
- self._test_serialize(SparseVector(4, {1: 1, 3: 2}))
61
- self._test_serialize(SparseVector(3, {}))
62
- self._test_serialize(DenseMatrix(2, 3, range(6)))
63
- sm1 = SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
64
- self._test_serialize(sm1)
65
-
66
- def test_dot(self):
67
- sv = SparseVector(4, {1: 1, 3: 2})
68
- dv = DenseVector(array([1.0, 2.0, 3.0, 4.0]))
69
- lst = DenseVector([1, 2, 3, 4])
70
- mat = array(
71
- [[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]]
72
- )
73
- arr = pyarray.array("d", [0, 1, 2, 3])
74
- self.assertEqual(10.0, sv.dot(dv))
75
- self.assertTrue(array_equal(array([3.0, 6.0, 9.0, 12.0]), sv.dot(mat)))
76
- self.assertEqual(30.0, dv.dot(dv))
77
- self.assertTrue(array_equal(array([10.0, 20.0, 30.0, 40.0]), dv.dot(mat)))
78
- self.assertEqual(30.0, lst.dot(dv))
79
- self.assertTrue(array_equal(array([10.0, 20.0, 30.0, 40.0]), lst.dot(mat)))
80
- self.assertEqual(7.0, sv.dot(arr))
81
-
82
- def test_squared_distance(self):
83
- def squared_distance(a, b):
84
- if isinstance(a, Vector):
85
- return a.squared_distance(b)
86
- else:
87
- return b.squared_distance(a)
88
-
89
- sv = SparseVector(4, {1: 1, 3: 2})
90
- dv = DenseVector(array([1.0, 2.0, 3.0, 4.0]))
91
- lst = DenseVector([4, 3, 2, 1])
92
- lst1 = [4, 3, 2, 1]
93
- arr = pyarray.array("d", [0, 2, 1, 3])
94
- narr = array([0, 2, 1, 3])
95
- self.assertEqual(15.0, squared_distance(sv, dv))
96
- self.assertEqual(25.0, squared_distance(sv, lst))
97
- self.assertEqual(20.0, squared_distance(dv, lst))
98
- self.assertEqual(15.0, squared_distance(dv, sv))
99
- self.assertEqual(25.0, squared_distance(lst, sv))
100
- self.assertEqual(20.0, squared_distance(lst, dv))
101
- self.assertEqual(0.0, squared_distance(sv, sv))
102
- self.assertEqual(0.0, squared_distance(dv, dv))
103
- self.assertEqual(0.0, squared_distance(lst, lst))
104
- self.assertEqual(25.0, squared_distance(sv, lst1))
105
- self.assertEqual(3.0, squared_distance(sv, arr))
106
- self.assertEqual(3.0, squared_distance(sv, narr))
107
-
108
- def test_hash(self):
109
- v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
110
- v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
111
- v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
112
- v4 = SparseVector(4, [(1, 1.0), (3, 2.5)])
113
- self.assertEqual(hash(v1), hash(v2))
114
- self.assertEqual(hash(v1), hash(v3))
115
- self.assertEqual(hash(v2), hash(v3))
116
- self.assertFalse(hash(v1) == hash(v4))
117
- self.assertFalse(hash(v2) == hash(v4))
118
-
119
- def test_eq(self):
120
- v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
121
- v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
122
- v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
123
- v4 = SparseVector(6, [(1, 1.0), (3, 5.5)])
124
- v5 = DenseVector([0.0, 1.0, 0.0, 2.5])
125
- v6 = SparseVector(4, [(1, 1.0), (3, 2.5)])
126
- dm1 = DenseMatrix(2, 2, [2, 0, 0, 0])
127
- sm1 = SparseMatrix(2, 2, [0, 2, 3], [0], [2])
128
- self.assertEqual(v1, v2)
129
- self.assertEqual(v1, v3)
130
- self.assertFalse(v2 == v4)
131
- self.assertFalse(v1 == v5)
132
- self.assertFalse(v1 == v6)
133
- # this is done as Dense and Sparse matrices can be semantically
134
- # equal while still implementing a different __eq__ method
135
- self.assertEqual(dm1, sm1)
136
- self.assertEqual(sm1, dm1)
137
-
138
- def test_equals(self):
139
- indices = [1, 2, 4]
140
- values = [1.0, 3.0, 2.0]
141
- self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0.0, 1.0, 3.0, 0.0, 2.0]))
142
- self.assertFalse(
143
- Vectors._equals(indices, values, list(range(5)), [0.0, 3.0, 1.0, 0.0, 2.0])
144
- )
145
- self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0.0, 3.0, 0.0, 2.0]))
146
- self.assertFalse(
147
- Vectors._equals(indices, values, list(range(5)), [0.0, 1.0, 3.0, 2.0, 2.0])
148
- )
149
-
150
- def test_conversion(self):
151
- # numpy arrays should be automatically upcast to float64
152
- # tests for fix of [SPARK-5089]
153
- v = array([1, 2, 3, 4], dtype="float64")
154
- dv = DenseVector(v)
155
- self.assertTrue(dv.array.dtype == "float64")
156
- v = array([1, 2, 3, 4], dtype="float32")
157
- dv = DenseVector(v)
158
- self.assertTrue(dv.array.dtype == "float64")
159
-
160
- def test_sparse_vector_indexing(self):
161
- sv = SparseVector(5, {1: 1, 3: 2})
162
- self.assertEqual(sv[0], 0.0)
163
- self.assertEqual(sv[3], 2.0)
164
- self.assertEqual(sv[1], 1.0)
165
- self.assertEqual(sv[2], 0.0)
166
- self.assertEqual(sv[4], 0.0)
167
- self.assertEqual(sv[-1], 0.0)
168
- self.assertEqual(sv[-2], 2.0)
169
- self.assertEqual(sv[-3], 0.0)
170
- self.assertEqual(sv[-5], 0.0)
171
- for ind in [5, -6]:
172
- self.assertRaises(IndexError, sv.__getitem__, ind)
173
- for ind in [7.8, "1"]:
174
- self.assertRaises(TypeError, sv.__getitem__, ind)
175
-
176
- zeros = SparseVector(4, {})
177
- self.assertEqual(zeros[0], 0.0)
178
- self.assertEqual(zeros[3], 0.0)
179
- for ind in [4, -5]:
180
- self.assertRaises(IndexError, zeros.__getitem__, ind)
181
-
182
- empty = SparseVector(0, {})
183
- for ind in [-1, 0, 1]:
184
- self.assertRaises(IndexError, empty.__getitem__, ind)
185
-
186
- def test_sparse_vector_iteration(self):
187
- self.assertListEqual(list(SparseVector(3, [], [])), [0.0, 0.0, 0.0])
188
- self.assertListEqual(list(SparseVector(5, [0, 3], [1.0, 2.0])), [1.0, 0.0, 0.0, 2.0, 0.0])
189
-
190
- def test_matrix_indexing(self):
191
- mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
192
- expected = [[0, 6], [1, 8], [4, 10]]
193
- for i in range(3):
194
- for j in range(2):
195
- self.assertEqual(mat[i, j], expected[i][j])
196
-
197
- for i, j in [(-1, 0), (4, 1), (3, 4)]:
198
- self.assertRaises(IndexError, mat.__getitem__, (i, j))
199
-
200
- def test_repr_dense_matrix(self):
201
- mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
202
- self.assertTrue(repr(mat), "DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)")
203
-
204
- mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10], True)
205
- self.assertTrue(repr(mat), "DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)")
206
-
207
- mat = DenseMatrix(6, 3, zeros(18))
208
- self.assertTrue(
209
- repr(mat),
210
- "DenseMatrix(6, 3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..., \
211
- 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], False)",
212
- )
213
-
214
- def test_repr_sparse_matrix(self):
215
- sm1t = SparseMatrix(
216
- 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], isTransposed=True
217
- )
218
- self.assertTrue(
219
- repr(sm1t),
220
- "SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)",
221
- )
222
-
223
- indices = tile(arange(6), 3)
224
- values = ones(18)
225
- sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values)
226
- self.assertTrue(
227
- repr(sm),
228
- "SparseMatrix(6, 3, [0, 6, 12, 18], \
229
- [0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], \
230
- [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., \
231
- 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)",
232
- )
233
-
234
- self.assertTrue(
235
- str(sm),
236
- "6 X 3 CSCMatrix\n\
237
- (0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n\
238
- (0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n\
239
- (0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..",
240
- )
241
-
242
- sm = SparseMatrix(1, 18, zeros(19), [], [])
243
- self.assertTrue(
244
- repr(sm),
245
- "SparseMatrix(1, 18, \
246
- [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)",
247
- )
248
-
249
- def test_sparse_matrix(self):
250
- # Test sparse matrix creation.
251
- sm1 = SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
252
- self.assertEqual(sm1.numRows, 3)
253
- self.assertEqual(sm1.numCols, 4)
254
- self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4])
255
- self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2])
256
- self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0])
257
- self.assertTrue(
258
- repr(sm1),
259
- "SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)",
260
- )
261
-
262
- # Test indexing
263
- expected = [[0, 0, 0, 0], [1, 0, 4, 0], [2, 0, 5, 0]]
264
-
265
- for i in range(3):
266
- for j in range(4):
267
- self.assertEqual(expected[i][j], sm1[i, j])
268
- self.assertTrue(array_equal(sm1.toArray(), expected))
269
-
270
- for i, j in [(-1, 1), (4, 3), (3, 5)]:
271
- self.assertRaises(IndexError, sm1.__getitem__, (i, j))
272
-
273
- # Test conversion to dense and sparse.
274
- smnew = sm1.toDense().toSparse()
275
- self.assertEqual(sm1.numRows, smnew.numRows)
276
- self.assertEqual(sm1.numCols, smnew.numCols)
277
- self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs))
278
- self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices))
279
- self.assertTrue(array_equal(sm1.values, smnew.values))
280
-
281
- sm1t = SparseMatrix(
282
- 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], isTransposed=True
283
- )
284
- self.assertEqual(sm1t.numRows, 3)
285
- self.assertEqual(sm1t.numCols, 4)
286
- self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5])
287
- self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2])
288
- self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0])
289
-
290
- expected = [[3, 2, 0, 0], [0, 0, 4, 0], [9, 0, 8, 0]]
291
-
292
- for i in range(3):
293
- for j in range(4):
294
- self.assertEqual(expected[i][j], sm1t[i, j])
295
- self.assertTrue(array_equal(sm1t.toArray(), expected))
296
-
297
- def test_dense_matrix_is_transposed(self):
298
- mat1 = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
299
- mat = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
300
- self.assertEqual(mat1, mat)
301
-
302
- expected = [[0, 4], [1, 6], [3, 9]]
303
- for i in range(3):
304
- for j in range(2):
305
- self.assertEqual(mat1[i, j], expected[i][j])
306
- self.assertTrue(array_equal(mat1.toArray(), expected))
307
-
308
- sm = mat1.toSparse()
309
- self.assertTrue(array_equal(sm.rowIndices, [1, 2, 0, 1, 2]))
310
- self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5]))
311
- self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9]))
312
-
313
- def test_parse_vector(self):
314
- a = DenseVector([])
315
- self.assertEqual(str(a), "[]")
316
- self.assertEqual(Vectors.parse(str(a)), a)
317
- a = DenseVector([3, 4, 6, 7])
318
- self.assertEqual(str(a), "[3.0,4.0,6.0,7.0]")
319
- self.assertEqual(Vectors.parse(str(a)), a)
320
- a = SparseVector(4, [], [])
321
- self.assertEqual(str(a), "(4,[],[])")
322
- self.assertEqual(SparseVector.parse(str(a)), a)
323
- a = SparseVector(4, [0, 2], [3, 4])
324
- self.assertEqual(str(a), "(4,[0,2],[3.0,4.0])")
325
- self.assertEqual(Vectors.parse(str(a)), a)
326
- a = SparseVector(10, [0, 1], [4, 5])
327
- self.assertEqual(SparseVector.parse(" (10, [0,1 ],[ 4.0,5.0] )"), a)
328
-
329
- def test_norms(self):
330
- a = DenseVector([0, 2, 3, -1])
331
- self.assertAlmostEqual(a.norm(2), 3.742, 3)
332
- self.assertTrue(a.norm(1), 6)
333
- self.assertTrue(a.norm(inf), 3)
334
- a = SparseVector(4, [0, 2], [3, -4])
335
- self.assertAlmostEqual(a.norm(2), 5)
336
- self.assertTrue(a.norm(1), 7)
337
- self.assertTrue(a.norm(inf), 4)
338
-
339
- tmp = SparseVector(4, [0, 2], [3, 0])
340
- self.assertEqual(tmp.numNonzeros(), 1)
341
-
342
- def test_ml_mllib_vector_conversion(self):
343
- # to ml
344
- # dense
345
- mllibDV = Vectors.dense([1, 2, 3])
346
- mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
347
- mlDV2 = mllibDV.asML()
348
- self.assertEqual(mlDV2, mlDV1)
349
- # sparse
350
- mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
351
- mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
352
- mlSV2 = mllibSV.asML()
353
- self.assertEqual(mlSV2, mlSV1)
354
- # from ml
355
- # dense
356
- mllibDV1 = Vectors.dense([1, 2, 3])
357
- mlDV = newlinalg.Vectors.dense([1, 2, 3])
358
- mllibDV2 = Vectors.fromML(mlDV)
359
- self.assertEqual(mllibDV1, mllibDV2)
360
- # sparse
361
- mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
362
- mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
363
- mllibSV2 = Vectors.fromML(mlSV)
364
- self.assertEqual(mllibSV1, mllibSV2)
365
-
366
- def test_ml_mllib_matrix_conversion(self):
367
- # to ml
368
- # dense
369
- mllibDM = Matrices.dense(2, 2, [0, 1, 2, 3])
370
- mlDM1 = newlinalg.Matrices.dense(2, 2, [0, 1, 2, 3])
371
- mlDM2 = mllibDM.asML()
372
- self.assertEqual(mlDM2, mlDM1)
373
- # transposed
374
- mllibDMt = DenseMatrix(2, 2, [0, 1, 2, 3], True)
375
- mlDMt1 = newlinalg.DenseMatrix(2, 2, [0, 1, 2, 3], True)
376
- mlDMt2 = mllibDMt.asML()
377
- self.assertEqual(mlDMt2, mlDMt1)
378
- # sparse
379
- mllibSM = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
380
- mlSM1 = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
381
- mlSM2 = mllibSM.asML()
382
- self.assertEqual(mlSM2, mlSM1)
383
- # transposed
384
- mllibSMt = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
385
- mlSMt1 = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
386
- mlSMt2 = mllibSMt.asML()
387
- self.assertEqual(mlSMt2, mlSMt1)
388
- # from ml
389
- # dense
390
- mllibDM1 = Matrices.dense(2, 2, [1, 2, 3, 4])
391
- mlDM = newlinalg.Matrices.dense(2, 2, [1, 2, 3, 4])
392
- mllibDM2 = Matrices.fromML(mlDM)
393
- self.assertEqual(mllibDM1, mllibDM2)
394
- # transposed
395
- mllibDMt1 = DenseMatrix(2, 2, [1, 2, 3, 4], True)
396
- mlDMt = newlinalg.DenseMatrix(2, 2, [1, 2, 3, 4], True)
397
- mllibDMt2 = Matrices.fromML(mlDMt)
398
- self.assertEqual(mllibDMt1, mllibDMt2)
399
- # sparse
400
- mllibSM1 = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
401
- mlSM = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
402
- mllibSM2 = Matrices.fromML(mlSM)
403
- self.assertEqual(mllibSM1, mllibSM2)
404
- # transposed
405
- mllibSMt1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
406
- mlSMt = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
407
- mllibSMt2 = Matrices.fromML(mlSMt)
408
- self.assertEqual(mllibSMt1, mllibSMt2)
409
-
410
-
411
- class VectorUDTTests(MLlibTestCase):
412
-
413
- dv0 = DenseVector([])
414
- dv1 = DenseVector([1.0, 2.0])
415
- sv0 = SparseVector(2, [], [])
416
- sv1 = SparseVector(2, [1], [2.0])
417
- udt = VectorUDT()
418
-
419
- def test_json_schema(self):
420
- self.assertEqual(VectorUDT.fromJson(self.udt.jsonValue()), self.udt)
421
-
422
- def test_serialization(self):
423
- for v in [self.dv0, self.dv1, self.sv0, self.sv1]:
424
- self.assertEqual(v, self.udt.deserialize(self.udt.serialize(v)))
425
-
426
- def test_infer_schema(self):
427
- rdd = self.sc.parallelize([LabeledPoint(1.0, self.dv1), LabeledPoint(0.0, self.sv1)])
428
- df = rdd.toDF()
429
- schema = df.schema
430
- field = [f for f in schema.fields if f.name == "features"][0]
431
- self.assertEqual(field.dataType, self.udt)
432
- vectors = df.rdd.map(lambda p: p.features).collect()
433
- self.assertEqual(len(vectors), 2)
434
- for v in vectors:
435
- if isinstance(v, SparseVector):
436
- self.assertEqual(v, self.sv1)
437
- elif isinstance(v, DenseVector):
438
- self.assertEqual(v, self.dv1)
439
- else:
440
- raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))
441
-
442
- def test_row_matrix_from_dataframe(self):
443
- from pyspark.errors import IllegalArgumentException
444
-
445
- df = self.spark.createDataFrame([Row(Vectors.dense(1))])
446
- row_matrix = RowMatrix(df)
447
- self.assertEqual(row_matrix.numRows(), 1)
448
- self.assertEqual(row_matrix.numCols(), 1)
449
- with self.assertRaises(IllegalArgumentException):
450
- RowMatrix(df.selectExpr("'monkey'"))
451
-
452
- def test_indexed_row_matrix_from_dataframe(self):
453
- from pyspark.errors import IllegalArgumentException
454
-
455
- df = self.spark.createDataFrame([Row(int(0), Vectors.dense(1))])
456
- matrix = IndexedRowMatrix(df)
457
- self.assertEqual(matrix.numRows(), 1)
458
- self.assertEqual(matrix.numCols(), 1)
459
- with self.assertRaises(IllegalArgumentException):
460
- IndexedRowMatrix(df.drop("_1"))
461
-
462
- def test_row_matrix_invalid_type(self):
463
- rows = self.sc.parallelize([[1, 2, 3], [4, 5, 6]])
464
- invalid_type = ""
465
- matrix = RowMatrix(rows)
466
- self.assertRaises(TypeError, matrix.multiply, invalid_type)
467
-
468
- irows = self.sc.parallelize([IndexedRow(0, [1, 2, 3]), IndexedRow(1, [4, 5, 6])])
469
- imatrix = IndexedRowMatrix(irows)
470
- self.assertRaises(TypeError, imatrix.multiply, invalid_type)
471
-
472
-
473
- class MatrixUDTTests(MLlibTestCase):
474
-
475
- dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10])
476
- dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True)
477
- sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0])
478
- sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True)
479
- udt = MatrixUDT()
480
-
481
- def test_json_schema(self):
482
- self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt)
483
-
484
- def test_serialization(self):
485
- for m in [self.dm1, self.dm2, self.sm1, self.sm2]:
486
- self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m)))
487
-
488
- def test_infer_schema(self):
489
- rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)])
490
- df = rdd.toDF()
491
- schema = df.schema
492
- self.assertTrue(schema.fields[1].dataType, self.udt)
493
- matrices = df.rdd.map(lambda x: x._2).collect()
494
- self.assertEqual(len(matrices), 2)
495
- for m in matrices:
496
- if isinstance(m, DenseMatrix):
497
- self.assertTrue(m, self.dm1)
498
- elif isinstance(m, SparseMatrix):
499
- self.assertTrue(m, self.sm1)
500
- else:
501
- raise ValueError("Expected a matrix but got type %r" % type(m))
502
-
503
-
504
- @unittest.skipIf(not have_scipy, "SciPy not installed")
505
- class SciPyTests(MLlibTestCase):
506
-
507
- """
508
- Test both vector operations and MLlib algorithms with SciPy sparse matrices,
509
- if SciPy is available.
510
- """
511
-
512
- def test_serialize(self):
513
- from scipy.sparse import lil_matrix
514
-
515
- ser = CPickleSerializer()
516
- lil = lil_matrix((4, 1))
517
- lil[1, 0] = 1
518
- lil[3, 0] = 2
519
- sv = SparseVector(4, {1: 1, 3: 2})
520
- self.assertEqual(sv, _convert_to_vector(lil))
521
- self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
522
- self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
523
- self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
524
- self.assertEqual(sv, _convert_to_vector(lil.todok()))
525
-
526
- def serialize(d):
527
- return ser.loads(ser.dumps(_convert_to_vector(d)))
528
-
529
- self.assertEqual(sv, serialize(lil))
530
- self.assertEqual(sv, serialize(lil.tocsc()))
531
- self.assertEqual(sv, serialize(lil.tocsr()))
532
- self.assertEqual(sv, serialize(lil.todok()))
533
-
534
- def test_convert_to_vector(self):
535
- from scipy.sparse import csc_matrix
536
-
537
- # Create a CSC matrix with non-sorted indices
538
- indptr = array([0, 2])
539
- indices = array([3, 1])
540
- data = array([2.0, 1.0])
541
- csc = csc_matrix((data, indices, indptr))
542
- self.assertFalse(csc.has_sorted_indices)
543
- sv = SparseVector(4, {1: 1, 3: 2})
544
- self.assertEqual(sv, _convert_to_vector(csc))
545
-
546
- def test_dot(self):
547
- from scipy.sparse import lil_matrix
548
-
549
- lil = lil_matrix((4, 1))
550
- lil[1, 0] = 1
551
- lil[3, 0] = 2
552
- dv = DenseVector(array([1.0, 2.0, 3.0, 4.0]))
553
- self.assertEqual(10.0, dv.dot(lil))
554
-
555
- def test_squared_distance(self):
556
- from scipy.sparse import lil_matrix
557
-
558
- lil = lil_matrix((4, 1))
559
- lil[1, 0] = 3
560
- lil[3, 0] = 2
561
- dv = DenseVector(array([1.0, 2.0, 3.0, 4.0]))
562
- sv = SparseVector(4, {0: 1, 1: 2, 2: 3, 3: 4})
563
- self.assertEqual(15.0, dv.squared_distance(lil))
564
- self.assertEqual(15.0, sv.squared_distance(lil))
565
-
566
- def scipy_matrix(self, size, values):
567
- """Create a column SciPy matrix from a dictionary of values"""
568
- from scipy.sparse import lil_matrix
569
-
570
- lil = lil_matrix((size, 1))
571
- for key, value in values.items():
572
- lil[key, 0] = value
573
- return lil
574
-
575
- def test_clustering(self):
576
- from pyspark.mllib.clustering import KMeans
577
-
578
- data = [
579
- self.scipy_matrix(3, {1: 1.0}),
580
- self.scipy_matrix(3, {1: 1.1}),
581
- self.scipy_matrix(3, {2: 1.0}),
582
- self.scipy_matrix(3, {2: 1.1}),
583
- ]
584
- clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||")
585
- self.assertEqual(clusters.predict(data[0]), clusters.predict(data[1]))
586
- self.assertEqual(clusters.predict(data[2]), clusters.predict(data[3]))
587
-
588
- def test_classification(self):
589
- from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
590
- from pyspark.mllib.tree import DecisionTree
591
-
592
- data = [
593
- LabeledPoint(0.0, self.scipy_matrix(2, {0: 1.0})),
594
- LabeledPoint(1.0, self.scipy_matrix(2, {1: 1.0})),
595
- LabeledPoint(0.0, self.scipy_matrix(2, {0: 2.0})),
596
- LabeledPoint(1.0, self.scipy_matrix(2, {1: 2.0})),
597
- ]
598
- rdd = self.sc.parallelize(data)
599
- features = [p.features for p in data]
600
-
601
- lr_model = LogisticRegressionWithSGD.train(rdd)
602
- self.assertTrue(lr_model.predict(features[0]) <= 0)
603
- self.assertTrue(lr_model.predict(features[1]) > 0)
604
- self.assertTrue(lr_model.predict(features[2]) <= 0)
605
- self.assertTrue(lr_model.predict(features[3]) > 0)
606
-
607
- svm_model = SVMWithSGD.train(rdd)
608
- self.assertTrue(svm_model.predict(features[0]) <= 0)
609
- self.assertTrue(svm_model.predict(features[1]) > 0)
610
- self.assertTrue(svm_model.predict(features[2]) <= 0)
611
- self.assertTrue(svm_model.predict(features[3]) > 0)
612
-
613
- nb_model = NaiveBayes.train(rdd)
614
- self.assertTrue(nb_model.predict(features[0]) <= 0)
615
- self.assertTrue(nb_model.predict(features[1]) > 0)
616
- self.assertTrue(nb_model.predict(features[2]) <= 0)
617
- self.assertTrue(nb_model.predict(features[3]) > 0)
618
-
619
- categoricalFeaturesInfo = {0: 3} # feature 0 has 3 categories
620
- dt_model = DecisionTree.trainClassifier(
621
- rdd, numClasses=2, categoricalFeaturesInfo=categoricalFeaturesInfo
622
- )
623
- self.assertTrue(dt_model.predict(features[0]) <= 0)
624
- self.assertTrue(dt_model.predict(features[1]) > 0)
625
- self.assertTrue(dt_model.predict(features[2]) <= 0)
626
- self.assertTrue(dt_model.predict(features[3]) > 0)
627
-
628
- def test_regression(self):
629
- from pyspark.mllib.regression import (
630
- LinearRegressionWithSGD,
631
- LassoWithSGD,
632
- RidgeRegressionWithSGD,
633
- )
634
- from pyspark.mllib.tree import DecisionTree
635
-
636
- data = [
637
- LabeledPoint(-1.0, self.scipy_matrix(2, {1: -1.0})),
638
- LabeledPoint(1.0, self.scipy_matrix(2, {1: 1.0})),
639
- LabeledPoint(-1.0, self.scipy_matrix(2, {1: -2.0})),
640
- LabeledPoint(1.0, self.scipy_matrix(2, {1: 2.0})),
641
- ]
642
- rdd = self.sc.parallelize(data)
643
- features = [p.features for p in data]
644
-
645
- lr_model = LinearRegressionWithSGD.train(rdd)
646
- self.assertTrue(lr_model.predict(features[0]) <= 0)
647
- self.assertTrue(lr_model.predict(features[1]) > 0)
648
- self.assertTrue(lr_model.predict(features[2]) <= 0)
649
- self.assertTrue(lr_model.predict(features[3]) > 0)
650
-
651
- lasso_model = LassoWithSGD.train(rdd)
652
- self.assertTrue(lasso_model.predict(features[0]) <= 0)
653
- self.assertTrue(lasso_model.predict(features[1]) > 0)
654
- self.assertTrue(lasso_model.predict(features[2]) <= 0)
655
- self.assertTrue(lasso_model.predict(features[3]) > 0)
656
-
657
- rr_model = RidgeRegressionWithSGD.train(rdd)
658
- self.assertTrue(rr_model.predict(features[0]) <= 0)
659
- self.assertTrue(rr_model.predict(features[1]) > 0)
660
- self.assertTrue(rr_model.predict(features[2]) <= 0)
661
- self.assertTrue(rr_model.predict(features[3]) > 0)
662
-
663
- categoricalFeaturesInfo = {0: 2} # feature 0 has 2 categories
664
- dt_model = DecisionTree.trainRegressor(rdd, categoricalFeaturesInfo=categoricalFeaturesInfo)
665
- self.assertTrue(dt_model.predict(features[0]) <= 0)
666
- self.assertTrue(dt_model.predict(features[1]) > 0)
667
- self.assertTrue(dt_model.predict(features[2]) <= 0)
668
- self.assertTrue(dt_model.predict(features[3]) > 0)
669
-
670
-
671
- if __name__ == "__main__":
672
- from pyspark.mllib.tests.test_linalg import * # noqa: F401
673
-
674
- try:
675
- import xmlrunner
676
-
677
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
678
- except ImportError:
679
- testRunner = None
680
- unittest.main(testRunner=testRunner, verbosity=2)