snowpark-connect 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (476)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/function_defaults.py +207 -0
  7. snowflake/snowpark_connect/expression/literal.py +18 -2
  8. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  9. snowflake/snowpark_connect/expression/map_expression.py +10 -1
  10. snowflake/snowpark_connect/expression/map_extension.py +12 -2
  11. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  12. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  13. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  14. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  15. snowflake/snowpark_connect/expression/map_unresolved_function.py +836 -365
  16. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  17. snowflake/snowpark_connect/hidden_column.py +39 -0
  18. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +18 -36
  21. snowflake/snowpark_connect/relation/map_extension.py +56 -15
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_row_ops.py +2 -29
  24. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  25. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  26. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  27. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  28. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  29. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  30. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  31. snowflake/snowpark_connect/relation/read/utils.py +6 -7
  32. snowflake/snowpark_connect/relation/utils.py +1 -170
  33. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  34. snowflake/snowpark_connect/resources_initializer.py +29 -1
  35. snowflake/snowpark_connect/server.py +18 -3
  36. snowflake/snowpark_connect/type_mapping.py +29 -25
  37. snowflake/snowpark_connect/typed_column.py +14 -0
  38. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  39. snowflake/snowpark_connect/utils/context.py +6 -1
  40. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  41. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  42. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  43. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  44. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  45. snowflake/snowpark_connect/version.py +1 -1
  46. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  47. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  48. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  52. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  53. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  101. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  102. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  362. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  444. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  445. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  468. snowpark_connect-0.23.0.dist-info/RECORD +0 -893
  469. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  470. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  471. {snowpark_connect-0.23.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  472. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  473. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  474. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  475. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  476. {snowpark_connect-0.23.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,1339 +0,0 @@
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one or more
3
- # contributor license agreements. See the NOTICE file distributed with
4
- # this work for additional information regarding copyright ownership.
5
- # The ASF licenses this file to You under the Apache License, Version 2.0
6
- # (the "License"); you may not use this file except in compliance with
7
- # the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing, software
12
- # distributed under the License is distributed on an "AS IS" BASIS,
13
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
- # See the License for the specific language governing permissions and
15
- # limitations under the License.
16
- #
17
-
18
- import datetime
19
- from distutils.version import LooseVersion
20
- import unittest
21
-
22
- import numpy as np
23
- import pandas as pd
24
-
25
- from pyspark import pandas as ps
26
- from pyspark.pandas.exceptions import SparkPandasIndexingError
27
- from pyspark.testing.pandasutils import ComparisonTestBase, compare_both
28
-
29
-
30
- class BasicIndexingTestsMixin:
31
- @property
32
- def pdf(self):
33
- return pd.DataFrame(
34
- {"month": [1, 4, 7, 10], "year": [2012, 2014, 2013, 2014], "sale": [55, 40, 84, 31]}
35
- )
36
-
37
- @compare_both(almost=False)
38
- def test_indexing(self, df):
39
- df1 = df.set_index("month")
40
- yield df1
41
-
42
- yield df.set_index("month", drop=False)
43
- yield df.set_index("month", append=True)
44
- yield df.set_index(["year", "month"])
45
- yield df.set_index(["year", "month"], drop=False)
46
- yield df.set_index(["year", "month"], append=True)
47
-
48
- yield df1.set_index("year", drop=False, append=True)
49
-
50
- df2 = df1.copy()
51
- df2.set_index("year", append=True, inplace=True)
52
- yield df2
53
-
54
- self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index("unknown"))
55
- self.assertRaisesRegex(KeyError, "unknown", lambda: df.set_index(["month", "unknown"]))
56
-
57
- for d in [df, df1, df2]:
58
- yield d.reset_index()
59
- yield d.reset_index(drop=True)
60
-
61
- yield df1.reset_index(level=0)
62
- yield df2.reset_index(level=1)
63
- yield df2.reset_index(level=[1, 0])
64
- yield df1.reset_index(level="month")
65
- yield df2.reset_index(level="year")
66
- yield df2.reset_index(level=["month", "year"])
67
- yield df2.reset_index(level="month", drop=True)
68
- yield df2.reset_index(level=["month", "year"], drop=True)
69
-
70
- self.assertRaisesRegex(
71
- IndexError,
72
- "Too many levels: Index has only 1 level, not 3",
73
- lambda: df1.reset_index(level=2),
74
- )
75
- self.assertRaisesRegex(
76
- IndexError,
77
- "Too many levels: Index has only 1 level, not 4",
78
- lambda: df1.reset_index(level=[3, 2]),
79
- )
80
- self.assertRaisesRegex(KeyError, "unknown.*month", lambda: df1.reset_index(level="unknown"))
81
- self.assertRaisesRegex(
82
- KeyError, "Level unknown not found", lambda: df2.reset_index(level="unknown")
83
- )
84
-
85
- df3 = df2.copy()
86
- df3.reset_index(inplace=True)
87
- yield df3
88
-
89
- yield df1.sale.reset_index()
90
- yield df1.sale.reset_index(level=0)
91
- yield df2.sale.reset_index(level=[1, 0])
92
- yield df1.sale.reset_index(drop=True)
93
- yield df1.sale.reset_index(name="s")
94
- yield df1.sale.reset_index(name="s", drop=True)
95
-
96
- s = df1.sale
97
- self.assertRaisesRegex(
98
- TypeError,
99
- "Cannot reset_index inplace on a Series to create a DataFrame",
100
- lambda: s.reset_index(inplace=True),
101
- )
102
- s.reset_index(drop=True, inplace=True)
103
- yield s
104
- yield df1
105
-
106
- # multi-index columns
107
- df4 = df.copy()
108
- df4.columns = pd.MultiIndex.from_tuples(
109
- [("cal", "month"), ("cal", "year"), ("num", "sale")]
110
- )
111
- df5 = df4.set_index(("cal", "month"))
112
- yield df5
113
- yield df4.set_index([("cal", "month"), ("num", "sale")])
114
-
115
- self.assertRaises(KeyError, lambda: df5.reset_index(level=("cal", "month")))
116
-
117
- yield df5.reset_index(level=[("cal", "month")])
118
-
119
- # non-string names
120
- df6 = df.copy()
121
- df6.columns = [10.0, 20.0, 30.0]
122
- df7 = df6.set_index(10.0)
123
- yield df7
124
- yield df6.set_index([10.0, 30.0])
125
-
126
- yield df7.reset_index(level=10.0)
127
- yield df7.reset_index(level=[10.0])
128
-
129
- df8 = df.copy()
130
- df8.columns = pd.MultiIndex.from_tuples([(10, "month"), (10, "year"), (20, "sale")])
131
- df9 = df8.set_index((10, "month"))
132
- yield df9
133
- yield df8.set_index([(10, "month"), (20, "sale")])
134
-
135
- yield df9.reset_index(level=[(10, "month")])
136
-
137
- def test_from_pandas_with_explicit_index(self):
138
- pdf = self.pdf
139
-
140
- df1 = ps.from_pandas(pdf.set_index("month"))
141
- self.assertPandasEqual(df1._to_pandas(), pdf.set_index("month"))
142
-
143
- df2 = ps.from_pandas(pdf.set_index(["year", "month"]))
144
- self.assertPandasEqual(df2._to_pandas(), pdf.set_index(["year", "month"]))
145
-
146
- def test_limitations(self):
147
- df = self.psdf.set_index("month")
148
-
149
- self.assertRaisesRegex(
150
- ValueError,
151
- "Level should be all int or all string.",
152
- lambda: df.reset_index([1, "month"]),
153
- )
154
-
155
-
156
- class IndexingTest(ComparisonTestBase):
157
- @property
158
- def pdf(self):
159
- return pd.DataFrame(
160
- {"a": [1, 2, 3, 4, 5, 6, 7, 8, 9], "b": [4, 5, 6, 3, 2, 1, 0, 0, 0]},
161
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
162
- )
163
-
164
- @property
165
- def pdf2(self):
166
- return pd.DataFrame(
167
- {0: [1, 2, 3, 4, 5, 6, 7, 8, 9], 1: [4, 5, 6, 3, 2, 1, 0, 0, 0]},
168
- index=[0, 1, 3, 5, 6, 8, 9, 9, 9],
169
- )
170
-
171
- @property
172
- def psdf2(self):
173
- return ps.from_pandas(self.pdf2)
174
-
175
- def test_at(self):
176
- pdf = self.pdf
177
- psdf = self.psdf
178
- # Create the equivalent of pdf.loc[3] as a Koalas Series
179
- # This is necessary because .loc[n] does not currently work with Koalas DataFrames (#383)
180
- test_series = ps.Series([3, 6], index=["a", "b"], name="3")
181
-
182
- # Assert invalided signatures raise TypeError
183
- with self.assertRaises(TypeError, msg="Use DataFrame.at like .at[row_index, column_name]"):
184
- psdf.at[3]
185
- with self.assertRaises(TypeError, msg="Use DataFrame.at like .at[row_index, column_name]"):
186
- psdf.at["ab"] # 'ab' is of length 2 but str type instead of tuple
187
- with self.assertRaises(TypeError, msg="Use Series.at like .at[column_name]"):
188
- test_series.at[3, "b"]
189
-
190
- # Assert .at for DataFrames
191
- self.assertEqual(psdf.at[3, "b"], 6)
192
- self.assertEqual(psdf.at[3, "b"], pdf.at[3, "b"])
193
- self.assert_eq(psdf.at[9, "b"], np.array([0, 0, 0]))
194
- self.assert_eq(psdf.at[9, "b"], pdf.at[9, "b"])
195
-
196
- # Assert .at for Series
197
- self.assertEqual(test_series.at["b"], 6)
198
- self.assertEqual(test_series.at["b"], pdf.loc[3].at["b"])
199
-
200
- # Assert multi-character indices
201
- self.assertEqual(
202
- ps.Series([0, 1], index=["ab", "cd"]).at["ab"],
203
- pd.Series([0, 1], index=["ab", "cd"]).at["ab"],
204
- )
205
-
206
- # Assert invalid column or index names result in a KeyError like with pandas
207
- with self.assertRaises(KeyError, msg="x"):
208
- psdf.at[3, "x"]
209
- with self.assertRaises(KeyError, msg=99):
210
- psdf.at[99, "b"]
211
-
212
- with self.assertRaises(ValueError):
213
- psdf.at[(3, 6), "b"]
214
- with self.assertRaises(KeyError):
215
- psdf.at[3, ("x", "b")]
216
-
217
- # Assert setting values fails
218
- with self.assertRaises(TypeError):
219
- psdf.at[3, "b"] = 10
220
-
221
- # non-string column names
222
- pdf = self.pdf2
223
- psdf = self.psdf2
224
-
225
- # Assert .at for DataFrames
226
- self.assertEqual(psdf.at[3, 1], 6)
227
- self.assertEqual(psdf.at[3, 1], pdf.at[3, 1])
228
- self.assert_eq(psdf.at[9, 1], np.array([0, 0, 0]))
229
- self.assert_eq(psdf.at[9, 1], pdf.at[9, 1])
230
-
231
- def test_at_multiindex(self):
232
- pdf = self.pdf.set_index("b", append=True)
233
- psdf = self.psdf.set_index("b", append=True)
234
-
235
- # TODO: seems like a pandas' bug in pandas>=1.1.0
236
- if LooseVersion(pd.__version__) < LooseVersion("1.1.0"):
237
- self.assert_eq(psdf.at[(3, 6), "a"], pdf.at[(3, 6), "a"])
238
- self.assert_eq(psdf.at[(3,), "a"], pdf.at[(3,), "a"])
239
- self.assert_eq(list(psdf.at[(9, 0), "a"]), list(pdf.at[(9, 0), "a"]))
240
- self.assert_eq(list(psdf.at[(9,), "a"]), list(pdf.at[(9,), "a"]))
241
- else:
242
- self.assert_eq(psdf.at[(3, 6), "a"], 3)
243
- self.assert_eq(psdf.at[(3,), "a"], np.array([3]))
244
- self.assert_eq(list(psdf.at[(9, 0), "a"]), [7, 8, 9])
245
- self.assert_eq(list(psdf.at[(9,), "a"]), [7, 8, 9])
246
-
247
- with self.assertRaises(ValueError):
248
- psdf.at[3, "a"]
249
-
250
- def test_at_multiindex_columns(self):
251
- arrays = [np.array(["bar", "bar", "baz", "baz"]), np.array(["one", "two", "one", "two"])]
252
-
253
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
254
- psdf = ps.from_pandas(pdf)
255
-
256
- self.assert_eq(psdf.at["B", ("bar", "one")], pdf.at["B", ("bar", "one")])
257
-
258
- with self.assertRaises(KeyError):
259
- psdf.at["B", "bar"]
260
-
261
- # non-string column names
262
- arrays = [np.array([0, 0, 1, 1]), np.array([1, 2, 1, 2])]
263
-
264
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
265
- psdf = ps.from_pandas(pdf)
266
-
267
- self.assert_eq(psdf.at["B", (0, 1)], pdf.at["B", (0, 1)])
268
-
269
- def test_iat(self):
270
- pdf = self.pdf
271
- psdf = self.psdf
272
- # Create the equivalent of pdf.loc[3] as a Koalas Series
273
- # This is necessary because .loc[n] does not currently work with Koalas DataFrames (#383)
274
- test_series = ps.Series([3, 6], index=["a", "b"], name="3")
275
-
276
- # Assert invalided signatures raise TypeError
277
- with self.assertRaises(
278
- TypeError,
279
- msg="Use DataFrame.at like .iat[row_interget_position, column_integer_position]",
280
- ):
281
- psdf.iat[3]
282
- with self.assertRaises(
283
- ValueError, msg="iAt based indexing on multi-index can only have tuple values"
284
- ):
285
- psdf.iat[3, "b"] # 'ab' is of length 2 but str type instead of tuple
286
- with self.assertRaises(TypeError, msg="Use Series.iat like .iat[row_integer_position]"):
287
- test_series.iat[3, "b"]
288
-
289
- # Assert .iat for DataFrames
290
- self.assertEqual(psdf.iat[7, 0], 8)
291
- self.assertEqual(psdf.iat[7, 0], pdf.iat[7, 0])
292
-
293
- # Assert .iat for Series
294
- self.assertEqual(test_series.iat[1], 6)
295
- self.assertEqual(test_series.iat[1], pdf.loc[3].iat[1])
296
-
297
- # Assert invalid column or integer position result in a KeyError like with pandas
298
- with self.assertRaises(KeyError, msg=99):
299
- psdf.iat[0, 99]
300
- with self.assertRaises(KeyError, msg=99):
301
- psdf.iat[99, 0]
302
-
303
- with self.assertRaises(ValueError):
304
- psdf.iat[(1, 1), 1]
305
- with self.assertRaises(ValueError):
306
- psdf.iat[1, (1, 1)]
307
-
308
- # Assert setting values fails
309
- with self.assertRaises(TypeError):
310
- psdf.iat[4, 1] = 10
311
-
312
- def test_iat_multiindex(self):
313
- pdf = self.pdf.set_index("b", append=True)
314
- psdf = self.psdf.set_index("b", append=True)
315
-
316
- self.assert_eq(psdf.iat[7, 0], pdf.iat[7, 0])
317
-
318
- with self.assertRaises(ValueError):
319
- psdf.iat[3, "a"]
320
-
321
- def test_iat_multiindex_columns(self):
322
- arrays = [np.array(["bar", "bar", "baz", "baz"]), np.array(["one", "two", "one", "two"])]
323
-
324
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
325
- psdf = ps.from_pandas(pdf)
326
-
327
- self.assert_eq(psdf.iat[1, 3], pdf.iat[1, 3])
328
-
329
- with self.assertRaises(KeyError):
330
- psdf.iat[0, 99]
331
- with self.assertRaises(KeyError):
332
- psdf.iat[99, 0]
333
-
334
- def test_loc(self):
335
- psdf = self.psdf
336
- pdf = self.pdf
337
-
338
- self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
339
- self.assert_eq(psdf.loc[3:8], pdf.loc[3:8])
340
- self.assert_eq(psdf.loc[:8], pdf.loc[:8])
341
- self.assert_eq(psdf.loc[3:], pdf.loc[3:])
342
- self.assert_eq(psdf.loc[[5]], pdf.loc[[5]])
343
- self.assert_eq(psdf.loc[:], pdf.loc[:])
344
-
345
- # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 8]], pdf.loc[[3, 4, 1, 8]])
346
- # TODO?: self.assert_eq(psdf.loc[[3, 4, 1, 9]], pdf.loc[[3, 4, 1, 9]])
347
- # TODO?: self.assert_eq(psdf.loc[np.array([3, 4, 1, 9])], pdf.loc[np.array([3, 4, 1, 9])])
348
-
349
- self.assert_eq(psdf.a.loc[5:5], pdf.a.loc[5:5])
350
- self.assert_eq(psdf.a.loc[3:8], pdf.a.loc[3:8])
351
- self.assert_eq(psdf.a.loc[:8], pdf.a.loc[:8])
352
- self.assert_eq(psdf.a.loc[3:], pdf.a.loc[3:])
353
- self.assert_eq(psdf.a.loc[[5]], pdf.a.loc[[5]])
354
-
355
- # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 8]], pdf.a.loc[[3, 4, 1, 8]])
356
- # TODO?: self.assert_eq(psdf.a.loc[[3, 4, 1, 9]], pdf.a.loc[[3, 4, 1, 9]])
357
- # TODO?: self.assert_eq(psdf.a.loc[np.array([3, 4, 1, 9])],
358
- # pdf.a.loc[np.array([3, 4, 1, 9])])
359
-
360
- self.assert_eq(psdf.a.loc[[]], pdf.a.loc[[]])
361
- self.assert_eq(psdf.a.loc[np.array([])], pdf.a.loc[np.array([])])
362
-
363
- self.assert_eq(psdf.loc[1000:], pdf.loc[1000:])
364
- self.assert_eq(psdf.loc[-2000:-1000], pdf.loc[-2000:-1000])
365
-
366
- self.assert_eq(psdf.loc[5], pdf.loc[5])
367
- self.assert_eq(psdf.loc[9], pdf.loc[9])
368
- self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
369
- self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
370
-
371
- self.assertRaises(KeyError, lambda: psdf.loc[10])
372
- self.assertRaises(KeyError, lambda: psdf.a.loc[10])
373
-
374
- # monotonically increasing index test
375
- pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[0, 1, 1, 2, 2, 2, 4, 5, 6])
376
- psdf = ps.from_pandas(pdf)
377
-
378
- self.assert_eq(psdf.loc[:2], pdf.loc[:2])
379
- self.assert_eq(psdf.loc[:3], pdf.loc[:3])
380
- self.assert_eq(psdf.loc[3:], pdf.loc[3:])
381
- self.assert_eq(psdf.loc[4:], pdf.loc[4:])
382
- self.assert_eq(psdf.loc[3:2], pdf.loc[3:2])
383
- self.assert_eq(psdf.loc[-1:2], pdf.loc[-1:2])
384
- self.assert_eq(psdf.loc[3:10], pdf.loc[3:10])
385
-
386
- # monotonically decreasing index test
387
- pdf = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, index=[6, 5, 5, 4, 4, 4, 2, 1, 0])
388
- psdf = ps.from_pandas(pdf)
389
-
390
- self.assert_eq(psdf.loc[:4], pdf.loc[:4])
391
- self.assert_eq(psdf.loc[:3], pdf.loc[:3])
392
- self.assert_eq(psdf.loc[3:], pdf.loc[3:])
393
- self.assert_eq(psdf.loc[2:], pdf.loc[2:])
394
- self.assert_eq(psdf.loc[2:3], pdf.loc[2:3])
395
- self.assert_eq(psdf.loc[2:-1], pdf.loc[2:-1])
396
- self.assert_eq(psdf.loc[10:3], pdf.loc[10:3])
397
-
398
- # test when type of key is string and given value is not included in key
399
- pdf = pd.DataFrame({"a": [1, 2, 3]}, index=["a", "b", "d"])
400
- psdf = ps.from_pandas(pdf)
401
-
402
- self.assert_eq(psdf.loc["a":"z"], pdf.loc["a":"z"])
403
-
404
- # KeyError when index is not monotonic increasing or decreasing
405
- # and specified values don't exist in index
406
- psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=["cobra", "viper", "sidewinder"])
407
-
408
- self.assertRaises(KeyError, lambda: psdf.loc["cobra":"koalas"])
409
- self.assertRaises(KeyError, lambda: psdf.loc["koalas":"viper"])
410
-
411
- psdf = ps.DataFrame([[1, 2], [4, 5], [7, 8]], index=[10, 30, 20])
412
-
413
- self.assertRaises(KeyError, lambda: psdf.loc[0:30])
414
- self.assertRaises(KeyError, lambda: psdf.loc[10:100])
415
-
416
- def test_loc_getitem_boolean_series(self):
417
- pdf = pd.DataFrame(
418
- {"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}, index=[20, 10, 30, 0, 50]
419
- )
420
- psdf = ps.from_pandas(pdf)
421
- self.assert_eq(pdf.A.loc[pdf.B > 200], psdf.A.loc[psdf.B > 200])
422
- self.assert_eq(pdf.B.loc[pdf.B > 200], psdf.B.loc[psdf.B > 200])
423
- self.assert_eq(pdf.loc[pdf.B > 200], psdf.loc[psdf.B > 200])
424
-
425
- def test_loc_non_informative_index(self):
426
- pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 30, 40])
427
- psdf = ps.from_pandas(pdf)
428
-
429
- self.assert_eq(psdf.loc[20:30], pdf.loc[20:30])
430
-
431
- pdf = pd.DataFrame({"x": [1, 2, 3, 4]}, index=[10, 20, 20, 40])
432
- psdf = ps.from_pandas(pdf)
433
- self.assert_eq(psdf.loc[20:20], pdf.loc[20:20])
434
-
435
- def test_loc_with_series(self):
436
- psdf = self.psdf
437
- pdf = self.pdf
438
-
439
- self.assert_eq(psdf.loc[psdf.a % 2 == 0], pdf.loc[pdf.a % 2 == 0])
440
- self.assert_eq(psdf.loc[psdf.a % 2 == 0, "a"], pdf.loc[pdf.a % 2 == 0, "a"])
441
- self.assert_eq(psdf.loc[psdf.a % 2 == 0, ["a"]], pdf.loc[pdf.a % 2 == 0, ["a"]])
442
- self.assert_eq(psdf.a.loc[psdf.a % 2 == 0], pdf.a.loc[pdf.a % 2 == 0])
443
-
444
- self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0], pdf.loc[pdf.copy().a % 2 == 0])
445
- self.assert_eq(psdf.loc[psdf.copy().a % 2 == 0, "a"], pdf.loc[pdf.copy().a % 2 == 0, "a"])
446
- self.assert_eq(
447
- psdf.loc[psdf.copy().a % 2 == 0, ["a"]], pdf.loc[pdf.copy().a % 2 == 0, ["a"]]
448
- )
449
- self.assert_eq(psdf.a.loc[psdf.copy().a % 2 == 0], pdf.a.loc[pdf.copy().a % 2 == 0])
450
-
451
- def test_loc_noindex(self):
452
- psdf = self.psdf
453
- psdf = psdf.reset_index()
454
- pdf = self.pdf
455
- pdf = pdf.reset_index()
456
-
457
- self.assert_eq(psdf[["a"]], pdf[["a"]])
458
-
459
- self.assert_eq(psdf.loc[:], pdf.loc[:])
460
- self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
461
-
462
- def test_loc_multiindex(self):
463
- psdf = self.psdf
464
- psdf = psdf.set_index("b", append=True)
465
- pdf = self.pdf
466
- pdf = pdf.set_index("b", append=True)
467
-
468
- self.assert_eq(psdf.loc[:], pdf.loc[:])
469
- self.assert_eq(psdf.loc[5:5], pdf.loc[5:5])
470
- self.assert_eq(psdf.loc[5:9], pdf.loc[5:9])
471
-
472
- self.assert_eq(psdf.loc[5], pdf.loc[5])
473
- self.assert_eq(psdf.loc[9], pdf.loc[9])
474
- # TODO: self.assert_eq(psdf.loc[(5, 3)], pdf.loc[(5, 3)])
475
- # TODO: self.assert_eq(psdf.loc[(9, 0)], pdf.loc[(9, 0)])
476
- self.assert_eq(psdf.a.loc[5], pdf.a.loc[5])
477
- self.assert_eq(psdf.a.loc[9], pdf.a.loc[9])
478
- self.assertTrue((psdf.a.loc[(5, 3)] == pdf.a.loc[(5, 3)]).all())
479
- self.assert_eq(psdf.a.loc[(9, 0)], pdf.a.loc[(9, 0)])
480
-
481
- # monotonically increasing index test
482
- pdf = pd.DataFrame(
483
- {"a": [1, 2, 3, 4, 5]},
484
- index=pd.MultiIndex.from_tuples(
485
- [("x", "a"), ("x", "b"), ("y", "c"), ("y", "d"), ("z", "e")]
486
- ),
487
- )
488
- psdf = ps.from_pandas(pdf)
489
-
490
- for rows_sel in [
491
- slice(None),
492
- slice("y", None),
493
- slice(None, "y"),
494
- slice(("x", "b"), None),
495
- slice(None, ("y", "c")),
496
- slice(("x", "b"), ("y", "c")),
497
- slice("x", ("y", "c")),
498
- slice(("x", "b"), "y"),
499
- ]:
500
- with self.subTest("monotonically increasing", rows_sel=rows_sel):
501
- self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
502
- self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
503
-
504
- # monotonically increasing first index test
505
- pdf = pd.DataFrame(
506
- {"a": [1, 2, 3, 4, 5]},
507
- index=pd.MultiIndex.from_tuples(
508
- [("x", "a"), ("x", "b"), ("y", "c"), ("y", "a"), ("z", "e")]
509
- ),
510
- )
511
- psdf = ps.from_pandas(pdf)
512
-
513
- for rows_sel in [
514
- slice(None),
515
- slice("y", None),
516
- slice(None, "y"),
517
- ]:
518
- with self.subTest("monotonically increasing first index", rows_sel=rows_sel):
519
- self.assert_eq(psdf.loc[rows_sel], pdf.loc[rows_sel])
520
- self.assert_eq(psdf.a.loc[rows_sel], pdf.a.loc[rows_sel])
521
-
522
- for rows_sel in [
523
- slice(("x", "b"), None),
524
- slice(None, ("y", "c")),
525
- slice(("x", "b"), ("y", "c")),
526
- slice("x", ("y", "c")),
527
- slice(("x", "b"), "y"),
528
- ]:
529
- with self.subTest("monotonically increasing first index", rows_sel=rows_sel):
530
- self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
531
- self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
532
-
533
- # not monotonically increasing index test
534
- pdf = pd.DataFrame(
535
- {"a": [1, 2, 3, 4, 5]},
536
- index=pd.MultiIndex.from_tuples(
537
- [("z", "e"), ("y", "d"), ("y", "c"), ("x", "b"), ("x", "a")]
538
- ),
539
- )
540
- psdf = ps.from_pandas(pdf)
541
-
542
- for rows_sel in [
543
- slice("y", None),
544
- slice(None, "y"),
545
- slice(("x", "b"), None),
546
- slice(None, ("y", "c")),
547
- slice(("x", "b"), ("y", "c")),
548
- slice("x", ("y", "c")),
549
- slice(("x", "b"), "y"),
550
- ]:
551
- with self.subTest("monotonically decreasing", rows_sel=rows_sel):
552
- self.assertRaises(KeyError, lambda: psdf.loc[rows_sel])
553
- self.assertRaises(KeyError, lambda: psdf.a.loc[rows_sel])
554
-
555
- def test_loc2d_multiindex(self):
556
- psdf = self.psdf
557
- psdf = psdf.set_index("b", append=True)
558
- pdf = self.pdf
559
- pdf = pdf.set_index("b", append=True)
560
-
561
- self.assert_eq(psdf.loc[:, :], pdf.loc[:, :])
562
- self.assert_eq(psdf.loc[:, "a"], pdf.loc[:, "a"])
563
- self.assert_eq(psdf.loc[5:5, "a"], pdf.loc[5:5, "a"])
564
-
565
- self.assert_eq(psdf.loc[:, "a":"a"], pdf.loc[:, "a":"a"])
566
- self.assert_eq(psdf.loc[:, "a":"c"], pdf.loc[:, "a":"c"])
567
- self.assert_eq(psdf.loc[:, "b":"c"], pdf.loc[:, "b":"c"])
568
-
569
- def test_loc2d(self):
570
- psdf = self.psdf
571
- pdf = self.pdf
572
-
573
- # index indexer is always regarded as slice for duplicated values
574
- self.assert_eq(psdf.loc[5:5, "a"], pdf.loc[5:5, "a"])
575
- self.assert_eq(psdf.loc[[5], "a"], pdf.loc[[5], "a"])
576
- self.assert_eq(psdf.loc[5:5, ["a"]], pdf.loc[5:5, ["a"]])
577
- self.assert_eq(psdf.loc[[5], ["a"]], pdf.loc[[5], ["a"]])
578
- self.assert_eq(psdf.loc[:, :], pdf.loc[:, :])
579
-
580
- self.assert_eq(psdf.loc[3:8, "a"], pdf.loc[3:8, "a"])
581
- self.assert_eq(psdf.loc[:8, "a"], pdf.loc[:8, "a"])
582
- self.assert_eq(psdf.loc[3:, "a"], pdf.loc[3:, "a"])
583
- self.assert_eq(psdf.loc[[8], "a"], pdf.loc[[8], "a"])
584
-
585
- self.assert_eq(psdf.loc[3:8, ["a"]], pdf.loc[3:8, ["a"]])
586
- self.assert_eq(psdf.loc[:8, ["a"]], pdf.loc[:8, ["a"]])
587
- self.assert_eq(psdf.loc[3:, ["a"]], pdf.loc[3:, ["a"]])
588
- # TODO?: self.assert_eq(psdf.loc[[3, 4, 3], ['a']], pdf.loc[[3, 4, 3], ['a']])
589
-
590
- self.assertRaises(SparkPandasIndexingError, lambda: psdf.loc[3, 3, 3])
591
- self.assertRaises(SparkPandasIndexingError, lambda: psdf.a.loc[3, 3])
592
- self.assertRaises(SparkPandasIndexingError, lambda: psdf.a.loc[3:, 3])
593
- self.assertRaises(SparkPandasIndexingError, lambda: psdf.a.loc[psdf.a % 2 == 0, 3])
594
-
595
- self.assert_eq(psdf.loc[5, "a"], pdf.loc[5, "a"])
596
- self.assert_eq(psdf.loc[9, "a"], pdf.loc[9, "a"])
597
- self.assert_eq(psdf.loc[5, ["a"]], pdf.loc[5, ["a"]])
598
- self.assert_eq(psdf.loc[9, ["a"]], pdf.loc[9, ["a"]])
599
-
600
- self.assert_eq(psdf.loc[:, "a":"a"], pdf.loc[:, "a":"a"])
601
- self.assert_eq(psdf.loc[:, "a":"d"], pdf.loc[:, "a":"d"])
602
- self.assert_eq(psdf.loc[:, "c":"d"], pdf.loc[:, "c":"d"])
603
-
604
- # bool list-like column select
605
- bool_list = [True, False]
606
- self.assert_eq(psdf.loc[:, bool_list], pdf.loc[:, bool_list])
607
- self.assert_eq(psdf.loc[:, np.array(bool_list)], pdf.loc[:, np.array(bool_list)])
608
-
609
- pser = pd.Series(bool_list, index=pdf.columns)
610
- self.assert_eq(psdf.loc[:, pser], pdf.loc[:, pser])
611
- pser = pd.Series(list(reversed(bool_list)), index=list(reversed(pdf.columns)))
612
- self.assert_eq(psdf.loc[:, pser], pdf.loc[:, pser])
613
-
614
- self.assertRaises(IndexError, lambda: psdf.loc[:, bool_list[:-1]])
615
- self.assertRaises(IndexError, lambda: psdf.loc[:, np.array(bool_list + [True])])
616
- self.assertRaises(SparkPandasIndexingError, lambda: psdf.loc[:, pd.Series(bool_list)])
617
-
618
- # non-string column names
619
- psdf = self.psdf2
620
- pdf = self.pdf2
621
-
622
- self.assert_eq(psdf.loc[5:5, 0], pdf.loc[5:5, 0])
623
- self.assert_eq(psdf.loc[5:5, [0]], pdf.loc[5:5, [0]])
624
- self.assert_eq(psdf.loc[3:8, 0], pdf.loc[3:8, 0])
625
- self.assert_eq(psdf.loc[3:8, [0]], pdf.loc[3:8, [0]])
626
-
627
- self.assert_eq(psdf.loc[:, 0:0], pdf.loc[:, 0:0])
628
- self.assert_eq(psdf.loc[:, 0:3], pdf.loc[:, 0:3])
629
- self.assert_eq(psdf.loc[:, 2:3], pdf.loc[:, 2:3])
630
-
631
- def test_loc2d_multiindex_columns(self):
632
- arrays = [np.array(["bar", "bar", "baz", "baz"]), np.array(["one", "two", "one", "two"])]
633
-
634
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
635
- psdf = ps.from_pandas(pdf)
636
-
637
- self.assert_eq(psdf.loc["B":"B", "bar"], pdf.loc["B":"B", "bar"])
638
- self.assert_eq(psdf.loc["B":"B", ["bar"]], pdf.loc["B":"B", ["bar"]])
639
-
640
- self.assert_eq(psdf.loc[:, "bar":"bar"], pdf.loc[:, "bar":"bar"])
641
- self.assert_eq(psdf.loc[:, "bar":("baz", "one")], pdf.loc[:, "bar":("baz", "one")])
642
- self.assert_eq(
643
- psdf.loc[:, ("bar", "two"):("baz", "one")], pdf.loc[:, ("bar", "two"):("baz", "one")]
644
- )
645
- self.assert_eq(psdf.loc[:, ("bar", "two"):"bar"], pdf.loc[:, ("bar", "two"):"bar"])
646
- self.assert_eq(psdf.loc[:, "a":"bax"], pdf.loc[:, "a":"bax"])
647
- self.assert_eq(
648
- psdf.loc[:, ("bar", "x"):("baz", "a")],
649
- pdf.loc[:, ("bar", "x"):("baz", "a")],
650
- almost=True,
651
- )
652
-
653
- pdf = pd.DataFrame(
654
- np.random.randn(3, 4),
655
- index=["A", "B", "C"],
656
- columns=pd.MultiIndex.from_tuples(
657
- [("bar", "two"), ("bar", "one"), ("baz", "one"), ("baz", "two")]
658
- ),
659
- )
660
- psdf = ps.from_pandas(pdf)
661
-
662
- self.assert_eq(psdf.loc[:, "bar":"baz"], pdf.loc[:, "bar":"baz"])
663
-
664
- self.assertRaises(KeyError, lambda: psdf.loc[:, "bar":("baz", "one")])
665
- self.assertRaises(KeyError, lambda: psdf.loc[:, ("bar", "two"):"bar"])
666
-
667
- # bool list-like column select
668
- bool_list = [True, False, True, False]
669
- self.assert_eq(psdf.loc[:, bool_list], pdf.loc[:, bool_list])
670
- self.assert_eq(psdf.loc[:, np.array(bool_list)], pdf.loc[:, np.array(bool_list)])
671
-
672
- pser = pd.Series(bool_list, index=pdf.columns)
673
- self.assert_eq(psdf.loc[:, pser], pdf.loc[:, pser])
674
-
675
- pser = pd.Series(list(reversed(bool_list)), index=list(reversed(pdf.columns)))
676
- self.assert_eq(psdf.loc[:, pser], pdf.loc[:, pser])
677
-
678
- # non-string column names
679
- arrays = [np.array([0, 0, 1, 1]), np.array([1, 2, 1, 2])]
680
-
681
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
682
- psdf = ps.from_pandas(pdf)
683
-
684
- self.assert_eq(psdf.loc["B":"B", 0], pdf.loc["B":"B", 0])
685
- self.assert_eq(psdf.loc["B":"B", [0]], pdf.loc["B":"B", [0]])
686
- self.assert_eq(psdf.loc[:, 0:0], pdf.loc[:, 0:0])
687
- self.assert_eq(psdf.loc[:, 0:(1, 1)], pdf.loc[:, 0:(1, 1)])
688
- self.assert_eq(psdf.loc[:, (0, 2):(1, 1)], pdf.loc[:, (0, 2):(1, 1)])
689
- self.assert_eq(psdf.loc[:, (0, 2):0], pdf.loc[:, (0, 2):0])
690
- self.assert_eq(psdf.loc[:, -1:2], pdf.loc[:, -1:2])
691
-
692
- def test_loc2d_with_known_divisions(self):
693
- pdf = pd.DataFrame(
694
- np.random.randn(20, 5), index=list("abcdefghijklmnopqrst"), columns=list("ABCDE")
695
- )
696
- psdf = ps.from_pandas(pdf)
697
-
698
- self.assert_eq(psdf.loc[["a"], "A"], pdf.loc[["a"], "A"])
699
- self.assert_eq(psdf.loc[["a"], ["A"]], pdf.loc[["a"], ["A"]])
700
- self.assert_eq(psdf.loc["a":"o", "A"], pdf.loc["a":"o", "A"])
701
- self.assert_eq(psdf.loc["a":"o", ["A"]], pdf.loc["a":"o", ["A"]])
702
- self.assert_eq(psdf.loc[["n"], ["A"]], pdf.loc[["n"], ["A"]])
703
- self.assert_eq(psdf.loc[["a", "c", "n"], ["A"]], pdf.loc[["a", "c", "n"], ["A"]])
704
- # TODO?: self.assert_eq(psdf.loc[['t', 'b'], ['A']], pdf.loc[['t', 'b'], ['A']])
705
- # TODO?: self.assert_eq(psdf.loc[['r', 'r', 'c', 'g', 'h'], ['A']],
706
- # TODO?: pdf.loc[['r', 'r', 'c', 'g', 'h'], ['A']])
707
-
708
- @unittest.skip("TODO: should handle duplicated columns properly")
709
- def test_loc2d_duplicated_columns(self):
710
- pdf = pd.DataFrame(
711
- np.random.randn(20, 5), index=list("abcdefghijklmnopqrst"), columns=list("AABCD")
712
- )
713
- psdf = ps.from_pandas(pdf)
714
-
715
- # TODO?: self.assert_eq(psdf.loc[['a'], 'A'], pdf.loc[['a'], 'A'])
716
- # TODO?: self.assert_eq(psdf.loc[['a'], ['A']], pdf.loc[['a'], ['A']])
717
- self.assert_eq(psdf.loc[["j"], "B"], pdf.loc[["j"], "B"])
718
- self.assert_eq(psdf.loc[["j"], ["B"]], pdf.loc[["j"], ["B"]])
719
-
720
- # TODO?: self.assert_eq(psdf.loc['a':'o', 'A'], pdf.loc['a':'o', 'A'])
721
- # TODO?: self.assert_eq(psdf.loc['a':'o', ['A']], pdf.loc['a':'o', ['A']])
722
- self.assert_eq(psdf.loc["j":"q", "B"], pdf.loc["j":"q", "B"])
723
- self.assert_eq(psdf.loc["j":"q", ["B"]], pdf.loc["j":"q", ["B"]])
724
-
725
- # TODO?: self.assert_eq(psdf.loc['a':'o', 'B':'D'], pdf.loc['a':'o', 'B':'D'])
726
- # TODO?: self.assert_eq(psdf.loc['a':'o', 'B':'D'], pdf.loc['a':'o', 'B':'D'])
727
- # TODO?: self.assert_eq(psdf.loc['j':'q', 'B':'A'], pdf.loc['j':'q', 'B':'A'])
728
- # TODO?: self.assert_eq(psdf.loc['j':'q', 'B':'A'], pdf.loc['j':'q', 'B':'A'])
729
-
730
- self.assert_eq(psdf.loc[psdf.B > 0, "B"], pdf.loc[pdf.B > 0, "B"])
731
- # TODO?: self.assert_eq(psdf.loc[psdf.B > 0, ['A', 'C']], pdf.loc[pdf.B > 0, ['A', 'C']])
732
-
733
- def test_getitem(self):
734
- pdf = pd.DataFrame(
735
- {
736
- "A": [1, 2, 3, 4, 5, 6, 7, 8, 9],
737
- "B": [9, 8, 7, 6, 5, 4, 3, 2, 1],
738
- "C": [True, False, True] * 3,
739
- },
740
- columns=list("ABC"),
741
- )
742
- psdf = ps.from_pandas(pdf)
743
- self.assert_eq(psdf["A"], pdf["A"])
744
-
745
- self.assert_eq(psdf[["A", "B"]], pdf[["A", "B"]])
746
-
747
- self.assert_eq(psdf[psdf.C], pdf[pdf.C])
748
-
749
- self.assertRaises(KeyError, lambda: psdf["X"])
750
- self.assertRaises(KeyError, lambda: psdf[["A", "X"]])
751
- self.assertRaises(AttributeError, lambda: psdf.X)
752
-
753
- # not str/unicode
754
- # TODO?: pdf = pd.DataFrame(np.random.randn(10, 5))
755
- # TODO?: psdf = ps.from_pandas(pdf)
756
- # TODO?: self.assert_eq(psdf[0], pdf[0])
757
- # TODO?: self.assert_eq(psdf[[1, 2]], pdf[[1, 2]])
758
-
759
- # TODO?: self.assertRaises(KeyError, lambda: pdf[8])
760
- # TODO?: self.assertRaises(KeyError, lambda: pdf[[1, 8]])
761
-
762
- # non-string column names
763
- pdf = pd.DataFrame(
764
- {
765
- 10: [1, 2, 3, 4, 5, 6, 7, 8, 9],
766
- 20: [9, 8, 7, 6, 5, 4, 3, 2, 1],
767
- 30: [True, False, True] * 3,
768
- }
769
- )
770
- psdf = ps.from_pandas(pdf)
771
-
772
- self.assert_eq(psdf[10], pdf[10])
773
- self.assert_eq(psdf[[10, 20]], pdf[[10, 20]])
774
-
775
- def test_getitem_slice(self):
776
- pdf = pd.DataFrame(
777
- {
778
- "A": [1, 2, 3, 4, 5, 6, 7, 8, 9],
779
- "B": [9, 8, 7, 6, 5, 4, 3, 2, 1],
780
- "C": [True, False, True] * 3,
781
- },
782
- index=list("abcdefghi"),
783
- )
784
- psdf = ps.from_pandas(pdf)
785
- self.assert_eq(psdf["a":"e"], pdf["a":"e"])
786
- self.assert_eq(psdf["a":"b"], pdf["a":"b"])
787
- self.assert_eq(psdf["f":], pdf["f":])
788
-
789
- def test_loc_on_numpy_datetimes(self):
790
- pdf = pd.DataFrame(
791
- {"x": [1, 2, 3]}, index=list(map(np.datetime64, ["2014", "2015", "2016"]))
792
- )
793
- psdf = ps.from_pandas(pdf)
794
-
795
- self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
796
-
797
- def test_loc_on_pandas_datetimes(self):
798
- pdf = pd.DataFrame(
799
- {"x": [1, 2, 3]}, index=list(map(pd.Timestamp, ["2014", "2015", "2016"]))
800
- )
801
- psdf = ps.from_pandas(pdf)
802
-
803
- self.assert_eq(psdf.loc["2014":"2015"], pdf.loc["2014":"2015"])
804
-
805
- @unittest.skip("TODO?: the behavior of slice for datetime")
806
- def test_loc_datetime_no_freq(self):
807
- datetime_index = pd.date_range("2016-01-01", "2016-01-31", freq="12h")
808
- datetime_index.freq = None # FORGET FREQUENCY
809
- pdf = pd.DataFrame({"num": range(len(datetime_index))}, index=datetime_index)
810
- psdf = ps.from_pandas(pdf)
811
-
812
- slice_ = slice("2016-01-03", "2016-01-05")
813
- result = psdf.loc[slice_, :]
814
- expected = pdf.loc[slice_, :]
815
- self.assert_eq(result, expected)
816
-
817
- @unittest.skip("TODO?: the behavior of slice for datetime")
818
- def test_loc_timestamp_str(self):
819
- pdf = pd.DataFrame(
820
- {"A": np.random.randn(100), "B": np.random.randn(100)},
821
- index=pd.date_range("2011-01-01", freq="H", periods=100),
822
- )
823
- psdf = ps.from_pandas(pdf)
824
-
825
- # partial string slice
826
- # TODO?: self.assert_eq(pdf.loc['2011-01-02'],
827
- # TODO?: psdf.loc['2011-01-02'])
828
- self.assert_eq(pdf.loc["2011-01-02":"2011-01-05"], psdf.loc["2011-01-02":"2011-01-05"])
829
-
830
- # series
831
- # TODO?: self.assert_eq(pdf.A.loc['2011-01-02'],
832
- # TODO?: psdf.A.loc['2011-01-02'])
833
- self.assert_eq(pdf.A.loc["2011-01-02":"2011-01-05"], psdf.A.loc["2011-01-02":"2011-01-05"])
834
-
835
- pdf = pd.DataFrame(
836
- {"A": np.random.randn(100), "B": np.random.randn(100)},
837
- index=pd.date_range("2011-01-01", freq="M", periods=100),
838
- )
839
- psdf = ps.from_pandas(pdf)
840
- # TODO?: self.assert_eq(pdf.loc['2011-01'], psdf.loc['2011-01'])
841
- # TODO?: self.assert_eq(pdf.loc['2011'], psdf.loc['2011'])
842
-
843
- self.assert_eq(pdf.loc["2011-01":"2012-05"], psdf.loc["2011-01":"2012-05"])
844
- self.assert_eq(pdf.loc["2011":"2015"], psdf.loc["2011":"2015"])
845
-
846
- # series
847
- # TODO?: self.assert_eq(pdf.B.loc['2011-01'], psdf.B.loc['2011-01'])
848
- # TODO?: self.assert_eq(pdf.B.loc['2011'], psdf.B.loc['2011'])
849
-
850
- self.assert_eq(pdf.B.loc["2011-01":"2012-05"], psdf.B.loc["2011-01":"2012-05"])
851
- self.assert_eq(pdf.B.loc["2011":"2015"], psdf.B.loc["2011":"2015"])
852
-
853
- @unittest.skip("TODO?: the behavior of slice for datetime")
854
- def test_getitem_timestamp_str(self):
855
- pdf = pd.DataFrame(
856
- {"A": np.random.randn(100), "B": np.random.randn(100)},
857
- index=pd.date_range("2011-01-01", freq="H", periods=100),
858
- )
859
- psdf = ps.from_pandas(pdf)
860
-
861
- # partial string slice
862
- # TODO?: self.assert_eq(pdf['2011-01-02'],
863
- # TODO?: psdf['2011-01-02'])
864
- self.assert_eq(pdf["2011-01-02":"2011-01-05"], psdf["2011-01-02":"2011-01-05"])
865
-
866
- pdf = pd.DataFrame(
867
- {"A": np.random.randn(100), "B": np.random.randn(100)},
868
- index=pd.date_range("2011-01-01", freq="M", periods=100),
869
- )
870
- psdf = ps.from_pandas(pdf)
871
-
872
- # TODO?: self.assert_eq(pdf['2011-01'], psdf['2011-01'])
873
- # TODO?: self.assert_eq(pdf['2011'], psdf['2011'])
874
-
875
- self.assert_eq(pdf["2011-01":"2012-05"], psdf["2011-01":"2012-05"])
876
- self.assert_eq(pdf["2011":"2015"], psdf["2011":"2015"])
877
-
878
- @unittest.skip("TODO?: period index can't convert to DataFrame correctly")
879
- def test_getitem_period_str(self):
880
- pdf = pd.DataFrame(
881
- {"A": np.random.randn(100), "B": np.random.randn(100)},
882
- index=pd.period_range("2011-01-01", freq="H", periods=100),
883
- )
884
- psdf = ps.from_pandas(pdf)
885
-
886
- # partial string slice
887
- # TODO?: self.assert_eq(pdf['2011-01-02'],
888
- # TODO?: psdf['2011-01-02'])
889
- self.assert_eq(pdf["2011-01-02":"2011-01-05"], psdf["2011-01-02":"2011-01-05"])
890
-
891
- pdf = pd.DataFrame(
892
- {"A": np.random.randn(100), "B": np.random.randn(100)},
893
- index=pd.period_range("2011-01-01", freq="M", periods=100),
894
- )
895
- psdf = ps.from_pandas(pdf)
896
-
897
- # TODO?: self.assert_eq(pdf['2011-01'], psdf['2011-01'])
898
- # TODO?: self.assert_eq(pdf['2011'], psdf['2011'])
899
-
900
- self.assert_eq(pdf["2011-01":"2012-05"], psdf["2011-01":"2012-05"])
901
- self.assert_eq(pdf["2011":"2015"], psdf["2011":"2015"])
902
-
903
- def test_iloc(self):
904
- pdf = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
905
- psdf = ps.from_pandas(pdf)
906
-
907
- self.assert_eq(psdf.iloc[0, 0], pdf.iloc[0, 0])
908
- for indexer in [0, [0], [0, 1], [1, 0], [False, True, True], slice(0, 1)]:
909
- self.assert_eq(psdf.iloc[:, indexer], pdf.iloc[:, indexer])
910
- self.assert_eq(psdf.iloc[:1, indexer], pdf.iloc[:1, indexer])
911
- self.assert_eq(psdf.iloc[:-1, indexer], pdf.iloc[:-1, indexer])
912
- # self.assert_eq(psdf.iloc[psdf.index == 2, indexer], pdf.iloc[pdf.index == 2, indexer])
913
-
914
- def test_iloc_multiindex_columns(self):
915
- arrays = [np.array(["bar", "bar", "baz", "baz"]), np.array(["one", "two", "one", "two"])]
916
-
917
- pdf = pd.DataFrame(np.random.randn(3, 4), index=["A", "B", "C"], columns=arrays)
918
- psdf = ps.from_pandas(pdf)
919
-
920
- for indexer in [0, [0], [0, 1], [1, 0], [False, True, True, True], slice(0, 1)]:
921
- self.assert_eq(psdf.iloc[:, indexer], pdf.iloc[:, indexer])
922
- self.assert_eq(psdf.iloc[:1, indexer], pdf.iloc[:1, indexer])
923
- self.assert_eq(psdf.iloc[:-1, indexer], pdf.iloc[:-1, indexer])
924
- # self.assert_eq(psdf.iloc[psdf.index == "B", indexer],
925
- # pdf.iloc[pdf.index == "B", indexer])
926
-
927
- def test_iloc_series(self):
928
- pser = pd.Series([1, 2, 3])
929
- psser = ps.from_pandas(pser)
930
-
931
- self.assert_eq(psser.iloc[0], pser.iloc[0])
932
- self.assert_eq(psser.iloc[:], pser.iloc[:])
933
- self.assert_eq(psser.iloc[:1], pser.iloc[:1])
934
- self.assert_eq(psser.iloc[:-1], pser.iloc[:-1])
935
-
936
- self.assert_eq((psser + 1).iloc[0], (pser + 1).iloc[0])
937
- self.assert_eq((psser + 1).iloc[:], (pser + 1).iloc[:])
938
- self.assert_eq((psser + 1).iloc[:1], (pser + 1).iloc[:1])
939
- self.assert_eq((psser + 1).iloc[:-1], (pser + 1).iloc[:-1])
940
-
941
- def test_iloc_slice_rows_sel(self):
942
- pdf = pd.DataFrame({"A": [1, 2] * 5, "B": [3, 4] * 5, "C": [5, 6] * 5})
943
- psdf = ps.from_pandas(pdf)
944
-
945
- for rows_sel in [
946
- slice(None),
947
- slice(0, 1),
948
- slice(1, 2),
949
- slice(-3, None),
950
- slice(None, -3),
951
- slice(None, 0),
952
- slice(None, None, 3),
953
- slice(3, 8, 2),
954
- slice(None, None, -2),
955
- slice(8, 3, -2),
956
- slice(8, None, -2),
957
- slice(None, 3, -2),
958
- ]:
959
- with self.subTest(rows_sel=rows_sel):
960
- self.assert_eq(psdf.iloc[rows_sel].sort_index(), pdf.iloc[rows_sel].sort_index())
961
- self.assert_eq(
962
- psdf.A.iloc[rows_sel].sort_index(), pdf.A.iloc[rows_sel].sort_index()
963
- )
964
- self.assert_eq(
965
- (psdf.A + 1).iloc[rows_sel].sort_index(),
966
- (pdf.A + 1).iloc[rows_sel].sort_index(),
967
- )
968
-
969
- def test_iloc_iterable_rows_sel(self):
970
- pdf = pd.DataFrame({"A": [1, 2] * 5, "B": [3, 4] * 5, "C": [5, 6] * 5})
971
- psdf = ps.from_pandas(pdf)
972
-
973
- for rows_sel in [
974
- [],
975
- np.array([0, 1]),
976
- [1, 2],
977
- np.array([-3]),
978
- [3],
979
- np.array([-2]),
980
- [8, 3, -5],
981
- ]:
982
- with self.subTest(rows_sel=rows_sel):
983
- self.assert_eq(psdf.iloc[rows_sel].sort_index(), pdf.iloc[rows_sel].sort_index())
984
- self.assert_eq(
985
- psdf.A.iloc[rows_sel].sort_index(), pdf.A.iloc[rows_sel].sort_index()
986
- )
987
- self.assert_eq(
988
- (psdf.A + 1).iloc[rows_sel].sort_index(),
989
- (pdf.A + 1).iloc[rows_sel].sort_index(),
990
- )
991
-
992
- with self.subTest(rows_sel=rows_sel):
993
- self.assert_eq(
994
- psdf.iloc[rows_sel, :].sort_index(), pdf.iloc[rows_sel, :].sort_index()
995
- )
996
-
997
- with self.subTest(rows_sel=rows_sel):
998
- self.assert_eq(
999
- psdf.iloc[rows_sel, :1].sort_index(), pdf.iloc[rows_sel, :1].sort_index()
1000
- )
1001
-
1002
- def test_frame_loc_setitem(self):
1003
- pdf = pd.DataFrame(
1004
- [[1, 2], [4, 5], [7, 8]],
1005
- index=["cobra", "viper", "sidewinder"],
1006
- columns=["max_speed", "shield"],
1007
- )
1008
- psdf = ps.from_pandas(pdf)
1009
-
1010
- pser1 = pdf.max_speed
1011
- pser2 = pdf.shield
1012
- psser1 = psdf.max_speed
1013
- psser2 = psdf.shield
1014
-
1015
- pdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
1016
- psdf.loc[["viper", "sidewinder"], ["shield", "max_speed"]] = 10
1017
- self.assert_eq(psdf, pdf)
1018
- self.assert_eq(psser1, pser1)
1019
- self.assert_eq(psser2, pser2)
1020
-
1021
- pdf.loc[["viper", "sidewinder"], "shield"] = 50
1022
- psdf.loc[["viper", "sidewinder"], "shield"] = 50
1023
- self.assert_eq(psdf, pdf)
1024
- self.assert_eq(psser1, pser1)
1025
- self.assert_eq(psser2, pser2)
1026
-
1027
- pdf.loc["cobra", "max_speed"] = 30
1028
- psdf.loc["cobra", "max_speed"] = 30
1029
- self.assert_eq(psdf, pdf)
1030
- self.assert_eq(psser1, pser1)
1031
- self.assert_eq(psser2, pser2)
1032
-
1033
- pdf.loc[pdf.max_speed < 5, "max_speed"] = -pdf.max_speed
1034
- psdf.loc[psdf.max_speed < 5, "max_speed"] = -psdf.max_speed
1035
- self.assert_eq(psdf, pdf)
1036
- self.assert_eq(psser1, pser1)
1037
- self.assert_eq(psser2, pser2)
1038
-
1039
- pdf.loc[pdf.max_speed < 2, "max_speed"] = -pdf.max_speed
1040
- psdf.loc[psdf.max_speed < 2, "max_speed"] = -psdf.max_speed
1041
- self.assert_eq(psdf, pdf)
1042
- self.assert_eq(psser1, pser1)
1043
- self.assert_eq(psser2, pser2)
1044
-
1045
- pdf.loc[:, "min_speed"] = 0
1046
- psdf.loc[:, "min_speed"] = 0
1047
- self.assert_eq(psdf, pdf, almost=True)
1048
- self.assert_eq(psser1, pser1)
1049
- self.assert_eq(psser2, pser2)
1050
-
1051
- with self.assertRaisesRegex(ValueError, "Incompatible indexer with Series"):
1052
- psdf.loc["cobra", "max_speed"] = -psdf.max_speed
1053
- with self.assertRaisesRegex(ValueError, "shape mismatch"):
1054
- psdf.loc[:, ["shield", "max_speed"]] = -psdf.max_speed
1055
- with self.assertRaisesRegex(ValueError, "Only a dataframe with one column can be assigned"):
1056
- psdf.loc[:, "max_speed"] = psdf
1057
-
1058
- # multi-index columns
1059
- columns = pd.MultiIndex.from_tuples(
1060
- [("x", "max_speed"), ("x", "shield"), ("y", "min_speed")]
1061
- )
1062
- pdf.columns = columns
1063
- psdf.columns = columns
1064
-
1065
- pdf.loc[:, ("y", "shield")] = -pdf[("x", "shield")]
1066
- psdf.loc[:, ("y", "shield")] = -psdf[("x", "shield")]
1067
- self.assert_eq(psdf, pdf, almost=True)
1068
- self.assert_eq(psser1, pser1)
1069
- self.assert_eq(psser2, pser2)
1070
-
1071
- pdf.loc[:, "z"] = 100
1072
- psdf.loc[:, "z"] = 100
1073
- self.assert_eq(psdf, pdf, almost=True)
1074
- self.assert_eq(psser1, pser1)
1075
- self.assert_eq(psser2, pser2)
1076
-
1077
- with self.assertRaisesRegex(KeyError, "Key length \\(3\\) exceeds index depth \\(2\\)"):
1078
- psdf.loc[:, [("x", "max_speed", "foo")]] = -psdf[("x", "shield")]
1079
-
1080
- pdf = pd.DataFrame(
1081
- [[1], [4], [7]], index=["cobra", "viper", "sidewinder"], columns=["max_speed"]
1082
- )
1083
- psdf = ps.from_pandas(pdf)
1084
-
1085
- pdf.loc[:, "max_speed"] = pdf
1086
- psdf.loc[:, "max_speed"] = psdf
1087
- self.assert_eq(psdf, pdf)
1088
-
1089
- def test_frame_iloc_setitem(self):
1090
- pdf = pd.DataFrame(
1091
- [[1, 2], [4, 5], [7, 8]],
1092
- index=["cobra", "viper", "sidewinder"],
1093
- columns=["max_speed", "shield"],
1094
- )
1095
- psdf = ps.from_pandas(pdf)
1096
-
1097
- pdf.iloc[[1, 2], [1, 0]] = 10
1098
- psdf.iloc[[1, 2], [1, 0]] = 10
1099
- self.assert_eq(psdf, pdf)
1100
-
1101
- pdf.iloc[0, 1] = 50
1102
- psdf.iloc[0, 1] = 50
1103
- self.assert_eq(psdf, pdf)
1104
-
1105
- with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."):
1106
- psdf.iloc[0, 0] = -psdf.max_speed
1107
- with self.assertRaisesRegex(ValueError, "shape mismatch"):
1108
- psdf.iloc[:, [1, 0]] = -psdf.max_speed
1109
- with self.assertRaisesRegex(ValueError, "Only a dataframe with one column can be assigned"):
1110
- psdf.iloc[:, 0] = psdf
1111
-
1112
- pdf = pd.DataFrame(
1113
- [[1], [4], [7]], index=["cobra", "viper", "sidewinder"], columns=["max_speed"]
1114
- )
1115
- psdf = ps.from_pandas(pdf)
1116
-
1117
- pdf.iloc[:, 0] = pdf
1118
- psdf.iloc[:, 0] = psdf
1119
- self.assert_eq(psdf, pdf)
1120
-
1121
- def test_series_loc_setitem(self):
1122
- pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
1123
- psdf = ps.from_pandas(pdf)
1124
-
1125
- pser = pdf.x
1126
- psery = pdf.y
1127
- psser = psdf.x
1128
- pssery = psdf.y
1129
-
1130
- pser.loc[pser % 2 == 1] = -pser
1131
- psser.loc[psser % 2 == 1] = -psser
1132
- self.assert_eq(psser, pser)
1133
- self.assert_eq(psdf, pdf)
1134
- self.assert_eq(pssery, psery)
1135
-
1136
- for key, value in [
1137
- (["viper", "sidewinder"], 10),
1138
- ("viper", 50),
1139
- (slice(None), 10),
1140
- (slice(None, "viper"), 20),
1141
- (slice("viper", None), 30),
1142
- ]:
1143
- with self.subTest(key=key, value=value):
1144
- pser.loc[key] = value
1145
- psser.loc[key] = value
1146
- self.assert_eq(psser, pser)
1147
- self.assert_eq(psdf, pdf)
1148
- self.assert_eq(pssery, psery)
1149
-
1150
- with self.assertRaises(ValueError):
1151
- psser.loc["viper"] = -psser
1152
-
1153
- # multiindex
1154
- pser = pd.Series(
1155
- [1, 2, 3],
1156
- index=pd.MultiIndex.from_tuples([("x", "cobra"), ("x", "viper"), ("y", "sidewinder")]),
1157
- )
1158
- psser = ps.from_pandas(pser)
1159
-
1160
- pser.loc["x"] = pser * 10
1161
- psser.loc["x"] = psser * 10
1162
- self.assert_eq(psser, pser)
1163
-
1164
- pser.loc["y"] = pser * 10
1165
- psser.loc["y"] = psser * 10
1166
- self.assert_eq(psser, pser)
1167
-
1168
- def test_series_iloc_setitem(self):
1169
- pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
1170
- psdf = ps.from_pandas(pdf)
1171
-
1172
- pser = pdf.x
1173
- psery = pdf.y
1174
- psser = psdf.x
1175
- pssery = psdf.y
1176
-
1177
- piloc = pser.iloc
1178
- kiloc = psser.iloc
1179
-
1180
- pser1 = pser + 1
1181
- psser1 = psser + 1
1182
-
1183
- for key, value in [
1184
- ([1, 2], 10),
1185
- (1, 50),
1186
- (slice(None), 10),
1187
- (slice(None, 1), 20),
1188
- (slice(1, None), 30),
1189
- ]:
1190
- with self.subTest(key=key, value=value):
1191
- pser.iloc[key] = value
1192
- psser.iloc[key] = value
1193
- self.assert_eq(psser, pser)
1194
- self.assert_eq(psdf, pdf)
1195
- self.assert_eq(pssery, psery)
1196
-
1197
- piloc[key] = -value
1198
- kiloc[key] = -value
1199
- self.assert_eq(psser, pser)
1200
- self.assert_eq(psdf, pdf)
1201
- self.assert_eq(pssery, psery)
1202
-
1203
- pser1.iloc[key] = value
1204
- psser1.iloc[key] = value
1205
- self.assert_eq(psser1, pser1)
1206
- self.assert_eq(psdf, pdf)
1207
- self.assert_eq(pssery, psery)
1208
-
1209
- with self.assertRaises(ValueError):
1210
- psser.iloc[1] = -psser
1211
-
1212
- pser = pd.Index([1, 2, 3]).to_series()
1213
- psser = ps.Index([1, 2, 3]).to_series()
1214
-
1215
- pser1 = pser + 1
1216
- psser1 = psser + 1
1217
-
1218
- pser.iloc[0] = 10
1219
- psser.iloc[0] = 10
1220
- self.assert_eq(psser, pser)
1221
-
1222
- pser1.iloc[0] = 20
1223
- psser1.iloc[0] = 20
1224
- self.assert_eq(psser1, pser1)
1225
-
1226
- pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
1227
- psdf = ps.from_pandas(pdf)
1228
-
1229
- pser = pdf.a
1230
- psser = psdf.a
1231
-
1232
- pser.iloc[[0, 1, 2]] = -pdf.b
1233
- psser.iloc[[0, 1, 2]] = -psdf.b
1234
- self.assert_eq(psser, pser)
1235
- self.assert_eq(psdf, pdf)
1236
-
1237
- with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."):
1238
- psser.iloc[1] = psdf[["b"]]
1239
-
1240
- def test_iloc_raises(self):
1241
- pdf = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
1242
- psdf = ps.from_pandas(pdf)
1243
-
1244
- with self.assertRaisesRegex(SparkPandasIndexingError, "Only accepts pairs of candidates"):
1245
- psdf.iloc[[0, 1], [0, 1], [1, 2]]
1246
-
1247
- with self.assertRaisesRegex(SparkPandasIndexingError, "Too many indexers"):
1248
- psdf.A.iloc[[0, 1], [0, 1]]
1249
-
1250
- with self.assertRaisesRegex(TypeError, "cannot do slice indexing with these indexers"):
1251
- psdf.iloc[:"b", :]
1252
-
1253
- with self.assertRaisesRegex(TypeError, "cannot do slice indexing with these indexers"):
1254
- psdf.iloc[:, :"b"]
1255
-
1256
- with self.assertRaisesRegex(TypeError, "cannot perform reduce with flexible type"):
1257
- psdf.iloc[:, ["A"]]
1258
-
1259
- with self.assertRaisesRegex(ValueError, "Location based indexing can only have"):
1260
- psdf.iloc[:, "A"]
1261
-
1262
- with self.assertRaisesRegex(IndexError, "out of range"):
1263
- psdf.iloc[:, [5, 6]]
1264
-
1265
- def test_index_operator_datetime(self):
1266
- dates = pd.date_range("20130101", periods=6)
1267
- pdf = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
1268
- psdf = ps.from_pandas(pdf)
1269
-
1270
- # Positional iloc search
1271
- self.assert_eq(psdf[:4], pdf[:4], almost=True)
1272
- self.assert_eq(psdf[:3], pdf[:3], almost=True)
1273
- self.assert_eq(psdf[3:], pdf[3:], almost=True)
1274
- self.assert_eq(psdf[2:], pdf[2:], almost=True)
1275
- self.assert_eq(psdf[2:3], pdf[2:3], almost=True)
1276
- self.assert_eq(psdf[2:-1], pdf[2:-1], almost=True)
1277
- self.assert_eq(psdf[10:3], pdf[10:3], almost=True)
1278
-
1279
- # Index loc search
1280
- self.assert_eq(psdf.A[4], pdf.A[4])
1281
- self.assert_eq(psdf.A[3], pdf.A[3])
1282
-
1283
- # Positional iloc search
1284
- self.assert_eq(psdf.A[:4], pdf.A[:4], almost=True)
1285
- self.assert_eq(psdf.A[:3], pdf.A[:3], almost=True)
1286
- self.assert_eq(psdf.A[3:], pdf.A[3:], almost=True)
1287
- self.assert_eq(psdf.A[2:], pdf.A[2:], almost=True)
1288
- self.assert_eq(psdf.A[2:3], pdf.A[2:3], almost=True)
1289
- self.assert_eq(psdf.A[2:-1], pdf.A[2:-1], almost=True)
1290
- self.assert_eq(psdf.A[10:3], pdf.A[10:3], almost=True)
1291
-
1292
- dt1 = datetime.datetime.strptime("2013-01-02", "%Y-%m-%d")
1293
- dt2 = datetime.datetime.strptime("2013-01-04", "%Y-%m-%d")
1294
-
1295
- # Index loc search
1296
- self.assert_eq(psdf[:dt2], pdf[:dt2], almost=True)
1297
- self.assert_eq(psdf[dt1:], pdf[dt1:], almost=True)
1298
- self.assert_eq(psdf[dt1:dt2], pdf[dt1:dt2], almost=True)
1299
- self.assert_eq(psdf.A[dt2], pdf.A[dt2], almost=True)
1300
- self.assert_eq(psdf.A[:dt2], pdf.A[:dt2], almost=True)
1301
- self.assert_eq(psdf.A[dt1:], pdf.A[dt1:], almost=True)
1302
- self.assert_eq(psdf.A[dt1:dt2], pdf.A[dt1:dt2], almost=True)
1303
-
1304
- def test_index_operator_int(self):
1305
- pdf = pd.DataFrame(np.random.randn(6, 4), index=[1, 3, 5, 7, 9, 11], columns=list("ABCD"))
1306
- psdf = ps.from_pandas(pdf)
1307
-
1308
- # Positional iloc search
1309
- self.assert_eq(psdf[:4], pdf[:4])
1310
- self.assert_eq(psdf[:3], pdf[:3])
1311
- self.assert_eq(psdf[3:], pdf[3:])
1312
- self.assert_eq(psdf[2:], pdf[2:])
1313
- self.assert_eq(psdf[2:3], pdf[2:3])
1314
- self.assert_eq(psdf[2:-1], pdf[2:-1])
1315
- self.assert_eq(psdf[10:3], pdf[10:3])
1316
-
1317
- # Index loc search
1318
- self.assert_eq(psdf.A[5], pdf.A[5])
1319
- self.assert_eq(psdf.A[3], pdf.A[3])
1320
- with self.assertRaisesRegex(
1321
- NotImplementedError, "Duplicated row selection is not currently supported"
1322
- ):
1323
- psdf.iloc[[1, 1]]
1324
-
1325
-
1326
- class BasicIndexingTests(BasicIndexingTestsMixin, ComparisonTestBase):
1327
- pass
1328
-
1329
-
1330
- if __name__ == "__main__":
1331
- from pyspark.pandas.tests.test_indexing import * # noqa: F401
1332
-
1333
- try:
1334
- import xmlrunner
1335
-
1336
- testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
1337
- except ImportError:
1338
- testRunner = None
1339
- unittest.main(testRunner=testRunner, verbosity=2)